ʻOhana
Population structure, admixture history, and selection using learning methods.
jade.discrete_genotype_matrix.hpp
1 /* -------------------------------------------------------------------------
2  Ohana
3  Copyright (c) 2015-2020 Jade Cheng (\___/)
4  Jade Cheng <info@jade-cheng.com> (='.'=)
5  ------------------------------------------------------------------------- */
6 
7 #ifndef JADE_DISCRETE_GENOTYPE_MATRIX_HPP__
8 #define JADE_DISCRETE_GENOTYPE_MATRIX_HPP__
9 
10 #include "jade.genotype.hpp"
11 #include "jade.verification.hpp"
12 
13 namespace jade
14 {
15  ///
16  /// A template for a class implementing operations for a discrete genotype
17  /// matrix.
18  ///
19  template <typename TValue>
21  : public basic_genotype_matrix<TValue>
22  {
23  public:
24  /// The value type.
25  typedef TValue value_type;
26 
27  /// The genotype matrix type.
29 
30  /// The matrix type.
32 
33  /// The verification type.
35 
36  /// The initializer list for the genotype matrix.
37  typedef typename
40 
41  ///
42  /// Initializes a new instance of the class.
43  ///
45  : _g ()
46  {
47  }
48 
49  ///
50  /// Initializes a new instance of the class based on values from the
51  /// specified file.
52  ///
54  char const * const path) ///< The path to the file.
55  : _g (path)
56  {
57  }
58 
59  ///
60  /// Initializes a new instance of the class based on values from the
61  /// specified file.
62  ///
64  const std::string & path) ///< The path to the file.
65  : _g (path)
66  {
67  }
68 
69  ///
70  /// Initializes a new instance of the class based on values from the
71  /// specified input stream.
72  ///
74  std::istream & in) ///< The input stream.
75  : _g (in)
76  {
77  }
78 
79  ///
80  /// Initializes a new instance of the class based on the specified
81  /// values.
82  ///
83  /// \param values The two-dimensional values.
84  ///
86  const initializer_list_type & values)
87  : _g (values)
88  {
89  }
90 
91  ///
92  /// \return This instance.
93  ///
94  inline const basic_discrete_genotype_matrix * as_dgm() const override
95  {
96  return this;
97  }
98 
99  ///
100  /// \return This instance.
101  ///
103  {
104  return this;
105  }
106 
107  ///
108  /// Computes the derivative vector and hessian matrix for a specified
109  /// marker of the F matrix.
110  ///
111  virtual void compute_derivatives_f(
112  const matrix_type & q, ///< The Q matrix.
113  const matrix_type & , ///< The F matrix.
114  const matrix_type & , ///< The 1-F matrix.
115  const matrix_type & qfa, ///< The Q*F product.
116  const matrix_type & qfb, ///< The Q*(1-F) product.
117  const size_t j, ///< The marker.
118  matrix_type & d_vec, ///< The derivative vector.
119  matrix_type & h_mat) ///< The hessian matrix.
120  const override
121  {
122  const auto J = _g.get_width();
123  const auto K = d_vec.get_height();
124 
125  #ifndef NDEBUG
126  const auto I = _g.get_height();
127  assert(q.is_size(I, K));
128  assert(qfa.is_size(I, J));
129  assert(qfb.is_size(I, J));
130  assert(j < J);
131  assert(d_vec.is_size(K, 1));
132  assert(h_mat.is_size(K, K));
133  #endif // NDEBUG
134 
135  h_mat.set_values(0);
136  d_vec.set_values(0);
137 
138  //
139  // for (size_t i = 0; i < I; i++)
140  // g_ij --> g(i, j)
141  // q_i0 --> q(i, 0)
142  // qfa_ij --> qfa(i, j)
143  // qfb_ij --> qfb(i, j)
144  //
145  auto g_ij_ptr = _g.get_data() + j;
146  auto q_i0_ptr = q.get_data();
147  auto qfa_ij_ptr = qfa.get_data() + j;
148  auto qfb_ij_ptr = qfb.get_data() + j;
149  const auto g_ij_end = g_ij_ptr + _g.get_length();
150  const auto g_step = J;
151  const auto q_step = K;
152  const auto qf_step = J;
153  while (g_ij_ptr != g_ij_end)
154  {
155  auto is_evaluated = true;
156  value_type g_ij;
157  switch (*g_ij_ptr)
158  {
159  case genotype_major_major: g_ij = 0; break;
160  case genotype_major_minor: g_ij = 1; break;
161  case genotype_minor_minor: g_ij = 2; break;
162  default: is_evaluated = false; break;
163  }
164 
165  if (is_evaluated)
166  {
167  const auto qfa_ij = *qfa_ij_ptr;
168  const auto qfb_ij = *qfb_ij_ptr;
169  const auto term1 = g_ij / qfa_ij;
170  const auto term2 = (2 - g_ij) / qfb_ij;
171  const auto term3 = term1 - term2;
172  const auto term4 = term1 / qfa_ij + term2 / qfb_ij;
173 
174  //
175  // for (size_t k1 = 0; k1 < K; k1++)
176  // d --> d_vec[k1]
177  // h --> h_mat(k1, ...)
178  // q_ik1 --> q(i, k1)
179  //
180  auto d_ptr = d_vec.get_data();
181  auto h_ptr = h_mat.get_data();
182  auto q_ik1_ptr = q_i0_ptr;
183  const auto h_end = h_ptr + h_mat.get_length();
184  while (h_ptr != h_end)
185  {
186  const auto q_ik1 = *q_ik1_ptr;
187 
188  *d_ptr += term3 * q_ik1;
189 
190  //
191  // for (size_t k2 = 0; k2 < K; k2++)
192  // q_ik2 --> q(i, k2)
193  //
194  auto q_ik2_ptr = q_i0_ptr;
195  const auto q_ik2_end = q_ik2_ptr + q.get_width();
196  while (q_ik2_ptr != q_ik2_end)
197  {
198  *h_ptr -= term4 * q_ik1 * *q_ik2_ptr;
199 
200  h_ptr++;
201  q_ik2_ptr++;
202  }
203 
204  q_ik1_ptr++;
205  d_ptr++;
206  }
207  }
208 
209  g_ij_ptr += g_step;
210  qfa_ij_ptr += qf_step;
211  qfb_ij_ptr += qf_step;
212  q_i0_ptr += q_step;
213  }
214  }
215 
216  ///
217  /// Computes the derivative vector and hessian matrix for a specified
218  /// individual of the Q matrix.
219  ///
220  virtual void compute_derivatives_q(
221  const matrix_type & , ///< The Q matrix.
222  const matrix_type & fa, ///< The F matrix.
223  const matrix_type & fb, ///< The 1-F matrix.
224  const matrix_type & qfa, ///< The Q*F product.
225  const matrix_type & qfb, ///< The Q*(1-F) product.
226  const size_t i, ///< The individual.
227  matrix_type & d_vec, ///< The derivative vector.
228  matrix_type & h_mat) ///< The hessian matrix.
229  const override
230  {
231  const auto J = _g.get_width();
232 
233  #ifndef NDEBUG
234  const auto I = _g.get_height();
235  const auto K = d_vec.get_height();
236  assert(fa.is_size(K, J));
237  assert(fb.is_size(K, J));
238  assert(qfa.is_size(I, J));
239  assert(qfb.is_size(I, J));
240  assert(i < I);
241  assert(d_vec.is_size(K, 1));
242  assert(h_mat.is_size(K, K));
243  #endif // NDEBUG
244 
245  h_mat.set_values(0);
246  d_vec.set_values(0);
247 
248  //
249  // for (size_t j = 0; j < J; j++)
250  // g_ij --> g(i, j)
251  // q_fa_ij --> q_fa(i, j)
252  // q_fb_ij --> q_fb(i, j)
253  // fa_0j --> q_fa(0, j)
254  // fb_0j --> q_fb(0, j)
255  //
256  auto g_ij_ptr = _g.get_data(i, 0);
257  const auto g_ij_end = g_ij_ptr + J;
258  auto qfa_ij_ptr = qfa.get_data(i, 0);
259  auto qfb_ij_ptr = qfb.get_data(i, 0);
260  auto fa_0j_ptr = fa.get_data();
261  auto fb_0j_ptr = fb.get_data();
262  const auto f_step = J;
263  while (g_ij_ptr != g_ij_end)
264  {
265  bool is_evaluated = true;
266  value_type g_ij;
267  switch (*g_ij_ptr)
268  {
269  case genotype_major_major: g_ij = 0; break;
270  case genotype_major_minor: g_ij = 1; break;
271  case genotype_minor_minor: g_ij = 2; break;
272  default: is_evaluated = false; break;
273  }
274 
275  if (is_evaluated)
276  {
277  const auto qfa_ij = *qfa_ij_ptr;
278  const auto qfb_ij = *qfb_ij_ptr;
279  const auto term1 = g_ij / qfa_ij;
280  const auto term2 = (2 - g_ij) / qfb_ij;
281  const auto term3 = term1 / qfa_ij;
282  const auto term4 = term2 / qfb_ij;
283 
284  //
285  // for (size_t k1 = 0; k1 < K; k1++)
286  // fa_k1j --> fa(k1, j)
287  // fb_k1j --> fb(k1, j)
288  // d --> d_vec[k1]
289  // h --> h_mat(k1, ...)
290  //
291  auto fa_k1j_ptr = fa_0j_ptr;
292  auto fb_k1j_ptr = fb_0j_ptr;
293  auto d_ptr = d_vec.get_data();
294  auto h_ptr = h_mat.get_data();
295  const auto h_end = h_ptr + h_mat.get_length();
296  while (h_ptr != h_end)
297  {
298  const auto fa_k1j = *fa_k1j_ptr;
299  const auto fb_k1j = *fb_k1j_ptr;
300 
301  *d_ptr += term1 * fa_k1j + term2 * fb_k1j;
302 
303  //
304  // for (size_t k2 = 0; k2 < K; k2++)
305  // fa_k2j --> fa(k2, j)
306  // fb_k2j --> fb(k2, j)
307  //
308  auto fa_k2j_ptr = fa_0j_ptr;
309  auto fb_k2j_ptr = fb_0j_ptr;
310  const auto fa_k2j_end = fa_k2j_ptr + fa.get_length();
311  while (fa_k2j_ptr != fa_k2j_end)
312  {
313  const auto fa_k2j = *fa_k2j_ptr;
314  const auto fb_k2j = *fb_k2j_ptr;
315 
316  *h_ptr -= (term3 * fa_k1j * fa_k2j)
317  + (term4 * fb_k1j * fb_k2j);
318 
319  fa_k2j_ptr += f_step;
320  fb_k2j_ptr += f_step;
321  h_ptr++;
322  }
323 
324  fa_k1j_ptr += f_step;
325  fb_k1j_ptr += f_step;
326  d_ptr++;
327  }
328  }
329 
330  g_ij_ptr++;
331  qfa_ij_ptr++;
332  qfb_ij_ptr++;
333  fa_0j_ptr++;
334  fb_0j_ptr++;
335  }
336  }
337 
338  ///
339  /// \return The log of the likelihood function.
340  ///
342  const matrix_type & q, ///< The Q matrix.
343  const matrix_type & fa, ///< The F matrix for major alleles.
344  const matrix_type & fb, ///< The F matrix for minor alleles.
345  const matrix_type & , ///< The Q*Fa product.
346  const matrix_type & ) ///< The Q*Fb product.
347  const override
348  {
349  const auto & g = _g;
350 
351  assert(verification_type::validate_gqf_sizes(*this, q, fa));
352  assert(verification_type::validate_gqf_sizes(*this, q, fb));
353 
354  const auto J = g.get_width();
355  const auto K = q.get_width();
356  auto g_ij_ptr = g.get_data();
357  const auto g_ij_end = g.get_data() + g.get_length();
358  auto q_i0_ptr = q.get_data();
359  auto sum_i = value_type(0);
360 
361  while (g_ij_ptr != g_ij_end)
362  {
363  auto fa_0j_ptr = fa.get_data();
364  auto fa_0j_end = fa.get_data() + J;
365  auto fb_0j_ptr = fb.get_data();
366  auto sum_j = value_type(0);
367 
368  while (fa_0j_ptr != fa_0j_end)
369  {
370  switch (*g_ij_ptr++)
371  {
372  case genotype_major_major:
373  {
374  auto q_ik_ptr = q_i0_ptr;
375  const auto q_ik_end = q_i0_ptr + K;
376  auto fb_kj_ptr = fb_0j_ptr;
377  auto sum_rhs = value_type(0);
378 
379  while (q_ik_ptr != q_ik_end)
380  {
381  const auto q_ik = *q_ik_ptr;
382  const auto fb_kj = *fb_kj_ptr;
383 
384  sum_rhs += q_ik * fb_kj;
385 
386  q_ik_ptr++;
387  fb_kj_ptr += J;
388  }
389 
390  sum_j += value_type(2) * std::log(sum_rhs);
391  break;
392  }
393 
394  case genotype_major_minor:
395  {
396  auto q_ik_ptr = q_i0_ptr;
397  const auto q_ik_end = q_i0_ptr + K;
398  auto fa_kj_ptr = fa_0j_ptr;
399  auto fb_kj_ptr = fb_0j_ptr;
400  auto sum_lhs = value_type(0);
401  auto sum_rhs = value_type(0);
402 
403  while (q_ik_ptr != q_ik_end)
404  {
405  const auto q_ik = *q_ik_ptr;
406  const auto fa_kj = *fa_kj_ptr;
407  const auto fb_kj = *fb_kj_ptr;
408 
409  sum_lhs += q_ik * fa_kj;
410  sum_rhs += q_ik * fb_kj;
411 
412  q_ik_ptr++;
413  fa_kj_ptr += J;
414  fb_kj_ptr += J;
415  }
416 
417  sum_j += std::log(sum_lhs * sum_rhs);
418  break;
419  }
420 
421  case genotype_minor_minor:
422  {
423  auto q_ik_ptr = q_i0_ptr;
424  const auto q_ik_end = q_i0_ptr + K;
425  auto fa_kj_ptr = fa_0j_ptr;
426  auto sum_lhs = value_type(0);
427 
428  while (q_ik_ptr != q_ik_end)
429  {
430  const auto q_ik = *q_ik_ptr;
431  const auto fa_kj = *fa_kj_ptr;
432 
433  sum_lhs += q_ik * fa_kj;
434 
435  q_ik_ptr++;
436  fa_kj_ptr += J;
437  }
438 
439  sum_j += value_type(2) * std::log(sum_lhs);
440  break;
441  }
442  }
443 
444  fa_0j_ptr++;
445  fb_0j_ptr++;
446  }
447 
448  sum_i += sum_j;
449 
450  q_i0_ptr += K;
451  }
452 
453  return sum_i;
454  }
455 
456  ///
457  /// \return A new mu matrix.
458  ///
460  const value_type f_epsilon) ///< The F matrix boundary epsilon.
461  const override
462  {
463  const auto f_min = value_type(0.0) + f_epsilon;
464  const auto f_max = value_type(1.0) - f_epsilon;
465 
466  const auto I = _g.get_height();
467  const auto J = _g.get_width();
468 
469  assert(I > 0);
470 
471  matrix_type mu (J, 1);
472 
473  for (size_t j = 0; j < J; j++)
474  {
475  auto sum = value_type(0.0);
476 
477  for (size_t i = 0; i < I; i++)
478  {
479  switch (_g(i, j))
480  {
481  case jade::genotype_major_major:
482  sum += value_type(2.0);
483  break;
484 
485  case jade::genotype_major_minor:
486  sum += value_type(1.0);
487  break;
488 
489  default:
490  break;
491  }
492  }
493 
494  mu[j] = std::min(std::max(
495  f_min,
496  sum / (value_type(2.0) * value_type(I))),
497  f_max);
498  }
499 
500  return mu;
501  }
502 
503  ///
504  /// \return The base matrix.
505  ///
506  inline const genotype_matrix_type & get_matrix() const
507  {
508  return _g;
509  }
510 
511  ///
512  /// \return The height of the matrix.
513  ///
514  inline virtual size_t get_height() const override
515  {
516  return _g.get_height();
517  }
518 
519  ///
520  /// \return The string representation of the size of the matrix.
521  ///
522  inline virtual std::string get_size_str() const override
523  {
524  return _g.get_size_str();
525  }
526 
527  ///
528  /// \return The width of the matrix.
529  ///
530  inline virtual size_t get_width() const override
531  {
532  return _g.get_width();
533  }
534 
535  ///
536  /// \return The string representation of the matrix.
537  ///
538  inline virtual std::string str() const override
539  {
540  return _g.str();
541  }
542 
543  private:
545  };
546 }
547 
548 #endif // JADE_DISCRETE_GENOTYPE_MATRIX_HPP__
jade::basic_discrete_genotype_matrix::basic_discrete_genotype_matrix
basic_discrete_genotype_matrix(const initializer_list_type &values)
Initializes a new instance of the class based on the specified values.
Definition: jade.discrete_genotype_matrix.hpp:85
jade::basic_verification
A template for a class that performs validation on various types of matrices.
Definition: jade.verification.hpp:20
jade::basic_matrix::get_data
const value_type * get_data() const
Definition: jade.matrix.hpp:542
jade::basic_matrix::get_length
size_t get_length() const
Definition: jade.matrix.hpp:624
jade::basic_discrete_genotype_matrix::initializer_list_type
genotype_matrix_type::initializer_list_type initializer_list_type
The initializer list for the genotype matrix.
Definition: jade.discrete_genotype_matrix.hpp:39
jade::basic_discrete_genotype_matrix::as_dgm
basic_discrete_genotype_matrix * as_dgm() override
Definition: jade.discrete_genotype_matrix.hpp:102
jade::basic_matrix::get_width
size_t get_width() const
Definition: jade.matrix.hpp:757
jade::basic_discrete_genotype_matrix::matrix_type
basic_matrix< value_type > matrix_type
The matrix type.
Definition: jade.discrete_genotype_matrix.hpp:31
jade::basic_genotype_matrix
A template for an abstract class implementing operations for a genotype matrix.
Definition: jade.genotype_matrix.hpp:26
jade::basic_discrete_genotype_matrix::basic_discrete_genotype_matrix
basic_discrete_genotype_matrix(std::istream &in)
Initializes a new instance of the class based on values from the specified input stream.
Definition: jade.discrete_genotype_matrix.hpp:73
jade::basic_discrete_genotype_matrix::get_height
virtual size_t get_height() const override
Definition: jade.discrete_genotype_matrix.hpp:514
jade::basic_matrix::str
std::string str() const
Definition: jade.matrix.hpp:1199
jade::basic_matrix::get_size_str
std::string get_size_str() const
Definition: jade.matrix.hpp:735
jade::basic_discrete_genotype_matrix::basic_discrete_genotype_matrix
basic_discrete_genotype_matrix(char const *const path)
Initializes a new instance of the class based on values from the specified file.
Definition: jade.discrete_genotype_matrix.hpp:53
jade::basic_matrix::is_size
bool is_size(const basic_matrix &other) const
Definition: jade.matrix.hpp:896
jade::basic_discrete_genotype_matrix::get_size_str
virtual std::string get_size_str() const override
Definition: jade.discrete_genotype_matrix.hpp:522
jade::basic_discrete_genotype_matrix::str
virtual std::string str() const override
Definition: jade.discrete_genotype_matrix.hpp:538
jade::basic_discrete_genotype_matrix::basic_discrete_genotype_matrix
basic_discrete_genotype_matrix()
Initializes a new instance of the class.
Definition: jade.discrete_genotype_matrix.hpp:44
jade::basic_discrete_genotype_matrix::genotype_matrix_type
basic_matrix< genotype > genotype_matrix_type
The genotype matrix type.
Definition: jade.discrete_genotype_matrix.hpp:28
jade::basic_matrix::get_height
size_t get_height() const
Definition: jade.matrix.hpp:603
jade::basic_discrete_genotype_matrix::get_width
virtual size_t get_width() const override
Definition: jade.discrete_genotype_matrix.hpp:530
jade::basic_discrete_genotype_matrix::get_matrix
const genotype_matrix_type & get_matrix() const
Definition: jade.discrete_genotype_matrix.hpp:506
jade::basic_discrete_genotype_matrix::verification_type
basic_verification< value_type > verification_type
The verification type.
Definition: jade.discrete_genotype_matrix.hpp:34
jade::basic_verification::validate_gqf_sizes
static bool validate_gqf_sizes(const genotype_matrix_type &g, const matrix_type &q, const matrix_type &f)
Validates the sizes of the G, Q, and F matrices and throws an exception if validation fails.
Definition: jade.verification.hpp:211
jade::basic_discrete_genotype_matrix::compute_derivatives_q
virtual void compute_derivatives_q(const matrix_type &, const matrix_type &fa, const matrix_type &fb, const matrix_type &qfa, const matrix_type &qfb, const size_t i, matrix_type &d_vec, matrix_type &h_mat) const override
Computes the derivative vector and hessian matrix for a specified individual of the Q matrix.
Definition: jade.discrete_genotype_matrix.hpp:220
jade::basic_discrete_genotype_matrix
A template for a class implementing operations for a discrete genotype matrix.
Definition: jade.discrete_genotype_matrix.hpp:22
jade::basic_matrix< genotype >::initializer_list_type
std::initializer_list< std::initializer_list< value_type > > initializer_list_type
The initializer list type.
Definition: jade.matrix.hpp:35
jade::basic_discrete_genotype_matrix::as_dgm
const basic_discrete_genotype_matrix * as_dgm() const override
Definition: jade.discrete_genotype_matrix.hpp:94
jade::basic_discrete_genotype_matrix::basic_discrete_genotype_matrix
basic_discrete_genotype_matrix(const std::string &path)
Initializes a new instance of the class based on values from the specified file.
Definition: jade.discrete_genotype_matrix.hpp:63
jade::basic_discrete_genotype_matrix::create_mu
virtual matrix_type create_mu(const value_type f_epsilon) const override
Definition: jade.discrete_genotype_matrix.hpp:459
jade::basic_discrete_genotype_matrix::compute_derivatives_f
virtual void compute_derivatives_f(const matrix_type &q, const matrix_type &, const matrix_type &, const matrix_type &qfa, const matrix_type &qfb, const size_t j, matrix_type &d_vec, matrix_type &h_mat) const override
Computes the derivative vector and hessian matrix for a specified marker of the F matrix.
Definition: jade.discrete_genotype_matrix.hpp:111
jade::basic_matrix::set_values
void set_values(const value_type value)
Sets all values of the matrix to the specified value.
Definition: jade.matrix.hpp:1189
jade::basic_discrete_genotype_matrix::compute_lle
virtual value_type compute_lle(const matrix_type &q, const matrix_type &fa, const matrix_type &fb, const matrix_type &, const matrix_type &) const override
Definition: jade.discrete_genotype_matrix.hpp:341
jade::basic_matrix< genotype >
jade::basic_discrete_genotype_matrix::value_type
TValue value_type
The value type.
Definition: jade.discrete_genotype_matrix.hpp:25