ʻOhana
Population structure, admixture history, and selection using learning methods.
jade.selscan.hpp
1 /* -------------------------------------------------------------------------
2  Ohana
3  Copyright (c) 2015-2020 Jade Cheng (\___/)
4  Jade Cheng <info@jade-cheng.com> (='.'=)
5  ------------------------------------------------------------------------- */
6 
7 #ifndef JADE_SELSCAN_HPP__
8 #define JADE_SELSCAN_HPP__
9 
10 #include "jade.args.hpp"
11 #include "jade.genotype_matrix_factory.hpp"
12 
13 namespace jade
14 {
15  ///
16  /// A template for a class implementing the selscan program.
17  ///
18  template <typename TValue>
20  {
21  public:
22  /// The value type.
23  typedef TValue value_type;
24 
25  /// The matrix type.
27 
28  /// The verification type.
30 
31  /// The genotype matrix type.
33 
34  /// The genotype matrix factory type.
35  typedef
38 
39  ///
40  /// Initializes a new instance of the class.
41  ///
42  explicit basic_selscan(
43  args & a) ///< The command-line arguments.
44  : steps (a.read("--steps", "-s", size_t(100)))
45  , f_epsilon (_read_f_epsilon(a))
46  , g_ptr (genotype_matrix_factory_type::create(a.pop<std::string>()))
47  , g (*g_ptr)
48  , fa (a.pop<std::string>())
49  , c1 (a.pop<std::string>())
50  , c2 (_init_scaling_matrix(a, c1))
51  , RK (c1.get_width())
52  , J (g.get_width())
53  , mu (g.create_mu(f_epsilon))
54  , rooted_fa (_compute_rooted_fa(fa))
55  , c_inv (RK, RK)
56  , f_j_c_inv (RK, 1)
57  {
58  if (steps < 2)
59  throw error() << "invalid value for --steps option ("
60  << steps << "); expected at least two steps";
61 
62  a.validate_empty();
63 
71  }
72 
73  ///
74  /// Executes the program.
75  ///
76  void execute()
77  {
78  const auto K = fa.get_height();
79 
80  std::cout << "step\tlle-ratio\tglobal-lle\tlocal-lle";
81 
82  for (size_t k = 0; k < K; k++)
83  std::cout << "\t" << "f-pop" << k;
84 
85  std::cout << std::endl;
86 
87  std::vector<record> records;
88  records.reserve(J);
89  for (size_t j = 0; j < J; j++)
90  records.emplace_back(j, _compute_score(0, j));
91 
92  for (size_t si = 0; si < steps; si++)
93  for (auto & r : records)
94  r.update(si, _compute_score(si, r.get_j()));
95 
96  for (const auto & r : records)
97  {
98  std::cout << r.get_step() << '\t'
99  << _format(r.get_lle_ratio()) << '\t'
100  << _format(r.get_score()) << '\t'
101  << _format(r.get_best_score());
102 
103  for (size_t k = 0; k < K; k++)
104  std::cout << '\t' << _format(fa(k, r.get_j()));
105 
106  std::cout << std::endl;
107  }
108  }
109 
110  private:
111  typedef std::unique_ptr<genotype_matrix_type> genotype_matrix_ptr;
112 
113  // --------------------------------------------------------------------
114  static matrix_type _compute_rooted_fa(const matrix_type & fa)
115  {
116  const size_t K = fa.get_height();
117  const size_t J = fa.get_width();
118 
119  assert(K > 1);
120 
121  matrix_type rooted_fa (K - 1, J);
122 
123  for (size_t k = 0; k + 1 < K; k++)
124  for (size_t j = 0; j < J; j++)
125  rooted_fa(k, j) = fa(k + 1, j) - fa(0, j);
126 
127  return rooted_fa;
128  }
129 
130  // --------------------------------------------------------------------
131  value_type _compute_score(
132  const size_t si,
133  const size_t j)
134  {
135  typedef std::numeric_limits<value_type> limits_type;
136  static const auto lowest = limits_type::lowest();
137 
138  typedef typename matrix_type::blas_type blas_type;
139 
140  const auto percent = value_type(si) / value_type(steps - 1);
141  for (size_t i = 0; i < RK * RK; i++)
142  c_inv[i] = c1[i] + percent * (c2[i] - c1[i]);
143 
144  //
145  // Compute the inverse and the log of the determinant. If this
146  // fails, the matrix is not positive semidefinite; return -Infinity
147  // to indicate these are unacceptable parameters.
148  //
149  value_type log_c_det;
150  if (!c_inv.invert(log_c_det))
151  return lowest;
152 
153  //
154  // The ?gemv routines perform a matrix-vector operation defined as
155  //
156  // y := (alpha * A * x) + (beta * y)
157  //
158  // where alpha and beta are scalars, x and y are vectors, and A is
159  // an m-by-n matrix.
160  //
161  blas_type::gemv(
162  CblasRowMajor, // Layout
163  CblasNoTrans, // transa
164  int(c_inv.get_height()), // m
165  int(c_inv.get_width()), // n
166  1.0, // alpha
167  c_inv.get_data(), // A
168  int(c_inv.get_width()), // lda
169  rooted_fa.get_data() + j, // x
170  int(rooted_fa.get_width()), // incx
171  0.0, // beta
172  f_j_c_inv.get_data(), // y
173  int(f_j_c_inv.get_width())); // yinc
174 
175  //
176  // The ?dot routines perform a vector-vector reduction operation
177  // defined as
178  //
179  // res = Sum (x_i * y_i) for i = 1 to n
180  //
181  // where x_i and y_i are elements of vectors x and y.
182  //
183  const auto dot = blas_type::dot(
184  int(f_j_c_inv.get_height()), // n (height = RK)
185  rooted_fa.get_data() + j, // x
186  int(rooted_fa.get_width()), // xinc (width = 1)
187  f_j_c_inv.get_data(), // y
188  int(f_j_c_inv.get_width())); // yinc (width = 1)
189 
190  static const auto pi = value_type(std::acos(-1.0));
191  static const auto tau = value_type(2) * pi;
192 
193  const auto rk = static_cast<value_type>(RK);
194  const auto mu_j = mu[j];
195  const auto c_j = mu_j * (value_type(1) - mu_j);
196  const auto term = (rk * std::log(tau * c_j)) + (dot / c_j);
197  return -(log_c_det + term) / value_type(2);
198  }
199 
200  // --------------------------------------------------------------------
201  static std::string _format(const value_type value)
202  {
203  std::ostringstream out;
205  out << std::showpos << value;
206  return out.str();
207  }
208 
209  // --------------------------------------------------------------------
210  static matrix_type _init_scaling_matrix(
211  args & a,
212  const matrix_type & c1)
213  {
214  const auto path = a.read<std::string>("--c-scale", "-cs");
215  return path.empty() ? value_type(10) * c1 : matrix_type(path);
216  }
217 
218  // --------------------------------------------------------------------
219  static value_type _read_f_epsilon(args & a)
220  {
221  const auto out = a.read("--f-epsilon", "-fe", value_type(1.0e-6));
222 
223  if (!(out > value_type(0.0) && out < value_type(0.1)))
224  throw error()
225  << "invalid value for --f-epsilon option: "
226  << out;
227 
228  return out;
229  }
230 
231  // --------------------------------------------------------------------
232  class record
233  {
234  public:
235  // ----------------------------------------------------------------
236  inline record(const size_t j, const value_type score)
237  : _best_score (std::numeric_limits<value_type>::lowest())
238  , _j (j)
239  , _lle_ratio (std::numeric_limits<value_type>::quiet_NaN())
240  , _score (score)
241  , _step (std::numeric_limits<size_t>::max())
242  {
243  }
244 
245  // ----------------------------------------------------------------
246  inline value_type get_best_score() const
247  {
248  return _best_score;
249  }
250 
251  // ----------------------------------------------------------------
252  inline size_t get_j() const
253  {
254  return _j;
255  }
256 
257  // ----------------------------------------------------------------
258  inline value_type get_lle_ratio() const
259  {
260  return _lle_ratio;
261  }
262 
263  // ----------------------------------------------------------------
264  inline value_type get_score() const
265  {
266  return _score;
267  }
268 
269  // ----------------------------------------------------------------
270  inline size_t get_step() const
271  {
272  return _step;
273  }
274 
275  // ----------------------------------------------------------------
276  inline void update(const size_t step, const value_type score)
277  {
278  if (score <= _best_score)
279  return;
280 
281  _step = step;
282  _best_score = score;
283  _lle_ratio = value_type(2) * (score - _score);
284  }
285 
286  private:
287  value_type _best_score;
288  size_t _j;
289  value_type _lle_ratio;
290  value_type _score;
291  size_t _step;
292  };
293 
294  const size_t steps;
295  const value_type f_epsilon;
296  const genotype_matrix_ptr g_ptr;
297  const genotype_matrix_type & g;
298  const matrix_type fa;
299  const matrix_type c1;
300  const matrix_type c2;
301  const size_t RK;
302  const size_t J;
303  const matrix_type mu;
304  const matrix_type rooted_fa;
305  matrix_type c_inv;
306  matrix_type f_j_c_inv;
307  };
308 }
309 
310 #endif // JADE_SELSCAN_HPP__
jade::basic_verification
A template for a class that performs validation on various types of matrices.
Definition: jade.verification.hpp:20
jade::basic_args::validate_empty
void validate_empty() const
Throws an exception if there are more arguments to be processed.
Definition: jade.args.hpp:161
jade::basic_verification::validate_c
static bool validate_c(const matrix_type &c)
Validates the C matrix and throws an exception if validation fails.
Definition: jade.verification.hpp:38
jade::basic_matrix::get_data
const value_type * get_data() const
Definition: jade.matrix.hpp:542
jade::basic_verification::validate_gf_sizes
static bool validate_gf_sizes(const genotype_matrix_type &g, const matrix_type &f)
Validates the sizes of the G and F matrices and throws an exception if validation fails.
Definition: jade.verification.hpp:177
jade::basic_matrix::get_width
size_t get_width() const
Definition: jade.matrix.hpp:757
jade::basic_genotype_matrix
A template for an abstract class implementing operations for a genotype matrix.
Definition: jade.genotype_matrix.hpp:26
jade::basic_matrix< value_type >::set_high_precision
static void set_high_precision(std::ostream &out)
Sets the specified stream to scientific notation with a high precision.
Definition: jade.matrix.hpp:1154
jade::basic_selscan::execute
void execute()
Executes the program.
Definition: jade.selscan.hpp:76
jade::basic_selscan::genotype_matrix_type
basic_genotype_matrix< value_type > genotype_matrix_type
The genotype matrix type.
Definition: jade.selscan.hpp:32
jade::basic_verification::validate_fc_sizes
static bool validate_fc_sizes(const matrix_type &f, const matrix_type &c)
Validates the sizes of the F and C matrices and throws an exception if validation fails.
Definition: jade.verification.hpp:114
jade::basic_selscan::verification_type
basic_verification< value_type > verification_type
The verification type.
Definition: jade.selscan.hpp:29
jade::basic_verification::validate_f
static bool validate_f(const matrix_type &f)
Validates the F matrix and throws an exception if validation fails.
Definition: jade.verification.hpp:73
jade::basic_matrix::read
void read(const std::string &path)
Reads the matrix values from the specified file.
Definition: jade.matrix.hpp:1059
jade::basic_matrix< value_type >::blas_type
basic_blas< value_type > blas_type
The BLAS type.
Definition: jade.matrix.hpp:27
jade::basic_selscan::matrix_type
basic_matrix< value_type > matrix_type
The matrix type.
Definition: jade.selscan.hpp:26
jade::basic_selscan::value_type
TValue value_type
The value type.
Definition: jade.selscan.hpp:23
jade::basic_matrix::get_height
size_t get_height() const
Definition: jade.matrix.hpp:603
jade::basic_selscan::basic_selscan
basic_selscan(args &a)
Initializes a new instance of the class.
Definition: jade.selscan.hpp:42
jade::basic_matrix::invert
bool invert(value_type &log_det)
Computes and stores the inverse of this matrix using the Cholesky square root method....
Definition: jade.matrix.hpp:796
jade::basic_selscan
A template for a class implementing the selscan program.
Definition: jade.selscan.hpp:20
jade::basic_selscan::genotype_matrix_factory_type
basic_genotype_matrix_factory< TValue > genotype_matrix_factory_type
The genotype matrix factory type.
Definition: jade.selscan.hpp:37
jade::basic_args
A template for a class that helps process command-line arguments.
Definition: jade.args.hpp:19
jade::basic_genotype_matrix_factory
A template for a class that creates genotype matrices based on files and their file extensions.
Definition: jade.genotype_matrix_factory.hpp:21
jade::basic_verification::validate_g
static bool validate_g(const genotype_matrix_type &g)
Validates the G matrix and throws an exception if validation fails.
Definition: jade.verification.hpp:158
jade::basic_error
A template for a class representing an exception thrown from this namespace.
Definition: jade.error.hpp:20
jade::basic_matrix< value_type >