ʻOhana
Population structure, admixture history, and selection using learning methods.
jade.agi_reader.hpp
1 /* -------------------------------------------------------------------------
2  Ohana
3  Copyright (c) 2015-2020 Jade Cheng (\___/)
4  Jade Cheng <info@jade-cheng.com> (='.'=)
5  ------------------------------------------------------------------------- */
6 
7 #ifndef JADE_AGI_READER_HPP__
8 #define JADE_AGI_READER_HPP__
9 
10 #include "jade.shunting_yard.hpp"
11 
12 namespace jade
13 {
14  ///
15  /// A template for a class that reads an AGI (Admixture Graph Input) file.
16  /// The file provides the following information: A set of branch length
17  /// variable names, a set of admixture proportion parameter names, a
18  /// component size (K), and K*(K-1)/2 number of expressions representing
19  /// C matrix entries.
20  ///
21  /// An example of the file format is as follows:
22  /// <pre>
23  /// # Branch length parameters, range: [0, inf)
24  /// a b c d e f g
25  ///
26  /// # Admixture proportion parameters, range: [0, 1]
27  /// p
28  ///
29  /// # K value
30  /// 3
31  ///
32  /// # Matrix entries, total number should be: K*(K-1)/2
33  /// # They map to a C matrix, e.g. K=3 maps to:
34  /// # 0 1
35  /// # 1 2
36  /// (1 - p) * (b + e + g + f + a) + p * (b + d + a)
37  /// p * a + (1 - p) * (g + f + a)
38  /// c + g + f + a
39  /// </pre>
40  ///
41  template <typename TValue>
43  {
44  public:
45  ///
46  /// The value type.
47  ///
48  typedef TValue value_type;
49 
50  ///
51  /// The shunting yard algorithm used to parse and evaluate expressions.
52  ///
54 
55  ///
56  /// The arguments used to evaluate the shunting yard algorithm.
57  ///
59 
60  ///
61  /// Initializes a new instance of the class by reading and parsing the
62  /// specified input stream.
63  /// \param in The input stream.
64  ///
65  explicit basic_agi_reader(std::istream & in)
66  : _args ()
67  , _branch_names (_read_names(in))
68  , _proportion_names (_read_names(in))
69  , _k (_read_size(in))
70  , _entries (_read_entries(in, _k))
71  {
72  std::string token;
73  if (!!(in >> token))
74  throw jade::error() << "unexpected token: " << token;
75 
76  //
77  // Require that all variable names across both sets are unique. In
78  // doing so, build a set of all defined variable names.
79  //
80  std::set<std::string> defined;
81  const auto vectors = { &_branch_names, &_proportion_names };
82  for (const auto vector : vectors)
83  for (const auto & name : *vector)
84  if (!defined.insert(name).second)
85  throw jade::error()
86  << "duplicate variable name '" << name << "'";
87 
88  //
89  // Require that all variables defined in the expressions are found
90  // in the set of defined variable names.
91  //
92  for (const auto & entry : _entries)
93  for (const auto & pair : entry.get_args())
94  if (defined.insert(pair.first).second)
95  throw jade::error()
96  << "undefined variable name '"
97  << pair.first << "' in expression";
98 
99  //
100  // Create a set of the variables used in the expressions.
101  //
102  std::set<std::string> used;
103  for (const auto & entry : _entries)
104  for (const auto & pair : entry.get_args())
105  used.insert(pair.first);
106 
107  //
108  // Remove unused variables from the branch and proportion sets.
109  //
110  {
111  const auto remove = [&used](std::vector<std::string> & names)
112  {
113  const auto end = std::remove_if(
114  names.begin(),
115  names.end(),
116  [&used](const std::string & name)
117  {
118  return used.find(name) == used.end();
119  });
120 
121  names.erase(end, names.end());
122  };
123 
124  remove(_branch_names);
125  remove(_proportion_names);
126  }
127 
128  //
129  // Now that it is verified that all names are unique and that all
130  // variables in the expressions are defined as either branch or
131  // proportions, initialize the set of default arguments based on
132  // the variables used in the expressions.
133  //
134  for (const auto & name : used)
135  _args[name] = value_type(0);
136  }
137 
138  ///
139  /// Initializes a new instance of the class by reading and parsing the
140  /// file with the specified path.
141  /// \param path The path to the file to read and parse.
142  ///
143  explicit basic_agi_reader(const std::string & path)
144  : _args ()
145  , _branch_names ()
146  , _proportion_names ()
147  , _k (0)
148  , _entries ()
149  {
150  try
151  {
152  std::ifstream in (path);
153  *this = basic_agi_reader(in);
154  }
155  catch (const std::exception & e)
156  {
157  throw jade::error()
158  << "error reading admixture graph input from file '"
159  << path
160  << "': "
161  << e.what();
162  }
163  }
164 
165  ///
166  /// Returns a table of arguments that maps variables to values. The
167  /// table reference returned has all values set to zero. The table can
168  /// be used to evaluate all expression entries.
169  /// \return A table mapping variables to values.
170  ///
171  const args_type & get_args() const
172  {
173  return _args;
174  }
175 
176  ///
177  /// Returns the vector of branch names.
178  /// \return The vector of branch names.
179  ///
180  const std::vector<std::string> & get_branch_names() const
181  {
182  return _branch_names;
183  }
184 
185  ///
186  /// Returns the vector of proportion names.
187  /// \return The vector of proportion names.
188  ///
189  const std::vector<std::string> & get_proportion_names() const
190  {
191  return _proportion_names;
192  }
193 
194  ///
195  /// Returns the number of components.
196  /// \return The number of components.
197  ///
198  std::size_t get_k() const
199  {
200  return _k;
201  }
202 
203  ///
204  /// Returns the vector of expressions for the C matrix entries.
205  /// \return The vector of expressions for the C matrix entries.
206  ///
207  const std::vector<shunting_yard_type> & get_entries() const
208  {
209  return _entries;
210  }
211 
212  private:
213  // --------------------------------------------------------------------
214  static std::vector<shunting_yard_type> _read_entries(
215  std::istream & in,
216  const size_t k)
217  {
218  const auto n = k * (k - 1) / 2;
219  std::vector<shunting_yard_type> entries;
220  entries.reserve(n);
221 
222  for (size_t i = 0; i < n; i++)
223  entries.emplace_back(_read_line(in));
224 
225  return entries;
226  }
227 
228  // --------------------------------------------------------------------
229  static std::string _read_line(std::istream & in)
230  {
231  for (;;)
232  {
233  std::string line;
234  if (!std::getline(in, line))
235  throw jade::error("unexpected end of file");
236 
237  if (!line.empty() && line[0] != '#')
238  return line;
239  }
240  }
241 
242  // --------------------------------------------------------------------
243  static std::vector<std::string> _read_names(std::istream & in)
244  {
245  std::vector<std::string> names;
246 
247  std::istringstream line (_read_line(in));
248 
249  for (;;)
250  {
251  std::string name;
252  if (!(line >> name))
253  return names;
254 
255  names.push_back(_validate_name(name));
256  }
257  }
258 
259  // --------------------------------------------------------------------
260  static size_t _read_size(std::istream & in)
261  {
262  const auto line = _read_line(in);
263  std::istringstream line_in (line);
264 
265  size_t size;
266  if (!(line_in >> size) || size < 2)
267  throw jade::error("invalid size: ") << line;
268 
269  std::string token;
270  if (!(line_in >> token))
271  return size;
272 
273  throw jade::error() << "unexpected token: " << token;
274  }
275 
276  // --------------------------------------------------------------------
277  static const std::string & _validate_name(const std::string & name)
278  {
279  static const auto fn = [](const char ch)
280  {
281  return std::isalnum(ch) || ch == '_';
282  };
283 
284  if (!name.empty() && std::isalpha(name[0]))
285  if (std::all_of(name.begin() + 1, name.end(), fn))
286  return name;
287 
288  throw jade::error() << "invalid name: '" << name << "'";
289  }
290 
291  args_type _args; ///< Maps each variable used by an expression to zero.
292  std::vector<std::string> _branch_names; ///< Branch names that appear in at least one expression.
293  std::vector<std::string> _proportion_names; ///< Proportion names that appear in at least one expression.
294  std::size_t _k; ///< The component count (K); must precede _entries, which is sized from it.
295  std::vector<shunting_yard_type> _entries; ///< The K*(K-1)/2 parsed expressions, in file order.
296  };
297 }
298 
299 #endif // JADE_AGI_READER_HPP__
jade::basic_agi_reader::basic_agi_reader
basic_agi_reader(const std::string &path)
Initializes a new instance of the class by reading and parsing the file with the specified path.
Definition: jade.agi_reader.hpp:143
jade::basic_shunting_yard::args_type
std::map< std::string, float_type > args_type
A map of strings to floating-point values. The strings represent the variable names parsed from the ex...
Definition: jade.shunting_yard.hpp:33
jade::basic_agi_reader::value_type
TValue value_type
The value type.
Definition: jade.agi_reader.hpp:48
jade::basic_error::what
virtual const char * what() const
jade::basic_agi_reader
A template for a class that reads an AGI (Admixture Graph Input) file. The file provides the followin...
Definition: jade.agi_reader.hpp:43
jade::basic_agi_reader::get_k
std::size_t get_k() const
Returns the number of components.
Definition: jade.agi_reader.hpp:198
jade::basic_agi_reader::args_type
shunting_yard_type::args_type args_type
The arguments used to evaluate the shunting yard algorithm.
Definition: jade.agi_reader.hpp:58
jade::basic_agi_reader::get_branch_names
const std::vector< std::string > & get_branch_names() const
Returns the vector of branch names.
Definition: jade.agi_reader.hpp:180
jade::basic_agi_reader::get_entries
const std::vector< shunting_yard_type > & get_entries() const
Returns the vector of expressions for the C matrix entries.
Definition: jade.agi_reader.hpp:207
jade::basic_agi_reader::get_args
const args_type & get_args() const
Returns a table of arguments that maps variables to values. The table reference returned has all valu...
Definition: jade.agi_reader.hpp:171
jade::basic_agi_reader::shunting_yard_type
jade::basic_shunting_yard< value_type > shunting_yard_type
The shunting yard algorithm used to parse and evaluate expressions.
Definition: jade.agi_reader.hpp:53
jade::basic_shunting_yard
A template for a class that implements the shunting-yard algorithm. The implementation supports '+',...
Definition: jade.shunting_yard.hpp:23
jade::basic_agi_reader::get_proportion_names
const std::vector< std::string > & get_proportion_names() const
Returns the vector of proportion names.
Definition: jade.agi_reader.hpp:189
jade::basic_error
A template for a class representing an exception thrown from this namespace.
Definition: jade.error.hpp:20
jade::basic_agi_reader::basic_agi_reader
basic_agi_reader(std::istream &in)
Initializes a new instance of the class by reading and parsing the specified input stream.
Definition: jade.agi_reader.hpp:65