ʻOhana
Population structure, admixture history, and selection using learning methods.
jade.newick.hpp
1 /* -------------------------------------------------------------------------
2  Ohana
3  Copyright (c) 2015-2020 Jade Cheng (\___/)
4  Jade Cheng <info@jade-cheng.com> (='.'=)
5  ------------------------------------------------------------------------- */
6 
7 #ifndef JADE_NEWICK_HPP__
8 #define JADE_NEWICK_HPP__
9 
10 #include "jade.scanner.hpp"
11 
12 namespace jade
13 {
14  ///
15  /// A template class representing a node from a Newick tree.
16  ///
17  template <typename TValue>
19  {
20  public:
21  /// The value type.
22  typedef TValue value_type;
23 
24  /// The constant Newick node pointer type.
26 
27  /// The Newick node pointer type.
29 
30  /// The children vector type.
31  typedef std::vector<pointer_type> children_type;
32 
33  /// The scanner type.
35 
36  ///
37  /// Reclaims resources used by this instance and its children.
38  ///
40  {
41  _destruct();
42  }
43 
44  ///
45  /// Initializes a new instance of the class. Initially, the instance
46  /// has no name, no length, no parent, and no children.
47  ///
49  : _children ()
50  , _has_length (false)
51  , _id (0)
52  , _length (0)
53  , _name ()
54  , _parent ()
55  {
56  }
57 
58  ///
59  /// Initializes a new instance of the class from the specified input
60  /// stream. The input stream must end with a semicolon. If an error
61  /// occurs, the method throws an exception.
62  ///
63  /// \throw jade::error Thrown if there is an error parsing the tree.
64  ///
66  std::istream & in) ///< The input stream to read.
68  {
69  scanner_type s (in);
70  _read(s);
71  }
72 
73  ///
74  /// Initializes a new instance of the class from the specified input
75  /// string. The input string must end with a semicolon. If an error
76  /// occurs, the method throws an exception.
77  ///
78  /// \throw jade::error Thrown if there is an error parsing the tree.
79  ///
81  const std::string & in) ///< The input string to read.
83  {
84  scanner_type s (in);
85  _read(s);
86  }
87 
88  ///
89  /// Erases the length.
90  ///
91  inline void erase_length()
92  {
93  _length = 0;
94  _has_length = false;
95  }
96 
97  ///
98  /// \return A colleciton of all descendents that match the specified
99  /// predicate. The method searches in depth-first order and examines
100  /// this node as well.
101  ///
102  template <typename TPredicate>
103  std::set<const_pointer_type> find_all(
104  const TPredicate predicate) ///< The predicate to match.
105  const
106  {
107  std::set<const_pointer_type> container;
108 
109  this->for_each([predicate, &container](
110  const_pointer_type node)
111  -> void
112  {
113  if (predicate(node))
114  container.insert(node);
115  });
116 
117  return container;
118  }
119 
120  ///
121  /// \return A colleciton of all descendents that match the specified
122  /// predicate. The method searches in depth-first order and examines
123  /// this node as well.
124  ///
125  template <typename TPredicate>
126  std::set<pointer_type> find_all(
127  const TPredicate predicate) ///< The predicate to match.
128  {
129  std::set<pointer_type> container;
130 
131  this->for_each([predicate, &container](
132  pointer_type node)
133  -> void
134  {
135  if (predicate(node))
136  container.insert(node);
137  });
138 
139  return container;
140  }
141 
142  ///
143  /// \return The collection of descendents of this node.
144  ///
145  inline std::set<const_pointer_type> find_descendents() const
146  {
147  return find_all([this](const_pointer_type node) -> bool
148  {
149  return node != this;
150  });
151  }
152 
153  ///
154  /// \return The collection of descendents of this node.
155  ///
156  inline std::set<pointer_type> find_descendents()
157  {
158  return find_all([this](pointer_type node) -> bool
159  {
160  return node != this;
161  });
162  }
163 
164  ///
165  /// \return The first descendent that matches the specified predicate.
166  /// The method searches in depth-first order and examines this node as
167  /// well.
168  ///
169  template <typename TPredicate>
171  const TPredicate predicate) ///< The predicate to match.
172  const
173  {
174  for (auto child : _children)
175  {
176  const auto first = child->find_first(predicate);
177  if (nullptr != first)
178  return first;
179  }
180 
181  return predicate(this) ? this : nullptr;
182  }
183 
184  ///
185  /// \return The first descendent that matches the specified predicate.
186  /// The method searches in depth-first order and examines this node as
187  /// well.
188  ///
189  template <typename TPredicate>
191  const TPredicate predicate) ///< The predicate to match.
192  {
193  for (auto child : _children)
194  {
195  const pointer_type first = child->find_first(predicate);
196  if (nullptr != first)
197  return first;
198  }
199 
200  return predicate(this) ? this : nullptr;
201  }
202 
203  ///
204  /// \return The node with the specified id.
205  ///
207  const int id) ///< The id of the node to find.
208  const
209  {
210  return find_first([id](const_pointer_type node) -> bool
211  {
212  return node->_id == id;
213  });
214  }
215 
216  ///
217  /// \return The node with the specified id.
218  ///
220  const int id) ///< The id of the node to find.
221  {
222  return find_first([id](pointer_type node) -> bool
223  {
224  return node->_id == id;
225  });
226  }
227 
228  ///
229  /// \return The set of leaf nodes. If this node is a leaf node, it is
230  /// returned in the collection.
231  ///
232  std::set<const_pointer_type> find_leafs() const
233  {
234  return find_all([](const_pointer_type node) -> bool
235  {
236  return node->is_leaf();
237  });
238  }
239 
240  ///
241  /// \return The set of leaf nodes. If this node is a leaf node, it is
242  /// returned in the collection.
243  ///
244  std::set<pointer_type> find_leafs()
245  {
246  return find_all([](pointer_type node) -> bool
247  {
248  return node->is_leaf();
249  });
250  }
251 
252  ///
253  /// \return The node with the specified name.
254  ///
256  char const * const name) ///< The name of the node to find.
257  const
258  {
259  return find_first([name](const_pointer_type node) -> bool
260  {
261  return node->_name == name;
262  });
263  }
264 
265  ///
266  /// \return The node with the specified name.
267  ///
269  char const * const name) ///< The name of the node to find.
270  {
271  return find_first([name](pointer_type node) -> bool
272  {
273  return node->_name == name;
274  });
275  }
276 
277  ///
278  /// \return The root of the tree associated with this node.
279  ///
281  {
282  return is_root() ? this : _parent->find_root();
283  }
284 
285  ///
286  /// \return The root of the tree associated with this node.
287  ///
289  {
290  return is_root() ? this : _parent->find_root();
291  }
292 
293  ///
294  /// Executes the specified action for all descendent nodes, including
295  /// this instance, in depth-first order.
296  ///
297  template <typename TAction>
298  void for_each(
299  const TAction action) ///< The action to perform.
300  const
301  {
302  for (auto child : _children)
303  child->for_each(action);
304 
305  action(this);
306  }
307 
308  ///
309  /// Executes the specified action for all descendent nodes, including
310  /// this instance, in depth-first order.
311  ///
312  template <typename TAction>
313  void for_each(
314  const TAction action) ///< The action to perform.
315  {
316  for (auto child : _children)
317  child->for_each(action);
318 
319  action(this);
320  }
321 
322  ///
323  /// \return A new instance based on the specified file.
324  ///
325  /// \throw jade::error Thrown if there is an error parsing the tree.
326  ///
327  inline static pointer_type from_file(
328  char const * const path) ///< The path to the file.
329  {
330  std::ifstream in (path);
331  return new basic_newick_node(in);
332  }
333 
334  ///
335  /// \return A new instance based on the specified file.
336  ///
337  /// \throw jade::error Thrown if there is an error parsing the tree.
338  ///
339  inline static basic_newick_node * from_file(
340  const std::string & path) ///< The path to the file to decode.
341  {
342  return from_file(path.c_str());
343  }
344 
345  ///
346  /// \return The collection of immediate children of this node.
347  ///
348  inline const children_type & get_children() const
349  {
350  return _children;
351  }
352 
353  ///
354  /// \return The unique ID for this node within the tree. If the tree is
355  /// rerooted, this id persists into the new tree.
356  ///
357  inline int get_id() const
358  {
359  return _id;
360  }
361 
362  ///
363  /// \return The length of this node; the length is valid only if the
364  /// has_length method returns true.
365  ///
366  inline value_type get_length() const
367  {
368  return _length;
369  }
370 
371  ///
372  /// \return The name of this node.
373  ///
374  inline const std::string & get_name() const
375  {
376  return _name;
377  }
378 
379  ///
380  /// \return The parent of this node.
381  ///
383  {
384  return _parent;
385  }
386 
387  ///
388  /// \return The parent of this node.
389  ///
391  {
392  return _parent;
393  }
394 
395  ///
396  /// \return True if the node has a defined length.
397  ///
398  inline bool has_length() const
399  {
400  return _has_length;
401  }
402 
403  ///
404  /// \return True if the node has a name.
405  ///
406  inline bool has_name() const
407  {
408  return !_name.empty();
409  }
410 
411  ///
412  /// \return True if the node has no children; i.e. it is a leaf node.
413  ///
414  inline bool is_leaf() const
415  {
416  return _children.empty();
417  }
418 
419  ///
420  /// \return True if the node has no parent; i.e. it is a root node.
421  ///
422  inline bool is_root() const
423  {
424  return _parent == nullptr;
425  }
426 
427  ///
428  /// Creates a new tree based on the tree associated with this node; in
429  /// the tree returned, a copy of this node is the root.
430  ///
431  /// \return A new Newick node, the root of an equivalent tree.
432  ///
434  {
435  //
436  // Reroot the structure of the tree.
437  //
438  auto root = _clone();
439  _copy_children(this, root, nullptr);
440  _copy_parents(this, root);
441 
442  //
443  // To preserve information, assign the length
444  // to the old root to this new root.
445  //
446  auto old_root = find_root();
447  root->_length = old_root->_length;
448  root->_has_length = old_root->_has_length;
449 
450  return root;
451  }
452 
453  ///
454  /// Assigns the length.
455  ///
457  const value_type length) ///< The length to assign.
458  {
459  assert(!std::isnan(length));
460  assert(!std::isinf(length));
461  _length = length;
462  _has_length = true;
463  }
464 
465  ///
466  /// Assigns the name.
467  ///
468  inline void set_name(
469  char const * const name) ///< The name to assign.
470  {
471  assert(name != nullptr);
472  _name = name;
473  }
474 
475  ///
476  /// Encodes this instance and returns the output as a string. A
477  /// semicolon is written as the last character of the file.
478  ///
479  /// \return The encoded string.
480  ///
481  inline std::string str() const
482  {
483  std::ostringstream out;
484  write(out);
485  return out.str();
486  }
487 
488  ///
489  /// Writes this instance into the specified output stream. The output
490  /// stream receives a semicolon after the tree data.
491  ///
492  void write(
493  std::ostream & out) ///< The output stream.
494  const
495  {
496  _write(out);
497  out << ';';
498  }
499 
500  ///
501  /// Writes this instance into the specified file. A semicolon is
502  /// written as the last character of the file.
503  ///
504  inline void write(
505  char const * const path) ///< The path to the output file.
506  const
507  {
508  std::ofstream out (path);
509  write(out);
510  out << std::endl;
511  }
512 
513  ///
514  /// Writes this instance into the specified file. A semicolon is
515  /// written as the last character of the file.
516  ///
517  inline void write(
518  const std::string & path) ///< The path to the output file.
519  const
520  {
521  write(path.c_str());
522  }
523 
524  private:
525  basic_newick_node(const basic_newick_node &) = delete;
526  basic_newick_node & operator = (const basic_newick_node &) = delete;
527 
528  // --------------------------------------------------------------------
529  pointer_type _clone() const
530  {
531  auto node = new basic_newick_node();
532  node->_has_length = _has_length;
533  node->_id = _id;
534  node->_length = _length;
535  node->_name = _name;
536  return node;
537  }
538 
539  // --------------------------------------------------------------------
540  static void _copy_children(
541  const_pointer_type source,
542  pointer_type target,
543  const_pointer_type excluded)
544  {
545  //
546  // Loop over the children from the source node.
547  //
548  for (auto source_child : source->_children)
549  {
550  //
551  // Skip the excluded child, if necessary.
552  //
553  if (source_child == excluded)
554  continue;
555 
556  //
557  // Clone the source child and insert it into the target children.
558  //
559  auto target_child = source_child->_clone();
560  target->_children.push_back(target_child);
561  target_child->_parent = target;
562 
563  //
564  // Recursively copy children.
565  //
566  _copy_children(source_child, target_child, nullptr);
567  }
568  }
569 
570  // --------------------------------------------------------------------
571  static void _copy_parents(
572  const_pointer_type source,
573  pointer_type target)
574  {
575  //
576  // Do nothing if the source is the root.
577  //
578  const auto source_parent = source->_parent;
579  if (source_parent == nullptr)
580  return;
581 
582  //
583  // Clone the source parent and assign it as a child of the target.
584  //
585  auto target_child = source_parent->_clone();
586  target->_children.push_back(target_child);
587  target_child->_parent = target;
588 
589  //
590  // Recursively process the children and parents.
591  //
592  _copy_children(source_parent, target_child, source);
593  _copy_parents(source_parent, target_child);
594 
595  //
596  // Reverse the direction of the length for the new child.
597  //
598  target_child->_length = source->_length;
599  target_child->_has_length = source->_has_length;
600  }
601 
602  // --------------------------------------------------------------------
603  inline void _destruct()
604  {
605  for (auto child : _children)
606  delete child;
607  _children.clear();
608  }
609 
610  // --------------------------------------------------------------------
611  void _read(scanner_type & in)
612  {
613  //
614  // This method executes from the constructor and also throws
615  // exceptions, so take care to reclaim resources (the children) in
616  // case an exception occurs.
617  //
618  try
619  {
620  //
621  // Recursively create all nodes in the tree.
622  //
623  auto node_count = 0;
624  _read(in, node_count);
625 
626  //
627  // Require the last non-whitespace character, the semicolon.
628  //
629  in.expect(';');
630 
631  //
632  // Ensure no more non-whitespace characters are in the stream.
633  //
634  in.skip_whitespace();
635  if (in.is_end_of_data())
636  return;
637 
638  throw jade::error()
639  << "expected end of stream but encountered "
640  << " '" << in.read_token() << "'";
641  }
642  catch (...)
643  {
644  //
645  // Reclaim resources before rethrowing the exception.
646  //
647  _destruct();
648  throw;
649  }
650  }
651 
652  // --------------------------------------------------------------------
653  void _read(scanner_type & in, int & node_count)
654  {
655  static const char delims[] = ";:(),";
656 
657  //
658  // Create a new id for each node created.
659  //
660  _id = ++node_count;
661 
662  //
663  // Determine if this node has children.
664  //
665  if (in.try_char('('))
666  {
667  //
668  // Loop over all children, which are separated by colons.
669  //
670  do
671  {
672  //
673  // Create the child, storing it into the collection of
674  // children, and updating the child to reference the parent.
675  //
676  auto child = new basic_newick_node();
677  child->_parent = this;
678  _children.push_back(child);
679  child->_read(in, node_count);
680  }
681  while (in.try_char(','));
682 
683  //
684  // Require a closing parenthesis after the last child.
685  //
686  in.expect(')');
687  }
688 
689  //
690  // Parse the name, if any.
691  //
692  _name = _trim(in.read_token(delims));
693 
694  //
695  // If the stream provides a colon, parse the length.
696  //
697  if (in.try_char(':'))
698  {
699  _length = in.read_real<value_type>();
700  _has_length = true;
701  }
702  }
703 
704  // --------------------------------------------------------------------
705  static std::string _trim(const std::string & s)
706  {
707  size_t begin = 0;
708  size_t end = s.length();
709  while (begin < end && ::isspace(s[begin]))
710  begin++;
711  while (end > begin && ::isspace(s[end - 1]))
712  end--;
713  return s.substr(begin, end - begin);
714  }
715 
716  // --------------------------------------------------------------------
717  void _write(std::ostream & out) const
718  {
719  const auto end = _children.end();
720 
721  //
722  // Loop over each child, if any.
723  //
724  auto iter = _children.begin();
725  if (iter != end)
726  {
727  //
728  // Enclose all children in parentheses.
729  //
730  out << '(';
731  (*iter)->_write(out);
732 
733  //
734  // Separate children with commas.
735  //
736  while (++iter != end)
737  {
738  out << ',';
739  (*iter)->_write(out);
740  }
741 
742  //
743  // Close the parentheses for the children.
744  //
745  out << ')';
746  }
747 
748  //
749  // Write the name, if one exists.
750  //
751  if (has_name())
752  out << _name;
753 
754  //
755  // Write the length, if one exists.
756  //
757  if (_has_length)
758  out << ':' << _length;
759  }
760 
761  children_type _children; ///< A container for children.
762  bool _has_length; ///< A flag for a defined length.
763  int _id; ///< A unique ID for the node.
764  value_type _length; ///< A possible length.
765  std::string _name; ///< A name.
766  pointer_type _parent; ///< A parent node.
767  };
768 }
769 
770 ///
771 /// Writes the specified node to the output stream.
772 ///
773 /// \param lhs The output stream.
774 /// \param rhs The newick node.
775 /// \return The output stream.
776 ///
777 template <typename TValue>
778 inline std::ostream & operator << (
779  std::ostream & lhs,
781 {
782  rhs.write(lhs);
783  return lhs;
784 }
785 
786 #endif // JADE_NEWICK_HPP__
jade::basic_newick_node::get_name
const std::string & get_name() const
Definition: jade.newick.hpp:374
jade::basic_newick_node::basic_newick_node
basic_newick_node()
Initializes a new instance of the class. Initially, the instance has no name, no length,...
Definition: jade.newick.hpp:48
jade::basic_newick_node::find_leafs
std::set< pointer_type > find_leafs()
Definition: jade.newick.hpp:244
jade::basic_newick_node::find_descendents
std::set< pointer_type > find_descendents()
Definition: jade.newick.hpp:156
jade::basic_newick_node::~basic_newick_node
~basic_newick_node()
Reclaims resources used by this instance and its children.
Definition: jade.newick.hpp:39
jade::basic_newick_node::is_leaf
bool is_leaf() const
Definition: jade.newick.hpp:414
jade::basic_newick_node::find_first
const_pointer_type find_first(const TPredicate predicate) const
Definition: jade.newick.hpp:170
jade::basic_newick_node::reroot
pointer_type reroot() const
Creates a new tree based on the tree associated with this node; in the tree returned,...
Definition: jade.newick.hpp:433
jade::basic_newick_node::from_file
static basic_newick_node * from_file(const std::string &path)
Definition: jade.newick.hpp:339
jade::basic_newick_node::erase_length
void erase_length()
Erases the length.
Definition: jade.newick.hpp:91
jade::basic_newick_node::const_pointer_type
const basic_newick_node * const_pointer_type
The constant Newick node pointer type.
Definition: jade.newick.hpp:25
jade::basic_newick_node::get_parent
pointer_type get_parent()
Definition: jade.newick.hpp:390
jade::basic_newick_node::for_each
void for_each(const TAction action) const
Executes the specified action for all descendent nodes, including this instance, in depth-first order...
Definition: jade.newick.hpp:298
jade::basic_newick_node::find_name
const_pointer_type find_name(char const *const name) const
Definition: jade.newick.hpp:255
jade::basic_newick_node::get_id
int get_id() const
Definition: jade.newick.hpp:357
jade::basic_newick_node::find_root
const_pointer_type find_root() const
Definition: jade.newick.hpp:280
jade::basic_newick_node::write
void write(std::ostream &out) const
Writes this instance into the specified output stream. The output stream receives a semicolon after t...
Definition: jade.newick.hpp:492
jade::basic_newick_node::write
void write(const std::string &path) const
Writes this instance into the specified file. A semicolon is written as the last character of the fil...
Definition: jade.newick.hpp:517
jade::basic_newick_node
A template class representing a node from a Newick tree.
Definition: jade.newick.hpp:19
jade::basic_newick_node::has_name
bool has_name() const
Definition: jade.newick.hpp:406
jade::basic_newick_node::set_length
void set_length(const value_type length)
Assigns the length.
Definition: jade.newick.hpp:456
jade::basic_newick_node::find_all
std::set< const_pointer_type > find_all(const TPredicate predicate) const
Definition: jade.newick.hpp:103
jade::basic_newick_node::scanner_type
basic_scanner< char > scanner_type
The scanner type.
Definition: jade.newick.hpp:34
jade::basic_newick_node::set_name
void set_name(char const *const name)
Assigns the name.
Definition: jade.newick.hpp:468
jade::basic_newick_node::find_descendents
std::set< const_pointer_type > find_descendents() const
Definition: jade.newick.hpp:145
jade::basic_newick_node::pointer_type
basic_newick_node * pointer_type
The Newick node pointer type.
Definition: jade.newick.hpp:28
jade::basic_newick_node::find_root
pointer_type find_root()
Definition: jade.newick.hpp:288
jade::basic_newick_node::get_parent
const_pointer_type get_parent() const
Definition: jade.newick.hpp:382
jade::basic_newick_node::find_leafs
std::set< const_pointer_type > find_leafs() const
Definition: jade.newick.hpp:232
jade::basic_scanner
A template class that parses text and throws meaningful error messages.
Definition: jade.scanner.hpp:19
jade::basic_newick_node::find_id
const_pointer_type find_id(const int id) const
Definition: jade.newick.hpp:206
jade::basic_newick_node::find_all
std::set< pointer_type > find_all(const TPredicate predicate)
Definition: jade.newick.hpp:126
jade::basic_newick_node::value_type
TValue value_type
The value type.
Definition: jade.newick.hpp:22
jade::basic_newick_node::write
void write(char const *const path) const
Writes this instance into the specified file. A semicolon is written as the last character of the fil...
Definition: jade.newick.hpp:504
jade::basic_newick_node::from_file
static pointer_type from_file(char const *const path)
Definition: jade.newick.hpp:327
jade::basic_newick_node::str
std::string str() const
Encodes this instance and returns the output as a string. A semicolon is written as the last characte...
Definition: jade.newick.hpp:481
jade::basic_newick_node::find_name
pointer_type find_name(char const *const name)
Definition: jade.newick.hpp:268
jade::basic_error
A template for a class representing an exception thrown from this namespace.
Definition: jade.error.hpp:20
jade::basic_newick_node::get_children
const children_type & get_children() const
Definition: jade.newick.hpp:348
jade::basic_newick_node::find_id
pointer_type find_id(const int id)
Definition: jade.newick.hpp:219
jade::basic_newick_node::find_first
pointer_type find_first(const TPredicate predicate)
Definition: jade.newick.hpp:190
jade::basic_newick_node::children_type
std::vector< pointer_type > children_type
The children vector type.
Definition: jade.newick.hpp:31
jade::basic_newick_node::is_root
bool is_root() const
Definition: jade.newick.hpp:422
jade::basic_newick_node::for_each
void for_each(const TAction action)
Executes the specified action for all descendent nodes, including this instance, in depth-first order...
Definition: jade.newick.hpp:313
jade::basic_newick_node::basic_newick_node
basic_newick_node(const std::string &in)
Initializes a new instance of the class from the specified input string. The input string must end wi...
Definition: jade.newick.hpp:80
jade::basic_newick_node::has_length
bool has_length() const
Definition: jade.newick.hpp:398
jade::basic_newick_node::basic_newick_node
basic_newick_node(std::istream &in)
Initializes a new instance of the class from the specified input stream. The input stream must end wi...
Definition: jade.newick.hpp:65
jade::basic_newick_node::get_length
value_type get_length() const
Definition: jade.newick.hpp:366