ʻOhana
Population structure, admixture history, and selection using learning methods.
jade.neighbor_joining.hpp
1 /* -------------------------------------------------------------------------
2  Ohana
3  Copyright (c) 2015-2020 Jade Cheng (\___/)
4  Jade Cheng <info@jade-cheng.com> (='.'=)
5  ------------------------------------------------------------------------- */
6 
7 #ifndef JADE_NEIGHBOR_JOINING_HPP__
8 #define JADE_NEIGHBOR_JOINING_HPP__
9 
10 #include "jade.matrix.hpp"
11 
12 namespace jade
13 {
14  ///
15  /// A template for a class that implements the neighbor joining algorithm.
16  ///
17  template <typename TValue>
19  {
20  public:
21  /// The value type.
22  typedef TValue value_type;
23 
24  /// The matrix type.
26 
27  ///
28  /// Initializes a new instance of the class.
29  ///
31  const matrix_type & distances) ///< The distance matrix.
32  : _children ()
33  , _lengths ()
34  , _names ()
35  , _root (invalid_id)
36  {
37  //
38  // Allow empty matrices even though no tree will be produced from
39  // the write method.
40  //
41  if (distances.is_empty())
42  return;
43 
44  //
45  // Allow just one node even though the tree produced from the write
46  // method will consist on only the node name ("0").
47  //
48  assert(distances.is_square());
49  auto n = distances.get_height();
50  id_type next_id = 0;
51  if (n == 1)
52  {
53  _root = _add_leaf(next_id);
54  return;
55  }
56 
57  //
58  // Prepare a list of ids for the initial set of nodes.
59  //
60  typedef std::vector<id_type> vector_type;
61  vector_type x;
62  for (size_t i = 0; i < n; i++)
63  x.push_back(_add_leaf(next_id));
64 
65  //
66  // Prepare the distance matrix that will be reduced by the
67  // algorithm.
68  //
69  matrix_type d (distances);
70 
71  //
72  // Loop until there are only two nodes remaining in the distance
73  // matrix.
74  //
75  while (n > 2)
76  {
77  //
78  // Find the minimum Q value in the matrix, and use it to find
79  // the two nodes that will be joined. Join them by creating a
80  // new parent node.
81  //
82  const q_data q (d);
83  const id_type id (next_id++);
84  _add_parent(id, x[q.i], q.d_ik);
85  _add_parent(id, x[q.j], q.d_jk);
86 
87  //
88  // Prepare the new, reduced distance matrix as well as the
89  // corresponding id vector.
90  //
91  matrix_type dd (n - 1, n - 1);
92  vector_type xx { id };
93  for (size_t r = 0, rr = 1; r < n; r++)
94  {
95  if (r == q.i || r == q.j)
96  continue;
97  xx.push_back(x[r]);
98  dd(rr, 0) = value_type(0.5) *
99  (d(r, q.i) + d(r, q.j) - q.d_ij);
100  for (size_t c = 0, cc = 1; c < r; c++)
101  if (c != q.i && c != q.j)
102  dd(rr, cc++) = d(r, c);
103  rr++;
104  }
105 
106  //
107  // Copy the lower triangle to the upper triangle so the data
108  // in the next Q matrix matches the expected values.
109  //
110  dd.copy_lower_to_upper();
111 
112  d.swap(dd);
113  x.swap(xx);
114  n--;
115  }
116 
117  //
118  // Connect the last two nodes; note the loop above places new nodes
119  // at index zero, so here it is known that the leaf node must be at
120  // index 1, and so the root note must be at index 0.
121  //
122  _root = x[0];
123  _add_parent(_root, x[1], d(1, 0));
124  }
125 
126  ///
127  /// \return A string representation of this class.
128  ///
129  std::string str() const
130  {
131  std::ostringstream out;
132  write(out);
133  return out.str();
134  }
135 
136  ///
137  /// Writes the constructed tree in Newick format to the specified output
138  /// stream.
139  ///
140  void write(
141  std::ostream & out) ///< The output stream.
142  const
143  {
144  if (_root != invalid_id)
145  {
146  _write(out, _root);
147  out << ';';
148  }
149  }
150 
151  private:
152  typedef size_t id_type;
153 
154  static constexpr id_type invalid_id =
155  std::numeric_limits<id_type>::max();
156 
157  // --------------------------------------------------------------------
158  id_type _add_leaf(id_type & next_id)
159  {
160  const auto id = next_id++;
161  _children[id];
162  _names.insert(id);
163  return id;
164  }
165 
166  // --------------------------------------------------------------------
167  void _add_parent(
168  const id_type parent_id,
169  const id_type child_id,
170  const value_type child_length)
171  {
172  _children[parent_id].push_back(child_id);
173  _lengths[child_id] = child_length;
174  }
175 
176  // --------------------------------------------------------------------
177  void _write(std::ostream & out, const id_type id) const
178  {
179  const auto & children = _children.find(id)->second;
180  if (!children.empty())
181  {
182  out << "(";
183 
184  auto iter = children.begin();
185  _write(out, *iter);
186 
187  while (++iter != children.end())
188  {
189  out << ",";
190  _write(out, *iter);
191  }
192 
193  out << ")";
194  }
195 
196  const auto name_iter = _names.find(id);
197  if (name_iter != _names.end())
198  out << id;
199 
200  const auto length_iter = _lengths.find(id);
201  if (length_iter != _lengths.end())
202  out << ":" << length_iter->second;
203  }
204 
205  // --------------------------------------------------------------------
206  struct q_data
207  {
208  value_type d_ij;
209  value_type d_ik;
210  value_type d_jk;
211  size_t i;
212  size_t j;
213 
214  // ----------------------------------------------------------------
215  explicit q_data(const matrix_type & d)
216  : d_ij ()
217  , d_ik ()
218  , d_jk ()
219  , i ()
220  , j ()
221  {
222  static const auto k_0_5 = value_type(0.5);
223 
224  const auto n = d.get_height();
225  const auto k_n_2 = value_type(n - 2);
226 
227  //
228  // Cache the row sums; column sums would work equally well
229  // because the matrix is symmetric.
230  //
231  std::vector<value_type> sigma;
232  for (size_t c = 0; c < n; c++)
233  sigma.push_back(d.get_row_sum(c));
234 
235  //
236  // Compute the values of the Q matrix.
237  //
238  matrix_type q (n, n);
239  for (size_t r = 0; r < n; r++)
240  for (size_t c = 0; c < r; c++)
241  q(r, c) = k_n_2 * d(r, c) - sigma[r] - sigma[c];
242 
243  //
244  // Find the cell with the minimum value.
245  //
246  i = 1, j = 0;
247  for (size_t r = 2; r < n; r++)
248  for (size_t c = 0; c < r; c++)
249  if (q(r, c) < q(i, j))
250  i = r, j = c;
251 
252  //
253  // Compute distances between the new nodes.
254  //
255  d_ij = d(i, j);
256  d_ik = k_0_5 * (d_ij + ((sigma[i] - sigma[j]) / k_n_2));
257  d_jk = d_ij - d_ik;
258  }
259  };
260 
261  std::map<id_type, std::vector<id_type>> _children;
262  std::map<id_type, value_type> _lengths;
263  std::set<id_type> _names;
264  id_type _root;
265  };
266 }
267 
268 #endif // JADE_NEIGHBOR_JOINING_HPP__
jade::basic_neighbor_joining::str
std::string str() const
Definition: jade.neighbor_joining.hpp:129
jade::basic_neighbor_joining
A template for a class that implements the neighbor joining algorithm.
Definition: jade.neighbor_joining.hpp:19
jade::basic_matrix::is_square
bool is_square() const
Definition: jade.matrix.hpp:917
jade::basic_neighbor_joining::write
void write(std::ostream &out) const
Writes the constructed tree in Newick format to the specified output stream.
Definition: jade.neighbor_joining.hpp:140
jade::basic_matrix::swap
void swap(basic_matrix &other)
Swaps this matrix and another matrix.
Definition: jade.matrix.hpp:1209
jade::basic_neighbor_joining::matrix_type
basic_matrix< value_type > matrix_type
The matrix type.
Definition: jade.neighbor_joining.hpp:25
jade::basic_matrix::is_empty
bool is_empty() const
Definition: jade.matrix.hpp:860
jade::basic_neighbor_joining::basic_neighbor_joining
basic_neighbor_joining(const matrix_type &distances)
Initializes a new instance of the class.
Definition: jade.neighbor_joining.hpp:30
jade::basic_matrix::get_height
size_t get_height() const
Definition: jade.matrix.hpp:603
jade::basic_neighbor_joining::value_type
TValue value_type
The value type.
Definition: jade.neighbor_joining.hpp:22
jade::basic_matrix::copy_lower_to_upper
void copy_lower_to_upper()
Copies the elements in the lower triangle of the square matrix to the corresponding upper triangle el...
Definition: jade.matrix.hpp:266
jade::basic_matrix< value_type >