ʻOhana
Population structure, admixture history, and selection using learning methods.
jade.bgl_reader.hpp
1 /* -------------------------------------------------------------------------
2  Ohana
3  Copyright (c) 2015-2020 Jade Cheng (\___/)
4  Jade Cheng <info@jade-cheng.com> (='.'=)
5  ------------------------------------------------------------------------- */
6 
7 #ifndef JADE_BGL_READER_HPP__
8 #define JADE_BGL_READER_HPP__
9 
10 #include "jade.error.hpp"
11 
12 namespace jade
13 {
14  ///
15  /// A template for a class that reads BEAGLE data and is capable of writing
16  /// it to a stream as a likelihood genotype matrix.
17  ///
18  template <typename TValue>
20  {
21  public:
22  /// The value type.
23  typedef TValue value_type;
24 
25  ///
26  /// Initializes a new instance of the class.
27  ///
28  explicit basic_bgl_reader(
29  std::istream & in) ///< The input stream.
30  : _buf ()
31  , _rows (0)
32  , _cols (0)
33  {
34  std::string temp;
35  std::getline(in, temp);
36  if (!in.good())
37  throw error() << "failed to read header of beagle data.";
38 
39  std::istringstream header_in (temp);
40  while (header_in >> temp)
41  _rows++;
42  if (_rows < 3 || _rows % 3 != 0)
43  throw error() << "invalid number of columns "
44  << "in beagle header: " << _rows;
45 
46  _rows = (_rows - 3) / 3;
47 
48  for (;;)
49  {
50  std::string marker;
51  if (!(in >> marker))
52  break;
53 
54  _cols++;
55 
56  if (!(in >> temp))
57  throw error()
58  << "cannot read first allele for marker '"
59  << marker << "' on line " << _cols + 1
60  << " of beagle data";
61 
62  if (!(in >> temp))
63  throw error()
64  << "cannot read second allele for marker '"
65  << marker << "' on line " << _cols + 1
66  << " of beagle data";
67 
68  for (size_t i = 0; i < _rows; i++)
69  {
70  for (size_t j = 0; j < 3; j++)
71  {
72  value_type value;
73  if (!(in >> value) || value < value_type(0) ||
74  value > value_type(1))
75  throw error()
76  << "encountered invalid percentage in column "
77  << 3 + i * 3 + j << " for marker '" << marker
78  << "' on line " << _cols + 1
79  << " of beagle data";
80  _buf.push_back(value);
81  }
82  }
83 
84  if (!in.good())
85  throw error() << "invalid data on line " << _cols + 1
86  << " of beagle data";
87 
88  const auto ch = in.peek();
89  if (ch >= 0 && ch != '\n')
90  throw error()
91  << "expected end of line but encountered "
92  << _format(ch) << " on line "
93  << _cols + 1 << " of beagle data";
94  }
95 
96  if (_cols == 0)
97  _rows = 0;
98  }
99 
100  ///
101  /// \return A string representation of this instance.
102  ///
103  std::string str() const
104  {
105  std::ostringstream out;
106  write(out);
107  return out.str();
108  }
109 
110  ///
111  /// Writes the BEAGLE data to the specified output stream as a
112  /// likelihood genotype matrix.
113  ///
114  void write(
115  std::ostream & out) ///< The output stream.
116  const
117  {
118  if (_cols == 0)
119  {
120  out << "0 0" << std::endl;
121  return;
122  }
123 
124  const auto r3 = _rows * 3;
125  const auto cr3 = _cols * _rows * 3;
126 
127  auto grp_ptr = _buf.data();
128  const auto grp_end = grp_ptr + 3;
129  for (;;)
130  {
131  out << _rows << ' ' << _cols << std::endl;
132 
133  auto row_ptr = grp_ptr;
134  const auto row_end = row_ptr + r3;
135  while (row_ptr != row_end)
136  {
137  auto col_ptr = row_ptr;
138  const auto col_end = col_ptr + cr3;
139  for (;;)
140  {
141  out << *col_ptr;
142  col_ptr += r3;
143  if (col_ptr == col_end)
144  break;
145  out << '\t';
146  }
147 
148  out << '\n';
149  row_ptr += 3;
150  }
151 
152  if (++grp_ptr == grp_end)
153  break;
154  out << std::endl;
155  }
156  }
157 
158  ///
159  /// Writes the BEAGLE data to the specified output file.
160  ///
161  inline void write(
162  char const * const path) ///< The output path.
163  {
164  assert(path != nullptr);
165  std::ofstream out (path);
166  if (!out.good())
167  throw error() << "error opening '" << path << "' for writing";
168  write(out);
169  }
170 
171  ///
172  /// Writes the BEAGLE data to the specified output file.
173  ///
174  inline void write(
175  const std::string & path) ///< The output path.
176  {
177  write(path.c_str());
178  }
179 
180  private:
181  // --------------------------------------------------------------------
182  static std::string _format(const int ch)
183  {
184  std::ostringstream str;
185 
186  if (ch < 0)
187  str << "end of data";
188  else if (ch == '\'') str << "'\\''";
189  else if (ch == '\n') str << "'\\n'";
190  else if (ch == '\r') str << "'\\r'";
191  else if (ch == '\t') str << "'\\t'";
192  else if (0 != std::isprint(ch))
193  str << "'" << char(ch) << "'";
194  else
195  str << "0x" << std::setfill('0') << std::hex
196  << std::setw(2) << ch;
197 
198  return str.str();
199  }
200 
201  std::vector<value_type> _buf;
202  size_t _rows;
203  size_t _cols;
204  };
205 }
206 
207 #endif // JADE_BGL_READER_HPP__
jade::basic_bgl_reader::str
std::string str() const
Definition: jade.bgl_reader.hpp:103
jade::basic_bgl_reader::value_type
TValue value_type
The value type.
Definition: jade.bgl_reader.hpp:23
jade::basic_bgl_reader::write
void write(std::ostream &out) const
Writes the BEAGLE data to the specified output stream as a likelihood genotype matrix.
Definition: jade.bgl_reader.hpp:114
jade::basic_bgl_reader::write
void write(char const *const path)
Writes the BEAGLE data to the specified output file.
Definition: jade.bgl_reader.hpp:161
jade::basic_error
A template for a class representing an exception thrown from this namespace.
Definition: jade.error.hpp:20
jade::basic_bgl_reader
A template for a class that reads BEAGLE data and is capable of writing it to a stream as a likelihood genotype matrix.
Definition: jade.bgl_reader.hpp:20
jade::basic_bgl_reader::basic_bgl_reader
basic_bgl_reader(std::istream &in)
Initializes a new instance of the class.
Definition: jade.bgl_reader.hpp:28
jade::basic_bgl_reader::write
void write(const std::string &path)
Writes the BEAGLE data to the specified output file.
Definition: jade.bgl_reader.hpp:174