ʻOhana
Population structure, admixture history, and selection using learning methods.
jade.ped_reader.hpp
1 /* -------------------------------------------------------------------------
2  Ohana
3  Copyright (c) 2015-2020 Jade Cheng (\___/)
4  Jade Cheng <info@jade-cheng.com> (='.'=)
5  ------------------------------------------------------------------------- */
6 
7 #ifndef JADE_PED_READER_HPP__
8 #define JADE_PED_READER_HPP__
9 
10 #include "jade.error.hpp"
11 
12 namespace jade
13 {
14  ///
15  /// A template for a class that reads PED data.
16  ///
17  template <typename TValue>
19  {
20  public:
21  /// The value type.
22  typedef TValue value_type;
23 
24  ///
25  /// Initializes a new instance of the class.
26  ///
27  explicit basic_ped_reader(
28  std::istream & in) ///< The input stream.
29  : _buf ()
30  , _rows (0)
31  , _cols (0)
32  {
33  size_t line = 1;
34  size_t col = 0;
35  for (;;)
36  {
37  const auto ch = in.get();
38 
39  if (ch < 0)
40  {
41  if (col > 0)
42  throw error("unexpected end of data");
43  break;
44  }
45 
46  if (ch == '\n')
47  {
48  if (col > 0)
49  throw error() << "premature end of line " << line;
50  line++;
51  continue;
52  }
53 
54  if (ch != '\t')
55  continue;
56 
57  if (++col < 6)
58  continue;
59 
60  col = 1;
61  for (;;)
62  {
63  const auto ch1 = _read(in, line);
64 
65  _require(in, line, ' ');
66 
67  const auto ch2 = _read(in, line);
68 
69  const auto symbol =
70  ch1 == '0' || ch2 == '0' ? '3' :
71  ch1 != ch2 ? '1' :
72  ch1 == '2' ? '0' :
73  '2';
74 
75  _buf.push_back(symbol);
76 
77  if (in.peek() < 0 || in.peek() == '\n')
78  break;
79 
80  _require(in, line, '\t');
81 
82  if (_rows > 0 && col == _cols)
83  throw error()
84  << "expected " << _cols << " pairs but encountered "
85  << "at least " << col + 1 << " on line " << line;
86 
87  col++;
88  }
89 
90  if (_rows == 0)
91  _cols = col;
92 
93  else if (col != _cols)
94  throw error()
95  << "expected " << _cols << " genotype pairs but "
96  << "encountered " << col << " on line " << line;
97 
98  col = 0;
99  _rows++;
100  }
101 
102  if (_cols == 0)
103  _rows = 0;
104  }
105 
106  ///
107  /// \return A string representation of this instance.
108  ///
109  std::string str() const
110  {
111  std::ostringstream out;
112  write(out);
113  return out.str();
114  }
115 
116  ///
117  /// Writes the PED data to the specified output stream.
118  ///
119  void write(
120  std::ostream & out) ///< The output stream.
121  const
122  {
123  out << _rows << ' ' << _cols << std::endl;
124 
125  if (_cols == 0)
126  return;
127 
128  size_t i = 0;
129  for (const auto ch : _buf)
130  out << ch << (++i % _cols == 0 ? '\n' : ' ');
131  }
132 
133  ///
134  /// Writes the PED data to the specified output file.
135  ///
136  void write(
137  char const * const path) ///< The output path.
138  {
139  assert(path != nullptr);
140  std::ofstream out (path);
141  if (!out.good())
142  throw error() << "error opening '" << path << "' for writing";
143  write(out);
144  }
145 
146  ///
147  /// Writes the PED data to the specified output file.
148  ///
149  inline void write(
150  const std::string & path) ///< The output path.
151  {
152  write(path.c_str());
153  }
154 
155  private:
156  // --------------------------------------------------------------------
157  static std::string _format(const int ch)
158  {
159  std::ostringstream str;
160 
161  if (ch < 0)
162  str << "end of data";
163  else if (ch == '\'') str << "'\\''";
164  else if (ch == '\n') str << "'\\n'";
165  else if (ch == '\r') str << "'\\r'";
166  else if (ch == '\t') str << "'\\t'";
167  else if (0 != std::isprint(ch))
168  str << "'" << char(ch) << "'";
169  else
170  str << "0x" << std::setfill('0') << std::hex
171  << std::setw(2) << ch;
172 
173  return str.str();
174  }
175 
176  // --------------------------------------------------------------------
177  static int _read(std::istream & in, const size_t line)
178  {
179  const auto ch = in.get();
180 
181  if (ch != '0' && ch != '1' && ch != '2')
182  throw error()
183  << "expected genotype '0', '1', or '2' but encountered "
184  << _format(ch) << " on line " << line << "; correct this "
185  << "by regenerating your data using the -recode12 option "
186  << "from plink";
187 
188  return ch;
189  }
190 
191  // --------------------------------------------------------------------
192  static void _require(
193  std::istream & in,
194  const size_t line,
195  const int expected)
196  {
197  const auto actual = in.get();
198  if (actual == expected)
199  return;
200 
201  throw error()
202  << "expected " << _format(expected) << " but encountered "
203  << _format(actual) << " on line " << line;
204  }
205 
206  std::vector<char> _buf;
207  size_t _rows;
208  size_t _cols;
209  };
210 }
211 
212 #endif // JADE_PED_READER_HPP__
jade::basic_ped_reader::write
void write(char const *const path)
Writes the PED data to the specified output file.
Definition: jade.ped_reader.hpp:136
jade::basic_ped_reader::write
void write(const std::string &path)
Writes the PED data to the specified output file.
Definition: jade.ped_reader.hpp:149
jade::basic_ped_reader::value_type
TValue value_type
The value type.
Definition: jade.ped_reader.hpp:22
jade::basic_ped_reader::write
void write(std::ostream &out) const
Writes the PED data to the specified output stream.
Definition: jade.ped_reader.hpp:119
jade::basic_ped_reader
A template for a class that reads PED data.
Definition: jade.ped_reader.hpp:19
jade::basic_ped_reader::str
std::string str() const
Definition: jade.ped_reader.hpp:109
jade::basic_ped_reader::basic_ped_reader
basic_ped_reader(std::istream &in)
Initializes a new instance of the class.
Definition: jade.ped_reader.hpp:27
jade::basic_error
A template for a class representing an exception thrown from this namespace.
Definition: jade.error.hpp:20