ʻOhana
Population structure, admixture history, and selection using learning methods.
jade.scanner.hpp
1 /* -------------------------------------------------------------------------
2  Ohana
3  Copyright (c) 2015-2020 Jade Cheng (\___/)
4  Jade Cheng <info@jade-cheng.com> (='.'=)
5  ------------------------------------------------------------------------- */
6 
7 #ifndef JADE_SCANNER_HPP__
8 #define JADE_SCANNER_HPP__
9 
10 #include "jade.error.hpp"
11 
12 namespace jade
13 {
14  ///
15  /// A template class that parses text and throws meaningful error messages.
16  ///
17  template <typename TChar>
19  {
20  public:
21  /// The character type.
22  typedef TChar char_type;
23 
24  /// The character traits type.
25  typedef std::char_traits<char_type> char_traits_type;
26 
27  /// The input stream type.
28  typedef std::basic_istream<char_type> istream_type;
29 
30  /// The output stream type.
31  typedef std::basic_ostream<char_type> ostream_type;
32 
33  /// The string type.
34  typedef std::basic_string<char_type> string_type;
35 
36  /// The input stream type.
37  typedef std::basic_istringstream<char_type> istringstream_type;
38 
39  /// The output stream type.
40  typedef std::basic_ostringstream<char_type> ostringstream_type;
41 
42  ///
43  /// Initializes a new instance of the class to scan the specified
44  /// stream.
45  ///
46  explicit basic_scanner(
47  istream_type & in) ///< The input stream.
48  : _ptr ()
49  , _in (in)
50  {
51  }
52 
53  ///
54  /// Initializes a new instance of the class to scan the specified
55  /// string.
56  ///
57  explicit basic_scanner(
58  const string_type & in) ///< The input string.
59  : _ptr (new istringstream_type(in))
60  , _in (*_ptr)
61  {
62  }
63 
64  ///
65  /// Skips whitespace and validates the next symbol in the stream matches
66  /// the specified character. If the validation fails, this method throws
67  /// an exception with a meaningful error message.
68  ///
69  /// \throw jade::error Thrown if the stream provides an unexpected
70  /// symbol.
71  ///
72  void expect(
73  const char_type ch) ///< The expected character.
74  {
75  //
76  // Skip whitespace before validating the character.
77  //
79 
80  //
81  // Throw an excepiton if encountering the end of the stream.
82  //
83  const auto actual = _in.peek();
84  if (actual < 0)
85  throw error()
86  << "expected symbol '" << char_type(ch) << "' "
87  << "but encountered end of stream";
88 
89  //
90  // Validate the symbol matches the expected value.
91  //
92  if (actual == ch)
93  {
94  _in.get();
95  return;
96  }
97 
98  if (::isprint(ch))
99  throw error()
100  << "expected symbol '" << char_type(ch) << "' "
101  << "but encountered symbol '"
102  << char_type(actual) << "'";
103 
104  throw error()
105  << "expected symbol '" << char_type(ch) << "' "
106  << "but encountered ASCII code " << actual;
107  }
108 
109  ///
110  /// \return True if all data has been read from the scanner.
111  ///
112  inline bool is_end_of_data() const
113  {
114  return _in.peek() < 0;
115  }
116 
117  ///
118  /// Reads a series of digits from the stream and copies them to the
119  /// specified output stream. This method does not skip whitespace before
120  /// or after reading the digits.
121  ///
123  ostream_type & out) ///< The output stream.
124  {
125  while (::isdigit(_in.peek()))
126  out << char_type(_in.get());
127  }
128 
129  ///
130  /// Skips whitespace and then parses and returns a floating-point
131  /// value from the specified stream. If there is an error parsing the
132  /// value, the method throws an exception with a meaningful error
133  /// message.
134  ///
135  /// \return A floating-point value.
136  ///
137  /// \throw jade::error Thrown if there is an error parsing the length.
138  ///
139  inline double read_double()
140  {
141  return read_real<double>();
142  }
143 
144  ///
145  /// Skips whitespace and then parses and returns a floating-point
146  /// value from the specified stream. If there is an error parsing the
147  /// value, the method throws an exception with a meaningful error
148  /// message.
149  ///
150  /// \return A floating-point value.
151  ///
152  /// \throw jade::error Thrown if there is an error parsing the length.
153  ///
154  inline float read_float()
155  {
156  return read_real<float>();
157  }
158 
159  ///
160  /// Skips whitespace and then parses and returns a floating-point
161  /// value from the specified stream. If there is an error parsing the
162  /// value, the method throws an exception with a meaningful error
163  /// message.
164  ///
165  /// \return A floating-point value.
166  ///
167  /// \throw jade::error Thrown if there is an error parsing the length.
168  ///
169  template <typename TValue>
170  TValue read_real()
171  {
172  static const auto hyphen = char_type('-');
173  static const auto period = char_type('.');
174 
175  //
176  // Initially skip whitespace.
177  //
178  skip_whitespace();
179 
180  ostringstream_type out;
181 
182  //
183  // Check for negative values.
184  //
185  if (try_char(hyphen))
186  out << hyphen;
187 
188  //
189  // Read values before the decimal place.
190  //
191  read_digits(out);
192 
193  //
194  // Check for a decimal, possibly reading additional digits.
195  //
196  if (try_char(period))
197  {
198  out << period;
199  read_digits(out);
200  }
201 
202  //
203  // If no symbols exist in the output buffer, this is invalid.
204  //
205  const auto length_str = out.str();
206  if (length_str.empty())
207  throw error(
208  "expected a floating-point value but "
209  "did not encounter any digits");
210 
211  //
212  // Parse and return the digits as a floating-point value, throwing
213  // an exception in the case of an error.
214  //
215  istringstream_type length_in (length_str);
216  TValue length;
217  if (length_in >> length)
218  return length;
219  throw error()
220  << "expected a length but encountered '"
221  << length_str << "'";
222  }
223 
224  ///
225  /// Reads and returns a series of characters terminated by the end of
226  /// the input stream or a specified delimeter. If no delimeters are
227  /// unspecified, then the method uses whitespace as the delimeters.
228  ///
229  /// \param delimeters The delimeters, or nullptr.
230  /// \return The token.
231  ///
232  string_type read_token(char_type const * const delimeters = nullptr)
233  {
234  static const char_type fallback[] =
235  {
236  char_type(' '),
237  char_type('\n'),
238  char_type('\t'),
239  char_type('\0')
240  };
241 
242  //
243  // Use whitespace as the delimeters unless specified otherwise.
244  //
245  const auto delims = delimeters == nullptr ? fallback : delimeters;
246  const auto length = char_traits_type::length(delims);
247 
248  ostringstream_type out;
249 
250  for (;;)
251  {
252  auto ch = _in.peek();
253 
254  if (ch < 0 || nullptr != char_traits_type::find(
255  delims, length, char_type(ch)))
256  return out.str();
257 
258  out << char_type(_in.get());
259  }
260  }
261 
262  ///
263  /// Skips whitespace from the input stream. If the input stream
264  /// indicates the end of the stream, the method returns without an
265  /// error.
266  ///
268  {
269  while (::isspace(_in.peek()))
270  _in.get();
271  }
272 
273  ///
274  /// Skips whitespace and then checks if the next character from the
275  /// stream matches the specified character. If it does, the method
276  /// advances the stream past the character and returns true; otherwise,
277  /// the method returns false.
278  ///
279  /// \return True if encountering the character; otherwise, false.
280  ///
281  bool try_char(
282  const char_type ch) ///< The character to test.
283  {
284  //
285  // Skip whitespace before testing the next symbol.
286  //
287  skip_whitespace();
288 
289  //
290  // Return false if the symbol does not match or if there are no more
291  // symbols available in the input stream.
292  //
293  if (ch != _in.peek())
294  return false;
295 
296  //
297  // Advance past the character and return true to indicate a match.
298  //
299  _in.get();
300  return true;
301  }
302 
303  private:
304  basic_scanner(const basic_scanner &) = delete;
305  basic_scanner & operator = (const basic_scanner &) = delete;
306 
307  std::unique_ptr<istream_type> _ptr;
308  istream_type & _in;
309  };
310 
311  /// A class that parses text and throws meaningful error messages.
312  typedef basic_scanner<char> scanner;
313 }
314 
315 #endif // JADE_SCANNER_HPP__
jade::basic_scanner::istringstream_type
std::basic_istringstream< char_type > istringstream_type
The input string stream type.
Definition: jade.scanner.hpp:37
jade::basic_scanner::ostream_type
std::basic_ostream< char_type > ostream_type
The output stream type.
Definition: jade.scanner.hpp:31
jade::basic_scanner::read_token
string_type read_token(char_type const *const delimeters=nullptr)
Reads and returns a series of characters terminated by the end of the input stream or a specified delimiter.
Definition: jade.scanner.hpp:232
jade::basic_scanner::expect
void expect(const char_type ch)
Skips whitespace and validates the next symbol in the stream matches the specified character....
Definition: jade.scanner.hpp:72
jade::basic_scanner::read_double
double read_double()
Skips whitespace and then parses and returns a floating-point value from the specified stream....
Definition: jade.scanner.hpp:139
jade::basic_scanner::is_end_of_data
bool is_end_of_data() const
Definition: jade.scanner.hpp:112
jade::basic_scanner::read_float
float read_float()
Skips whitespace and then parses and returns a floating-point value from the specified stream....
Definition: jade.scanner.hpp:154
jade::basic_scanner::char_type
TChar char_type
The character type.
Definition: jade.scanner.hpp:22
jade::basic_scanner::basic_scanner
basic_scanner(istream_type &in)
Initializes a new instance of the class to scan the specified stream.
Definition: jade.scanner.hpp:46
jade::basic_scanner::read_digits
void read_digits(ostream_type &out)
Reads a series of digits from the stream and copies them to the specified output stream....
Definition: jade.scanner.hpp:122
jade::basic_scanner::char_traits_type
std::char_traits< char_type > char_traits_type
The character traits type.
Definition: jade.scanner.hpp:25
jade::basic_scanner
A template class that parses text and throws meaningful error messages.
Definition: jade.scanner.hpp:19
jade::basic_scanner::ostringstream_type
std::basic_ostringstream< char_type > ostringstream_type
The output string stream type.
Definition: jade.scanner.hpp:40
jade::basic_scanner::read_real
TValue read_real()
Skips whitespace and then parses and returns a floating-point value from the specified stream....
Definition: jade.scanner.hpp:170
jade::basic_scanner::skip_whitespace
void skip_whitespace()
Skips whitespace from the input stream. If the input stream indicates the end of the stream,...
Definition: jade.scanner.hpp:267
jade::basic_scanner::try_char
bool try_char(const char_type ch)
Skips whitespace and then checks if the next character from the stream matches the specified character.
Definition: jade.scanner.hpp:281
jade::basic_error
A template for a class representing an exception thrown from this namespace.
Definition: jade.error.hpp:20
jade::basic_scanner::string_type
std::basic_string< char_type > string_type
The string type.
Definition: jade.scanner.hpp:34
jade::basic_scanner::basic_scanner
basic_scanner(const string_type &in)
Initializes a new instance of the class to scan the specified string.
Definition: jade.scanner.hpp:57
jade::basic_scanner::istream_type
std::basic_istream< char_type > istream_type
The input stream type.
Definition: jade.scanner.hpp:28