View on GitHub
jbson
C++11/1y BSON library
json_reader.hpp
1 // Copyright Christian Manning 2013.
2 // Distributed under the Boost Software License, Version 1.0.
3 // (See accompanying file LICENSE_1_0.txt or copy at
4 // http://www.boost.org/LICENSE_1_0.txt)
5 
6 #ifndef JBSON_JSON_READER_HPP
7 #define JBSON_JSON_READER_HPP
8 
9 #include <type_traits>
10 #include <iterator>
11 #include <memory>
12 #include <codecvt>
13 #include <vector>
14 
15 #include "detail/config.hpp"
16 
17 JBSON_PUSH_DISABLE_DOCUMENTATION_WARNING
18 #include <boost/range/as_literal.hpp>
19 #include <boost/range/algorithm.hpp>
20 #include <boost/range/algorithm_ext.hpp>
21 #include <boost/lexical_cast.hpp>
22 #include <boost/exception/exception.hpp>
23 #include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
24 #include <boost/io/ios_state.hpp>
25 JBSON_CLANG_POP_WARNINGS
26 
27 #include "document.hpp"
28 #include "detail/traits.hpp"
29 
30 JBSON_PUSH_DISABLE_DEPRECATED_WARNING
31 
32 namespace jbson {
33 
34 using boost::spirit::line_pos_iterator;
35 
36 struct json_parse_error;
37 enum class json_error_num;
38 
// Reads JSON text and stores the resulting BSON bytes in m_data; the conversion
// operators below hand that buffer to jbson document/array types.
39 struct json_reader {
    // Byte buffer type that receives the generated BSON.
40  using container_type = std::vector<char>;
    // Read-only iterator range over a container_type.
41  using range_type = boost::iterator_range<container_type::const_iterator>;
42 
43  json_reader() noexcept(std::is_nothrow_constructible<container_type>::value) = default;
44 
    // Parses [first, last): wraps both iterators in line_pos_iterator and forwards to the overload below.
45  template <typename ForwardIterator> void parse(ForwardIterator, ForwardIterator);
    // Main entry point: resets m_data, records the start position in m_start, then requires a root
    // '{' (document) or '[' (array). Failures are thrown as json_parse_error.
46  template <typename ForwardIterator>
47  void parse(line_pos_iterator<ForwardIterator>, line_pos_iterator<ForwardIterator>);
48 
    // Range overload: passes the argument through boost::as_literal, then parses the
    // resulting range through line_pos_iterator-wrapped const iterators.
49  template <typename ForwardRange_> void parse(ForwardRange_&& range_) {
50  auto range = boost::as_literal(std::forward<ForwardRange_>(range_));
51  using ForwardRange = decltype(range);
52  BOOST_CONCEPT_ASSERT((boost::ForwardRangeConcept<ForwardRange>));
53  using line_it = line_pos_iterator<typename boost::range_const_iterator<std::decay_t<ForwardRange>>::type>;
54  parse(line_it{std::cbegin(range)}, line_it{std::cend(range)});
55  }
56 
    // Conversion to a document set; yields an empty set when fewer than 5 bytes were produced.
    // NOTE(review): the embedded listing numbers jump from 60 to 62 - the return statement for the
    // non-empty case appears to be missing from this extract; restore it from the upstream header.
57  template <typename C>
58  operator basic_document_set<C>() const& {
59  if(m_data.size() < 5)
60  return basic_document_set<C>{};
62  }
63 
    // Lvalue conversion: builds a document from m_data, or an empty document when
    // fewer than 5 bytes were produced.
64  template <typename C1, typename C2>
65  operator basic_document<C1, C2>() const& {
66  if(m_data.size() < 5)
67  return basic_document<C1, C2>{};
68  return basic_document<C1, C2>{m_data};
69  }
70 
    // Lvalue conversion to an array; same rules as the document conversion above.
71  template <typename C1, typename C2>
72  operator basic_array<C1, C2>() const& {
73  if(m_data.size() < 5)
74  return basic_array<C1, C2>{};
75  return basic_array<C1, C2>{m_data};
76  }
77 
    // Rvalue conversion: moves m_data into the returned document.
    // NOTE(review): listing lines 79 and 81 are absent here - presumably the operator declaration
    // (mirroring the basic_array one below) and the early return for the empty case; confirm upstream.
78  template <typename Vec>
80  if(m_data.size() < 5)
82  return basic_document<container_type, Vec>{std::move(m_data)};
83  }
84 
    // Rvalue conversion: moves m_data into the returned array.
    // NOTE(review): listing line 88 is absent - presumably the early return for the empty case.
85  template <typename Vec>
86  operator basic_array<container_type, Vec>() && {
87  if(m_data.size() < 5)
89  return basic_array<container_type, Vec>{std::move(m_data)};
90  }
91 
92  private:
    // Parses a JSON object starting at '{' and writes it into m_data at the given position as a
    // length-prefixed, NUL-terminated document; returns the position just past what was written.
93  template <typename ForwardIterator, typename OutputIterator>
94  OutputIterator parse_document(line_pos_iterator<ForwardIterator>&, const line_pos_iterator<ForwardIterator>&,
95  OutputIterator);
    // Same as parse_document for a JSON array starting at '['; element names are the decimal indices.
96  template <typename ForwardIterator, typename OutputIterator>
97  OutputIterator parse_array(line_pos_iterator<ForwardIterator>&, const line_pos_iterator<ForwardIterator>&,
98  OutputIterator);
99 
    // Parses one JSON value, dispatching on its first character; returns the new output position
    // together with the element type that was written.
100  template <typename ForwardIterator, typename OutputIterator>
101  std::tuple<OutputIterator, element_type> parse_value(line_pos_iterator<ForwardIterator>&,
102  const line_pos_iterator<ForwardIterator>&, OutputIterator);
103 
    // Interprets an already-parsed document whose first key starts with '$' ("$date", "$oid", ...).
    // The optional is boost::none when the key is not recognised.
104  template <typename OutputIterator>
105  std::tuple<boost::optional<OutputIterator>, element_type> parse_extended_value(const basic_document<range_type>&,
106  OutputIterator);
107 
    // Writes a string value: a 4-byte little-endian length prefix followed by the text from parse_name.
108  template <typename ForwardIterator, typename OutputIterator>
109  OutputIterator parse_string(line_pos_iterator<ForwardIterator>&, const line_pos_iterator<ForwardIterator>&,
110  OutputIterator);
111 
    // Reads a double-quoted JSON string and writes its bytes followed by a NUL terminator.
    // allow_null controls whether a NUL character inside the text is kept or ends the name.
112  template <typename ForwardIterator, typename OutputIterator>
113  OutputIterator parse_name(line_pos_iterator<ForwardIterator>&, const line_pos_iterator<ForwardIterator>&,
114  OutputIterator, bool allow_null = false);
115 
    // Handles one backslash escape sequence (including \uXXXX) inside a string.
116  template <typename ForwardIterator, typename OutputIterator>
117  OutputIterator parse_escape(line_pos_iterator<ForwardIterator>&, const line_pos_iterator<ForwardIterator>&,
118  OutputIterator);
119 
    // Parses a numeric literal; returns the new output position and the element type chosen.
120  template <typename ForwardIterator, typename OutputIterator>
121  std::tuple<OutputIterator, element_type> parse_number(line_pos_iterator<ForwardIterator>&,
122  const line_pos_iterator<ForwardIterator>&, OutputIterator);
123 
    // NOTE(review): definition not visible in this section; presumably advances past whitespace.
124  template <typename ForwardIterator>
125  void skip_space(line_pos_iterator<ForwardIterator>&, const line_pos_iterator<ForwardIterator>&);
126 
    // Builds (does not throw) a json_parse_error carrying the error code, the optional expected-token
    // text and the line text / line number / column derived from `current`.
127  template <typename ForwardIterator>
128  json_parse_error make_parse_exception(json_error_num, const line_pos_iterator<ForwardIterator>& current,
129  const line_pos_iterator<ForwardIterator>& last,
130  const std::string& expected = {}) const;
    // Position-less variant: only the error code and the optional expected-token text are attached.
131  json_parse_error make_parse_exception(json_error_num, const std::string& expected = {}) const;
132 
133  private:
    // Type-erased copy of the line_pos_iterator at which the current parse() started;
    // make_parse_exception casts it back to locate the start of the failing line.
134  std::shared_ptr<void> m_start;
    // BSON bytes produced by parse().
135  container_type m_data;
136 };
137 
/// Categories of failure that json_reader reports through json_parse_error.
enum class json_error_num {
    /// The first significant character of the input is neither '{' nor '['.
    invalid_root_element,
    /// The input ended while more characters were still required.
    unexpected_end_of_range,
    /// A character was found that is not valid at the current position.
    unexpected_token,
};
139 
140 template <typename CharT, typename TraitsT>
141 std::basic_ostream<CharT, TraitsT>& operator<<(std::basic_ostream<CharT, TraitsT>& os, json_error_num err) {
142  switch(err) {
143  case json_error_num::invalid_root_element:
144  os << "invalid root element; must be document (object) or array";
145  break;
146  case json_error_num::unexpected_end_of_range:
147  os << "unexpected end of range";
148  break;
149  case json_error_num::unexpected_token:
150  os << "unexpected token";
151  break;
152  default:
153  os << "unknown error";
154  }
155  return os;
156 }
157 
// boost::error_info tags attached to json_parse_error by json_reader::make_parse_exception
// and read back by error_message().
// The json_error_num category of the failure.
158 using parse_error = boost::error_info<struct err_val_, json_error_num>;
// Description of what the parser expected to find; only attached when non-empty.
159 using expected_token = boost::error_info<struct token_, std::string>;
// Text of the input line on which the failure occurred.
160 using current_line_string = boost::error_info<struct line_, std::string>;
// Column of the failure within that line, as computed by boost::spirit::get_column.
161 using line_position = boost::error_info<struct line_pos_, size_t>;
// Line number of the failure, as computed by boost::spirit::get_line.
162 using line_number = boost::error_info<struct line_num_, size_t>;
163 
// Exception type thrown by json_reader (forward-declared near the top of this file).
// NOTE(review): the embedded listing numbers jump from 163 to 165 - the
// `struct json_parse_error ... {` header line, including its base class, is missing from this
// extract and must be restored from the upstream header.
// what() returns a fixed identifier; the details travel as boost::error_info attachments.
165  const char* what() const noexcept override { return "json_parse_error"; }
166 };
167 
// Converts a json_parse_error to a string. Declaration only; the definition is not part of
// this section.
168 std::string to_string(const json_parse_error&);
169 
170 template <typename ForwardIterator>
172 json_reader::make_parse_exception(json_error_num err, const line_pos_iterator<ForwardIterator>& current,
173  const line_pos_iterator<ForwardIterator>& last, const std::string& expected) const {
174  using char_type = typename std::iterator_traits<ForwardIterator>::value_type;
175  BOOST_CONCEPT_ASSERT((boost::ForwardIteratorConcept<ForwardIterator>));
176  auto e = make_parse_exception(err, expected);
177  if(m_start) {
178  auto start = *static_cast<line_pos_iterator<ForwardIterator>*>(m_start.get());
179  auto begin = boost::spirit::get_line_start(start, current);
180  if(begin != current && begin != last && (*begin == '\n' || *begin == '\r'))
181  std::advance(begin, 1);
182  auto range = boost::range::find_first_of<boost::return_begin_found>(boost::make_iterator_range(begin, last),
183  boost::as_literal("\n\r"));
184  using cvt_char_type =
185  std::conditional_t<std::is_same<char_type, container_type::value_type>::value, char32_t, char_type>;
186  thread_local std::wstring_convert<std::codecvt_utf8<cvt_char_type>, cvt_char_type> cvt;
187 
188  std::basic_string<cvt_char_type> str{range.begin(), range.end()};
189  if(std::is_same<char_type, container_type::value_type>::value)
190  e << current_line_string(boost::lexical_cast<std::string>(range));
191  else {
192  try {
193  e << current_line_string(cvt.to_bytes(str));
194  }
195  catch(...) {
196  auto c = str[boost::spirit::get_line(current)];
197  e << current_line_string(std::to_string((int)c));
198  }
199  }
200  e << line_number(boost::spirit::get_line(current));
201  e << line_position(boost::spirit::get_column(begin, current));
202  } else
203  std::abort();
204  return e;
205 }
206 
207 inline json_parse_error json_reader::make_parse_exception(json_error_num err, const std::string& expected) const {
208  auto e = json_parse_error{};
209  e << parse_error(err);
210  if(!expected.empty())
211  e << expected_token(expected);
212  return e;
213 }
214 
215 inline std::string error_message(json_parse_error& err) {
216  std::stringstream is;
217 
218  const auto line_num = boost::get_error_info<line_number>(err);
219  if(line_num)
220  is << "line " << *line_num << ": ";
221 
222  is << err.what() << ": ";
223  const auto num = boost::get_error_info<parse_error>(err);
224  if(!num) {
225  is << "unknown error";
226  return is.str();
227  }
228  is << *num << "\n";
229 
230  const auto line = boost::get_error_info<current_line_string>(err);
231  if(!line)
232  return is.str();
233  const auto pos = boost::get_error_info<line_position>(err);
234  if(!pos)
235  return is.str();
236 
237  is << *line << "\n";
238  {
239  boost::io::ios_width_saver ios(is);
240  is << std::setw(*pos) << "^";
241  }
242 
243  const auto expected = boost::get_error_info<expected_token>(err);
244  if(!expected || expected->empty())
245  return is.str();
246 
247  is << "\nExpected: " << *expected;
248 
249  return is.str();
250 }
251 
252 template <typename ForwardIterator> void json_reader::parse(ForwardIterator first, ForwardIterator last) {
253  parse(line_pos_iterator<ForwardIterator>{first}, line_pos_iterator<ForwardIterator>{last});
254 }
255 
256 template <typename ForwardIterator>
257 void json_reader::parse(line_pos_iterator<ForwardIterator> first, line_pos_iterator<ForwardIterator> last) {
258  BOOST_CONCEPT_ASSERT((boost::ForwardIteratorConcept<ForwardIterator>));
259 
260  m_data = container_type{};
261  if(detail::is_iterator_pointer<std::decay_t<ForwardIterator>>::value)
262  m_data.reserve(+std::distance(&*first, &*last));
263  else
264  m_data.reserve(+std::distance(first, last));
265 
266  m_start = std::make_shared<line_pos_iterator<ForwardIterator>>(first);
267  skip_space(first, last);
268  if(first == last || *first == '\0')
269  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
270  switch(*first) {
271  case '{':
272  parse_document(first, last, m_data.end());
273  break;
274  case '[':
275  parse_array(first, last, m_data.end());
276  break;
277  default:
278  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::invalid_root_element, first, last));
279  };
280  m_data.shrink_to_fit();
281 
282  skip_space(first, last);
283  if(first != last && *first != '\0')
284  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, "end of input"));
285 }
286 
287 template <typename ForwardIterator, typename OutputIterator>
288 OutputIterator json_reader::parse_document(line_pos_iterator<ForwardIterator>& first,
289  const line_pos_iterator<ForwardIterator>& last, OutputIterator out) {
290  if(first == last)
291  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
292  assert(last != first);
293 
294  const auto start_idx = std::distance(m_data.begin(), out);
295 
296  assert(out >= m_data.begin() && out <= m_data.end());
297  out = std::next(m_data.insert(out, 4, '\0'), 4);
298 
299  if(*first != '{')
300  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, "{"));
301  skip_space(++first, last);
302  if(first == last)
303  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
304  if(*first == '}') {
305  ++first;
306 
307  assert(out >= m_data.begin() && out <= m_data.end());
308  out = std::next(m_data.insert(out, '\0'));
309 
310  int32_t size = std::distance(std::next(m_data.begin(), start_idx), out);
311  if(size != 5)
312  BOOST_THROW_EXCEPTION(invalid_document_size{} << detail::expected_size(5) << detail::actual_size(size));
313  boost::range::copy(detail::native_to_little_endian(size), std::next(m_data.begin(), start_idx));
314  return out;
315  }
316 
317  while(true) {
318  if(first == last)
319  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
320 
321  assert(out >= m_data.begin() && out <= m_data.end());
322  out = m_data.insert(out, static_cast<char>(element_type::null_element));
323 
324  const auto type_idx = std::distance(m_data.begin(), out);
325  ++out;
326  skip_space(first, last);
327  out = parse_name(first, last, out);
328  skip_space(first, last);
329 
330  if(*first != ':')
331  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, ":"));
332  ++first;
333  skip_space(first, last);
334  element_type type;
335  std::tie(out, type) = parse_value(first, last, out);
336  m_data[type_idx] = static_cast<char>(type);
337 
338  skip_space(first, last);
339  if(*first == ',') {
340  ++first;
341  continue;
342  }
343  if(*first != '}')
344  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, "}"));
345  else {
346  ++first;
347 
348  assert(out >= m_data.begin() && out <= m_data.end());
349  out = std::next(m_data.insert(out, '\0'));
350 
351  int32_t size = std::distance(std::next(m_data.begin(), start_idx), out);
352  if(size < 5)
353  BOOST_THROW_EXCEPTION(invalid_document_size{} << detail::expected_size(5) << detail::actual_size(size));
354  boost::range::copy(detail::native_to_little_endian(size), std::next(m_data.begin(), start_idx));
355  return out;
356  }
357  }
358 }
359 
360 template <typename ForwardIterator, typename OutputIterator>
361 OutputIterator json_reader::parse_array(line_pos_iterator<ForwardIterator>& first,
362  const line_pos_iterator<ForwardIterator>& last, OutputIterator out) {
363  if(first == last)
364  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
365  assert(last != first);
366 
367  const auto start_idx = std::distance(m_data.begin(), out);
368 
369  assert(out >= m_data.begin() && out <= m_data.end());
370  out = std::next(m_data.insert(out, 4, '\0'), 4);
371 
372  if(*first != '[')
373  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, "["));
374  skip_space(++first, last);
375  if(first == last)
376  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
377  if(*first == ']') {
378  ++first;
379 
380  assert(out >= m_data.begin() && out <= m_data.end());
381  out = std::next(m_data.insert(out, '\0'));
382 
383  const int32_t size = std::distance(std::next(m_data.begin(), start_idx), out);
384  if(size != 5)
385  BOOST_THROW_EXCEPTION(invalid_document_size{} << detail::expected_size(5) << detail::actual_size(size));
386  boost::range::copy(detail::native_to_little_endian(size), std::next(m_data.begin(), start_idx));
387  return out;
388  }
389 
390  int32_t idx{0};
391  while(true) {
392  if(first == last)
393  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
394 
395  assert(out >= m_data.begin() && out <= m_data.end());
396  out = m_data.insert(out, static_cast<char>(element_type::null_element));
397  auto type_idx = std::distance(m_data.begin(), out);
398  auto sidx = std::to_string(idx++);
399  ++out;
400 
401  assert(out >= m_data.begin() && out <= m_data.end());
402  out = m_data.insert(out, sidx.size() + 1, '\0');
403  out = std::next(boost::range::copy(sidx, out));
404  assert(out >= m_data.begin() && out <= m_data.end());
405 
406  skip_space(first, last);
407 
408  element_type type;
409  std::tie(out, type) = parse_value(first, last, out);
410  m_data[type_idx] = static_cast<char>(type);
411 
412  skip_space(first, last);
413 
414  if(*first == ',') {
415  ++first;
416  continue;
417  }
418  if(*first != ']')
419  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, ", or ]"));
420  else {
421  ++first;
422 
423  assert(out >= m_data.begin() && out <= m_data.end());
424  out = std::next(m_data.insert(out, '\0'));
425 
426  int32_t size = std::distance(std::next(m_data.begin(), start_idx), out);
427  if(size < 5)
428  BOOST_THROW_EXCEPTION(invalid_document_size{} << detail::expected_size(5) << detail::actual_size(size));
429  boost::range::copy(detail::native_to_little_endian(size), std::next(m_data.begin(), start_idx));
430  return out;
431  }
432  }
433 }
434 
435 template <typename ForwardIterator, typename OutputIterator>
436 std::tuple<OutputIterator, element_type> json_reader::parse_value(line_pos_iterator<ForwardIterator>& first,
437  const line_pos_iterator<ForwardIterator>& last,
438  OutputIterator out) {
439  if(first == last)
440  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
441  assert(last != first);
442  auto type = element_type::null_element;
443  switch(*first) {
444  case '"':
446  out = parse_string(first, last, out);
447  break;
448  case '[':
450  out = parse_array(first, last, out);
451  break;
452  case 'f':
454  if(boost::equal(boost::as_literal("false"), boost::make_iterator_range(first, std::next(first, 5)),
455  [](char a, auto&& b) { return b == (decltype(b))a; })) {
456  assert(out >= m_data.begin() && out <= m_data.end());
457  out = std::next(m_data.insert(out, false));
458  std::advance(first, 5);
459  break;
460  }
461  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, "false"));
462  case 'n':
464  if(!boost::equal(boost::as_literal("null"), boost::make_iterator_range(first, std::next(first, 4)),
465  [](char a, auto&& b) { return b == (decltype(b))a; }))
466  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, "null"));
467  std::advance(first, 4);
468  break;
469  case 't':
471  if(boost::equal(boost::as_literal("true"), boost::make_iterator_range(first, std::next(first, 4)),
472  [](char a, auto&& b) { return b == (decltype(b))a; })) {
473  assert(out >= m_data.begin() && out <= m_data.end());
474  out = std::next(m_data.insert(out, true));
475  std::advance(first, 4);
476  break;
477  }
478  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, "true"));
479  case '{': {
480  auto idx = std::distance(m_data.begin(), out);
481  auto r = parse_document(first, last, out);
482 
483  basic_document<range_type> doc{std::next(m_data.begin(), idx), r};
484  assert(std::distance(std::next(m_data.begin(), idx), r) >= 5);
485  assert(doc.size() >= 5);
486 
487  boost::string_ref name;
488  const auto it = doc.begin();
489  if(it != doc.end())
490  name = it->name();
491  if(!name.empty() && name[0] == '$') {
492  boost::optional<OutputIterator> o_out;
493  std::tie(o_out, type) = parse_extended_value(doc, std::next(m_data.begin(), idx));
494  if(o_out) {
495  out = *o_out;
496  m_data.resize(std::distance(m_data.begin(), out));
497  break;
498  }
499  }
501  using std::swap;
502  swap(out, r);
503  } break;
504  default:
505  std::tie(out, type) = parse_number(first, last, out);
506  }
507  if(first == last)
508  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
509  return std::make_tuple(out, type);
510 }
511 
512 template <typename OutputIterator>
513 std::tuple<boost::optional<OutputIterator>, element_type>
514 json_reader::parse_extended_value(const basic_document<range_type>& doc, OutputIterator out) {
515  if(doc.size() < 5)
516  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "extended json value")
517  << detail::expected_size(5) << detail::actual_size(doc.size()));
518  auto type = element_type::null_element;
519  const auto name = doc.begin()->name();
520  if(name == "$binary" || name == "$type") {
521  if(doc.size() != 2)
522  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "binary element"));
524  // TODO: implement binary value
525  } else if(name == "$date") {
526  if(doc.size() != 1 ||
527  (doc.begin()->type() != element_type::int32_element && doc.begin()->type() != element_type::int64_element))
528  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "date element"));
529  using DateT = detail::ElementTypeMap<element_type::date_element, element::container_type>;
530  if(doc.begin()->type() == element_type::int32_element)
531  detail::serialise(m_data, out, static_cast<DateT>(get<element_type::int32_element>(*doc.begin())));
532  else if(doc.begin()->type() == element_type::int64_element)
533  detail::serialise(m_data, out, static_cast<DateT>(get<element_type::int64_element>(*doc.begin())));
534  else
535  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "date element"));
537  } else if(name == "$timestamp") {
538  if(doc.size() != 1)
539  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "timestamp element"));
541  // TODO: implement timestamp
542  } else if(name == "$regex" || name == "$options") {
543  if(doc.size() != 2)
544  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "regex element"));
545  auto it = doc.find("$regex");
546  if(it == doc.end() || it->type() != element_type::string_element)
547  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "regex element"));
548  auto re = get<element_type::string_element>(*it);
549  it = doc.find("$options");
550  if(it == doc.end() || it->type() != element_type::string_element)
551  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "regex element"));
552  auto options = get<element_type::string_element>(*it);
553 
555  detail::serialise(m_data, out, std::make_tuple(re, options));
556  } else if(name == "$oid") {
557  if(doc.size() != 40 || doc.begin()->type() != element_type::string_element)
558  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "oid element"));
559  auto str = get<element_type::string_element>(*doc.begin());
560  if(str.size() != 24)
561  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "oid element"));
562  std::array<char, 12> oid;
563  for(auto i = 0; i < 24; i += 2)
564  oid[i / 2] = static_cast<char>(std::stoi(boost::lexical_cast<std::string>(str.substr(i, 2)), nullptr, 16));
565 
567  detail::serialise(m_data, out, oid);
568  } else if(name == "$ref" || name == "$id") {
569  if(doc.size() != 2)
570  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "ref element"));
571  auto it = doc.find("$id");
572  if(it == doc.end() || it->type() != element_type::string_element)
573  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "ref element"));
574  auto str = get<element_type::string_element>(*it);
575  if(str.size() != 24)
576  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "oid element"));
577  std::array<char, 12> oid;
578  for(auto i = 0; i < 24; i += 2)
579  oid[i / 2] = static_cast<char>(std::stoi(boost::lexical_cast<std::string>(str.substr(i, 2)), nullptr, 16));
580 
581  it = doc.find("$ref");
582  if(it == doc.end() || it->type() != element_type::string_element)
583  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "ref element"));
584  auto coll = get<element_type::string_element>(*it);
585 
587  detail::serialise(m_data, out, std::make_tuple(coll, oid));
588  } else if(name == "$undefined") {
589  if(doc.size() != 1)
590  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "undefined element"));
592  } else if(name == "$minkey") {
593  if(doc.size() != 1)
594  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "minkey element"));
595  type = element_type::min_key;
596  } else if(name == "$maxkey") {
597  if(doc.size() != 1)
598  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, "maxkey element"));
599  type = element_type::max_key;
600  } else
601  return std::make_tuple(boost::none, (element_type)0);
602  return std::make_tuple(out, type);
603 }
604 
605 template <typename ForwardIterator, typename OutputIterator>
606 OutputIterator json_reader::parse_string(line_pos_iterator<ForwardIterator>& first,
607  const line_pos_iterator<ForwardIterator>& last, OutputIterator out) {
608  assert(last != first);
609 
610  assert(out >= m_data.begin() && out <= m_data.end());
611  out = m_data.insert(out, 4, '\0');
612 
613  const auto size = std::distance(m_data.begin(), std::next(out, 4));
614  out = parse_name(first, last, std::next(out, 4), true);
615  boost::range::copy(detail::native_to_little_endian<int32_t>(m_data.size() - size),
616  std::next(m_data.begin(), size - 4));
617  return out;
618 }
619 
namespace detail {

/// True for the control characters U+0000..U+001F, which must not appear unescaped inside a
/// JSON string. The upper bound is inclusive: the previous `< 0x1f` let U+001F through.
template <typename CharT> constexpr bool iscntrl(CharT c) {
    // ignore 0x7f
    return static_cast<std::make_unsigned_t<CharT>>(c) < 0x20;
}

/// True for the ASCII decimal digits '0'..'9'. The unsigned cast folds the "below '0'" case
/// into a single comparison.
template <typename CharT> constexpr bool isdigit(CharT c) {
    return static_cast<std::make_unsigned_t<CharT>>(c - '0') < 10;
}

/// True for ASCII hexadecimal digits: '0'..'9', 'A'..'F' and 'a'..'f'.
template <typename CharT> constexpr bool isxdigit(CharT c) {
    return detail::isdigit(c) || static_cast<std::make_unsigned_t<CharT>>(c - 'A') < 6 ||
           static_cast<std::make_unsigned_t<CharT>>(c - 'a') < 6;
}

/// True for the space character and for '\t', '\n', '\v', '\f' and '\r'.
template <typename CharT> constexpr bool isspace(CharT c) {
    return c == 0x20 || static_cast<std::make_unsigned_t<CharT>>(c - '\t') < 5;
}

/// Selects the code conversion facet used to turn input of character type CharT into UTF-8.
template <typename CharT> struct codecvt {
    using type = std::codecvt_utf8<CharT>;
};

/// Narrow input: uses the char-to-char std::codecvt facet. Deriving from the facet gives it a
/// public destructor so it can be created as a local object.
template <> struct codecvt<char> : std::codecvt<char, char, std::mbstate_t> {
    using type = codecvt<char>;
};

/// UTF-16 input: the facet must understand surrogate pairs.
template <> struct codecvt<char16_t> {
    using type = std::codecvt_utf8_utf16<char16_t>;
};

/// Shorthand for the facet selected for CharT.
template <typename CharT> using codecvt_t = typename codecvt<CharT>::type;

} // namespace detail
653 
654 template <typename ForwardIterator, typename OutputIterator>
655 OutputIterator json_reader::parse_name(line_pos_iterator<ForwardIterator>& first,
656  const line_pos_iterator<ForwardIterator>& last, OutputIterator out,
657  bool allow_null) {
658  using char_type = typename std::iterator_traits<ForwardIterator>::value_type;
659  assert(last != first);
660  if(first == last)
661  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
662  if(*first != '"')
663  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last, "\""));
664  std::advance(first, 1);
665 
666  detail::codecvt_t<char_type> cvt;
667  std::mbstate_t state{};
668  std::array<char_type, 2> buf;
669 
670  while(true) {
671  if(first == last)
672  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
673 
674  buf[0] = *first;
675 
676  if(buf[0] == '"') {
677  std::advance(first, 1);
678  break;
679  }
680  if(buf[0] == '\0' && !allow_null) {
681  std::advance(first, 1);
682  break;
683  }
684 
685  // Handle UTF-16 surrogate pair
686  if(std::is_same<char_type, char16_t>::value && buf[0] >= 0xD800 && buf[0] <= 0xDBFF)
687  buf[1] = *++first;
688  else
689  buf[1] = 0;
690 
691  if(buf[0] == '\\') {
692  out = parse_escape(first, last, out);
693  continue;
694  } else if(detail::iscntrl(buf[0]))
695  BOOST_THROW_EXCEPTION(
696  make_parse_exception(json_error_num::unexpected_token, first, last, "non-control char"));
697 
698  if(std::is_same<char_type, container_type::value_type>::value) {
699  assert(out >= m_data.begin() && out <= m_data.end());
700  out = std::next(m_data.insert(out, buf[0]));
701  } else {
702  std::array<char, std::max(sizeof(buf), 2 * sizeof(char16_t)) + 1> to;
703  to.fill(0);
704  const char_type* frm_next;
705  char* to_next;
706  auto res = cvt.out(state, buf.data(), buf.data() + (buf[1] ? 2 : 1), frm_next, to.data(),
707  to.data() + to.size(), to_next);
708  if(!std::mbsinit(&state) || res != std::codecvt_base::ok)
709  BOOST_THROW_EXCEPTION(
710  make_parse_exception(json_error_num::unexpected_token, first, last, "valid unicode code point(s)"));
711  auto len = std::strlen(to.data());
712  out = m_data.insert(out, len, '\0');
713  out = std::copy(to.data(), to.data() + len, out);
714  }
715 
716  std::advance(first, 1);
717  }
718  assert(out >= m_data.begin() && out <= m_data.end());
719  return std::next(m_data.insert(out, '\0'));
720 }
721 
722 template <typename ForwardIterator, typename OutputIterator>
723 OutputIterator json_reader::parse_escape(line_pos_iterator<ForwardIterator>& first,
724  const line_pos_iterator<ForwardIterator>& last, OutputIterator out) {
725  assert(last != first);
726  assert(*first == '\\');
727  std::advance(first, 1);
728 
729  if(first == last || *first == '\0')
730  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_end_of_range, first, last));
731  auto c = *first++;
732  if(c == '"')
733  out = std::next(m_data.insert(out, '"'));
734  else if(c == '/')
735  out = std::next(m_data.insert(out, '/'));
736  else if(c == '\\')
737  out = std::next(m_data.insert(out, '\\'));
738  else if(c == 'b')
739  out = std::next(m_data.insert(out, '\b'));
740  else if(c == 'f')
741  out = std::next(m_data.insert(out, '\f'));
742  else if(c == 'n')
743  out = std::next(m_data.insert(out, '\n'));
744  else if(c == 'r')
745  out = std::next(m_data.insert(out, '\r'));
746  else if(c == 't')
747  out = std::next(m_data.insert(out, '\t'));
748  else if(c == 'u') {
749  if(std::next(first, 4) !=
750  std::find_if_not(first, std::next(first, 4), [](auto&& c) { return detail::isxdigit(c); }))
751  BOOST_THROW_EXCEPTION(
752  make_parse_exception(json_error_num::unexpected_token, first, last, "4x hex (0-9;a-f/A-F)"));
753 
754  std::array<char, 5> buf;
755  buf.back() = 0;
756  std::copy(first, std::next(first, 4), buf.begin());
757  assert(buf.back() == 0);
758  char* pos;
759 
760  std::array<char16_t, 2> codepoints;
761  codepoints[0] = std::strtol(buf.data(), &pos, 16);
762  codepoints[1] = 0;
763  if(pos != buf.data() + 4)
764  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token,
765  std::next(first, pos - buf.data()), last,
766  "valid hex characters (0-9;a-f/A-F)"));
767 
768  if(codepoints[0] == 0x0000) {
769  auto null_str = R"(\u0000)";
770  std::advance(first, 4);
771  out = m_data.insert(out, 6, '\0');
772  out = std::copy(null_str, null_str + 6, out);
773  return out;
774  }
775 
776  if(codepoints[0] >= 0xD800 && codepoints[0] <= 0xDBFF) {
777  // Handle UTF-16 surrogate pair
778  std::advance(first, 4);
779  if(*first++ != '\\' || *first++ != 'u')
780  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last,
781  "trail surrogate after lead surrogate (utf-16)"));
782  if(std::next(first, 4) !=
783  std::find_if_not(first, std::next(first, 4), [](auto&& c) { return detail::isxdigit(c); }))
784  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first, last,
785  "4x valid hex characters (0-9;a-f/A-F)"));
786 
787  std::copy(first, std::next(first, 4), buf.begin());
788  assert(buf.back() == 0);
789 
790  codepoints[1] = std::strtol(buf.data(), &pos, 16);
791  if(pos != buf.data() + 4)
792  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token,
793  std::next(first, pos - buf.data()), last,
794  "valid hex characters (0-9;a-f/A-F)"));
795  }
796 
797  std::codecvt_utf8_utf16<char16_t> cvt16;
798  std::mbstate_t state{};
799  buf.fill(0);
800  const char16_t* frm_next;
801  char* to_next;
802  auto res = cvt16.out(state, codepoints.data(), codepoints.data() + codepoints.size(), frm_next, buf.data(),
803  buf.data() + buf.size(), to_next);
804  if(!std::mbsinit(&state) || res != std::codecvt_base::ok)
805  BOOST_THROW_EXCEPTION(
806  make_parse_exception(json_error_num::unexpected_token, first, last, "valid unicode code point(s)"));
807  std::advance(first, 4);
808  const auto len = std::strlen(buf.data());
809  out = m_data.insert(out, len, '\0');
810  out = std::copy(buf.data(), buf.data() + len, out);
811  } else
812  BOOST_THROW_EXCEPTION(
813  make_parse_exception(json_error_num::unexpected_token, first, last, "valid control char"));
814  return out;
815 }
816 
817 template <typename ForwardIterator, typename OutputIterator>
818 std::tuple<OutputIterator, element_type> json_reader::parse_number(line_pos_iterator<ForwardIterator>& first_,
819  const line_pos_iterator<ForwardIterator>& last_,
820  OutputIterator out) {
821  assert(last_ != first_);
822 
823  if(!detail::isdigit(*first_) && *first_ != '-')
824  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first_, last_, "number"));
825  const auto last = std::find_if_not(first_, last_, [](char c) {
826  return detail::isdigit(c) || c == '.' || c == '+' || c == '-' || c == 'e' || c == 'E';
827  });
828 
829  const auto buf_len = detail::is_iterator_pointer<std::decay_t<ForwardIterator>>::value
830  ? std::distance(&*first_, &*last)
831  : std::distance(first_, last);
832  char* buf = (char*)alloca(buf_len + 1);
833  char* const buf_end = buf + buf_len;
834  *buf_end = 0;
835  assert(buf != nullptr);
836  std::copy(first_, last, buf);
837 
838  if(*buf == '0' && buf_len > 1 && *std::next(buf) != '.')
839  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first_, last_, "number"));
840  auto type = element_type::null_element;
841 
842  char* pos;
843  const int64_t val = std::strtoll(buf, &pos, 10);
844 
845  if(pos == buf || pos != buf_end || errno == ERANGE) {
846  errno = 0;
847  char* pos;
848  const double val = std::strtod(buf, &pos);
849 
850  if(val == HUGE_VALL || pos == buf || pos != buf_end)
851  BOOST_THROW_EXCEPTION(make_parse_exception(json_error_num::unexpected_token, first_, last_, "number"));
852 
853  std::advance(first_, buf_len);
854 
855  assert(out >= m_data.begin() && out <= m_data.end());
856  detail::serialise(m_data, out, val);
858  return std::make_tuple(out, type);
859  }
860 
861  std::advance(first_, buf_len);
862 
863  if(val > std::numeric_limits<int32_t>::min() && val < std::numeric_limits<int32_t>::max()) {
864  assert(out >= m_data.begin() && out <= m_data.end());
865  detail::serialise(m_data, out, static_cast<int32_t>(val));
867  } else {
868  assert(out >= m_data.begin() && out <= m_data.end());
869  detail::serialise(m_data, out, val);
871  }
872 
873  return std::make_tuple(out, type);
874 }
875 
876 template <typename ForwardIterator>
877 void json_reader::skip_space(line_pos_iterator<ForwardIterator>& first,
878  const line_pos_iterator<ForwardIterator>& last) {
879  first = std::find_if_not(first, last, [](auto&& c) { return detail::isspace(c); });
880 }
881 
882 inline namespace literal {
883 
884 inline document_set operator"" _json_set(const char* str, size_t len) {
885  auto reader = json_reader{};
886  reader.parse(str, str + len);
887  return document_set(basic_document<std::vector<char>, std::vector<char>>(std::move(reader)));
888 }
889 
890 inline document operator"" _json_doc(const char* str, size_t len) {
891  auto reader = json_reader{};
892  reader.parse(str, str + len);
893  return std::move(reader);
894 }
895 
896 inline array operator"" _json_arr(const char* str, size_t len) {
897  auto reader = json_reader{};
898  reader.parse(str, str + len);
899  return std::move(reader);
900 }
901 
902 inline document_set operator"" _json_set(const wchar_t* str, size_t len) {
903  auto reader = json_reader{};
904  reader.parse(str, str + len);
905  return document_set(basic_document<std::vector<char>, std::vector<char>>(std::move(reader)));
906 }
907 
908 inline document operator"" _json_doc(const wchar_t* str, size_t len) {
909  auto reader = json_reader{};
910  reader.parse(str, str + len);
911  return std::move(reader);
912 }
913 
914 inline array operator"" _json_arr(const wchar_t* str, size_t len) {
915  auto reader = json_reader{};
916  reader.parse(str, str + len);
917  return std::move(reader);
918 }
919 
920 inline document_set operator"" _json_set(const char16_t* str, size_t len) {
921  auto reader = json_reader{};
922  reader.parse(str, str + len);
923  return document_set(basic_document<std::vector<char>, std::vector<char>>(std::move(reader)));
924 }
925 
926 inline document operator"" _json_doc(const char16_t* str, size_t len) {
927  auto reader = json_reader{};
928  reader.parse(str, str + len);
929  return std::move(reader);
930 }
931 
932 inline array operator"" _json_arr(const char16_t* str, size_t len) {
933  auto reader = json_reader{};
934  reader.parse(str, str + len);
935  return std::move(reader);
936 }
937 
938 inline document_set operator"" _json_set(const char32_t* str, size_t len) {
939  auto reader = json_reader{};
940  reader.parse(str, str + len);
941  return document_set(basic_document<std::vector<char>, std::vector<char>>(std::move(reader)));
942 }
943 
944 inline document operator"" _json_doc(const char32_t* str, size_t len) {
945  auto reader = json_reader{};
946  reader.parse(str, str + len);
947  return std::move(reader);
948 }
949 
950 inline array operator"" _json_arr(const char32_t* str, size_t len) {
951  auto reader = json_reader{};
952  reader.parse(str, str + len);
953  return std::move(reader);
954 }
955 
956 } // namespace literal
957 
958 } // namespace jbson
959 
960 JBSON_POP_WARNINGS
961 
962 #endif // JBSON_JSON_READER_HPP
element_type
The element_type enum represents a BSON data type.
Definition: element_fwd.hpp:36
std::string or boost::string_ref (string_type)
BSON document.
Definition: document.hpp:191
std::multiset< basic_element< Container >, detail::elem_compare > basic_document_set
BSON document in the form of a std::set for ease of manipulation.
Definition: element_fwd.hpp:90
void swap(basic_document< Container, EContainer > &a, basic_document< Container, EContainer > &b) noexcept(noexcept(a.swap(b)))
Non-member swap for basic_document. Calls basic_document::swap.
Definition: document.hpp:619
basic_array< std::vector< char >> array
Default basic_array type alias for owned BSON data.
basic_document_set< std::vector< char >> document_set
Default basic_document_set type alias.
Definition: element_fwd.hpp:95
Exception type. Base class of all exceptions thrown directly by jbson.
Definition: error.hpp:23
basic_document> (document_type)
basic_document< std::vector< char >> document
Default basic_document type alias for owned BSON data.
BSON array.
Definition: document.hpp:534