CNL  2.0.2 (development)
Compositional Numeric Library
parse.h
1 
2 // Copyright John McFarlane 2021.
3 // Distributed under the Boost Software License, Version 1.0.
4 // (See accompanying file ../LICENSE_1_0.txt or copy at
5 // http://www.boost.org/LICENSE_1_0.txt)
6 
7 #if !defined(CNL_IMPL_PARSE_H)
8 #define CNL_IMPL_PARSE_H
9 
10 #include "charconv/constants.h"
11 #include "charconv/descale.h"
12 #include "cnl_assert.h"
13 #include "config.h"
14 #include "narrow_cast.h"
15 #include "num_traits/digits.h"
16 #include "num_traits/max_digits.h"
17 #include "num_traits/set_digits.h"
18 #include "unreachable.h"
19 
20 #include <algorithm>
21 #include <cstring>
22 #include <numeric>
23 #include <tuple>
24 #include <utility>
25 
26 namespace cnl {
27  namespace _impl {
28 
30  // cnl::_impl::strlen - constexpr version of std::strlen
31 
32  constexpr auto strlen(const char* str)
33  {
34  auto const* it{str};
35  while (*it) {
36  it++;
37  }
38  return narrow_cast<int>(it - str);
39  }
40 
42  // scale_op
43 
// Pointer to a function which takes a running total by const reference
// and returns the scaled total by value.
template<typename Sum>
using scale_op_t = auto (*)(Sum const&) -> Sum;
46 
47  constexpr auto make_scale_op(int base) -> scale_op_t<std::int64_t>
48  {
49  switch (base) {
50  case 2:
51  return [](std::int64_t const& sum) {
52  return std::int64_t{(sum << 1)};
53  };
54  case 8:
55  return [](std::int64_t const& sum) {
56  return std::int64_t{(sum << 3)};
57  };
58  case 10:
59  return [](std::int64_t const& sum) {
60  return std::int64_t{(sum * 10)};
61  };
62  case 16:
63  return [](std::int64_t const& sum) {
64  return std::int64_t{(sum << 4)};
65  };
66  default:
67  return unreachable<scale_op_t<std::int64_t>>("unsupported number base");
68  }
69  }
70 
71  template<typename Sum>
72  constexpr auto make_scale_op_chunk(int base) -> scale_op_t<Sum>
73  {
74  switch (base) {
75  case 2:
76  return [](Sum const& sum) {
77  return Sum{(sum << 63)};
78  };
79  case 8:
80  return [](Sum const& sum) {
81  return Sum{(sum << 63)};
82  };
83  case 10:
84  return [](Sum const& sum) {
85  return Sum{(sum * 1'000'000'000'000'000'000)};
86  };
87  case 0x10:
88  return [](Sum const& sum) {
89  return Sum{(sum << 60)};
90  };
91  default:
92  return unreachable<scale_op_t<Sum>>("unsupported number base");
93  }
94  }
95 
97  // char_to_digit
98 
// Pointer to a function which converts one numeral character to its integer value.
using char_to_digit_t = auto (*)(char) -> int;
100 
101  constexpr auto make_char_to_digit_negative(int base) -> char_to_digit_t
102  {
103  switch (base) {
104  case 2:
105  return [](char c) {
106  return (c >= '0' && c <= '1') ? '0' - c : unreachable<int>("invalid binary digit");
107  };
108  case 8:
109  return [](char c) {
110  return (c >= '0' && c <= '7') ? '0' - c : unreachable<int>("invalid octal digit");
111  };
112  case 10:
113  return [](char c) {
114  return (c >= '0' && c <= '9') ? '0' - c : unreachable<int>("invalid decimal digit");
115  };
116  case 16:
117  return [](char c) {
118  return (c >= '0' && c <= '9') ? '0' - c : (c >= 'a' && c <= 'z') ? ('a' - 10) - c
119  : (c >= 'A' && c <= 'Z') ? ('A' - 10) - c
120  : unreachable<int>("invalid hexadecimal digit");
121  };
122  default:
123  return unreachable<char_to_digit_t>("unsupported number base");
124  }
125  }
126 
127  constexpr auto make_char_to_digit_positive(int base) -> char_to_digit_t
128  {
129  switch (base) {
130  case 2:
131  return [](char c) {
132  return (c >= '0' && c <= '1') ? c - '0' : unreachable<int>("invalid binary digit");
133  };
134  case 8:
135  return [](char c) {
136  return (c >= '0' && c <= '7') ? c - '0' : unreachable<int>("invalid octal digit");
137  };
138  case 10:
139  return [](char c) {
140  return (c >= '0' && c <= '9') ? c - '0' : unreachable<int>("invalid decimal digit");
141  };
142  case 16:
143  return [](char c) {
144  return (c >= '0' && c <= '9') ? c - '0' : (c >= 'a' && c <= 'z') ? c - ('a' - 10)
145  : (c >= 'A' && c <= 'Z') ? c - ('A' - 10)
146  : unreachable<int>("invalid hexadecimal digit");
147  };
148  default:
149  return unreachable<char_to_digit_t>("unsupported number base");
150  };
151  }
152 
153  constexpr auto make_char_to_digit(bool is_negative, int base)
154  {
155  return is_negative
156  ? make_char_to_digit_negative(base)
157  : make_char_to_digit_positive(base);
158  }
159 
161  // scan_string
162 
// Result of scanning a numeric string, prior to parsing its digits.
struct params {
    // true if the string starts with a minus sign
    bool is_negative;

    // number base: 2, 8, 10 or 16
    int base;

    // number of digits which are accumulated into one 64-bit chunk
    int stride;

    // index, from the start of the string, of the first character after
    // any sign and base prefix
    int first_numeral;

    // estimate of the number of binary digits needed to hold the value
    int num_bits;

    // number of digit characters, excluding sign, prefix, separators and radix
    int num_digits;

    // number of digit characters after the radix point
    int num_fractional_digits;
};
172 
// Digit separator character which may appear between numerals, e.g. 1'000.
// Declared inline so that every translation unit including this header
// refers to the same object when the constant is bound to a reference
// (as the standard algorithms do with their value argument).
inline constexpr auto separator{'\''};
174 
175  [[nodiscard]] constexpr auto scan_msb(
176  char const* str, bool is_negative,
177  int base, int stride, int offset,
178  int max_num_bits, int num_digits, int num_fractional_digits)
179  {
180  // If most significant digit char is not too great, use a slightly narrower result type.
181  // In turn, ensure that large signed numbers don't 'nudge' over to wider types.
182  // E.g. 0x10000000 gets returned as 8*4-1=31 digits, not 8*4=32 digits.
183  // A comprehensive solution would fully optimise width of result type.
184  // E.g. 0x00000001 would report num_bits=1, not 31.
185  // But this is fast, simple and avoids the pathological case.
186  auto const first_digit_char{str[offset + (str[offset] == radix_char)]};
187  auto const first_digit{make_char_to_digit_positive(base)(first_digit_char)};
188  return params{
189  is_negative, base,
190  stride,
191  offset,
192  max_num_bits - (first_digit * 2 < base),
193  num_digits,
194  num_fractional_digits};
195  }
196 
197  [[nodiscard]] constexpr auto scan_base(char const* str, bool is_negative, int offset, int length)
198  {
199  auto const last{str + length};
200  auto const found_radix{std::find(str, last, radix_char)};
201  auto const has_radix{found_radix != last};
202  auto const post_radix{found_radix + has_radix};
203  auto const pre_radix_separators{narrow_cast<int>(std::count(str, found_radix, separator))};
204  auto const post_radix_separators{narrow_cast<int>(std::count(post_radix, last, separator))};
205  auto const num_separators{pre_radix_separators + post_radix_separators};
206  auto const num_non_separators{length - (num_separators + has_radix)};
207  auto const num_fractional_digits{
208  narrow_cast<int>(std::distance(post_radix, last)) - post_radix_separators};
209 
210  auto const is_decimal{str[offset] != '0' || has_radix};
211  if (is_decimal || offset + 1 >= num_non_separators) {
212  static_assert(std::numeric_limits<int32_t>::digits10 == 9);
213  auto const num_digits{num_non_separators};
214  return scan_msb(str, is_negative, 10, 18, offset, (num_digits * 3322 + 678) / 1000, num_digits, num_fractional_digits);
215  }
216  switch (str[offset + 1]) {
217  case 'B':
218  case 'b': {
219  auto const num_digits{num_non_separators - 2};
220  return scan_msb(str, is_negative, 2, 63, offset + 2, num_digits, num_digits, num_fractional_digits);
221  }
222  case 'X':
223  case 'x': {
224  auto const num_digits{num_non_separators - 2};
225  return scan_msb(str, is_negative, 16, 15, offset + 2, num_digits * 4, num_digits, num_fractional_digits);
226  }
227  default:
228  auto const num_digits{num_non_separators - 1};
229  return scan_msb(str, is_negative, 8, 21, offset + 1, num_digits * 3, num_digits, num_fractional_digits);
230  }
231  }
232 
233  [[nodiscard]] inline constexpr auto scan_string(char const* str, int length)
234  {
235  switch (str[0]) {
236  case plus_char:
237  return scan_base(str, false, 1, length - 1);
238  case minus_char:
239  return scan_base(str, true, 1, length - 1);
240  default:
241  return scan_base(str, false, 0, length);
242  }
243  }
244 
245  template<int Length>
246  // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
247  [[nodiscard]] inline constexpr auto scan_string(char const (&str)[Length])
248  {
249  return scan_string(str, Length);
250  }
251 
253  // parse_string
254 
255  template<typename Result>
256  [[nodiscard]] constexpr auto parse_string(
257  char const* first, int num_digits, bool is_negative, int base, int stride)
258  {
259  auto const parse_int64 = [&num_digits, &first,
260  char_to_digit = make_char_to_digit(is_negative, base),
261  scale_op = make_scale_op(base)](int n) {
262  std::int64_t init{};
263  num_digits -= n;
264  CNL_ASSERT(num_digits >= 0);
265  while (n) {
266  auto const digit{*first++};
267  CNL_ASSERT(digit);
268  if (digit != separator && digit != radix_char) {
269  init = scale_op(init) + char_to_digit(digit);
270  n--;
271  }
272  }
273  return init;
274  };
275 
276  auto const chunk_scale_op{make_scale_op_chunk<Result>(base)};
277 
278  Result init(parse_int64((num_digits + stride) % stride));
279 
280  // This loop exits when parse_int64 sets num_digits to zero.
281  // NOLINTNEXTLINE(bugprone-infinite-loop)
282  while (num_digits) {
283  init = chunk_scale_op(std::move(init)) + parse_int64(stride);
284  }
285 
286  return init;
287  }
288 
// Array overload: parses the numerals of `str` which begin at index
// `first_numeral`, forwarding to the pointer-based overload.
template<typename Result, int NumChars>
[[nodiscard]] constexpr auto parse_string(
        // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
        char const (&str)[NumChars], int num_digits, bool is_negative, int base, int stride, int first_numeral)
{
    char const* const numerals{str + first_numeral};
    return parse_string<Result>(numerals, num_digits, is_negative, base, stride);
}
296 
// Compile-time overload: the string is supplied as the character pack
// `Chars` and the scan results as template arguments. The characters are
// placed in an array and handed to the array overload.
template<typename Result, int NumDigits, bool IsNegative, int Base, int Stride, int FirstNumeral, char... Chars>
[[nodiscard]] constexpr auto parse_string()
{
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    constexpr char numerals[sizeof...(Chars)]{Chars...};
    return parse_string<Result, sizeof...(Chars)>(
            numerals, NumDigits, IsNegative, Base, Stride, FirstNumeral);
}
302 
304  // parse
305 
306  template<typename Narrowest>
307  [[nodiscard]] constexpr auto parse(char const* str)
308  {
309  auto const length{strlen(str)};
310  auto params{scan_string(str, length)};
311  return parse_string<Narrowest>(
312  str + params.first_numeral,
313  params.num_digits,
314  params.is_negative,
315  params.base,
316  params.stride);
317  }
318 
319  template<typename Narrowest, char... Chars>
320  [[nodiscard]] constexpr auto parse_real()
321  {
322  constexpr auto params{scan_string({Chars...})};
323  constexpr auto result_digits{
324  std::max(digits_v<Narrowest>, std::min(params.num_bits, max_digits<Narrowest>))};
325  using result_type = set_digits_t<Narrowest, result_digits>;
326 
327  return descaled<result_type, params.base>{
328  parse_string<
329  result_type,
330  params.num_digits,
331  params.is_negative,
332  params.base,
333  params.stride,
334  params.first_numeral,
335  Chars...>(),
336  -params.num_fractional_digits};
337  }
338 
339  template<typename Narrowest, char... Chars>
340  [[nodiscard]] constexpr auto parse()
341  {
342  constexpr auto parsed{parse_real<Narrowest, Chars...>()};
343  static_assert(
344  parsed.exponent == 0,
345  "non-integer number");
346  return parsed.significand;
347  }
348  }
349 }
350 
351 #endif // CNL_IMPL_PARSE_H
std::strlen
T strlen(T... args)
std::move
T move(T... args)
std::find
T find(T... args)
std::distance
T distance(T... args)
std::int64_t
cnl
compositional numeric library
Definition: abort.h:15
std::min
T min(T... args)
std::count
T count(T... args)
config.h
definitions which can be used to configure CNL library
std::max
T max(T... args)
std::numeric_limits