20
20
21
21
#include " decimal_fp.h"
22
22
#include " dragonbox.h"
23
- #include " ryu_printf.h"
24
23
#include " policy.h"
25
24
#include " detail/bits.h"
26
25
#include " detail/div.h"
27
26
#include " detail/log.h"
28
27
#include " detail/util.h"
29
- #include " detail/macros.h"
30
28
#include < cassert>
31
29
#include < cstdint>
32
30
@@ -76,10 +74,8 @@ namespace jkj::fp {
76
74
static constexpr auto max_significand =
77
75
compute_power<decimal_digit_limit>(carrier_uint(10 )) - 1 ;
78
76
79
- static constexpr auto sign_bit_mask =
80
- carrier_uint (carrier_uint(1 ) << (significand_bits + exponent_bits));
81
- static constexpr auto infinity =
82
- carrier_uint (((carrier_uint(1 ) << exponent_bits) - 1 ) << significand_bits);
77
+ static constexpr auto sign_bit_mask = ieee754_traits<Float>::negative_zero();
78
+ static constexpr auto infinity = ieee754_traits<Float>::positive_infinity();
83
79
84
80
static constexpr auto normal_residual_mask =
85
81
carrier_uint ((carrier_uint(1 ) << (carrier_bits - significand_bits - 2 )) - 1);
@@ -309,195 +305,6 @@ namespace jkj::fp {
309
305
decltype (policy::sign::propagate),
310
306
decltype (policy::cache::fast)>(decimal);
311
307
}
312
-
313
- // Does the same thing as ryu_printf, but specialized for the usage in Dooly.
314
- // Suppose the input decimal number is of the form
315
- // (g + alpha) * 10^k, where g is an integer and alpha is in [0,1).
316
- // Write k = q * eta + r for integers q, r with 0 <= r < eta, where eta is the segment length.
317
- // What this class does is to generate digits of 2^e for given e, from the
318
- // segment index given as a construction parameter, which should be -q.
319
- // This class will tell if the given initial segment index is the first nonzero segment or not.
320
- // If there are preceding nonzero segments, then that means 2^e is strictly greater than
321
- // alpha * 10^k. If not, then compare floor(alpha * 10^r) with the initial segment
322
- // generated from this class. If they are equal, then compare
323
- // floor(alpha * 10^(r + eta)) with the next segment generated, and if they are still equal,
324
- // then compare floor(alpha * 10^(r + 2 * eta)) with the next segment generated,
325
- // and so on. In this way, the user can compare alpha * 10^k and 2^e.
326
- template <class Float >
327
- class dooly_generator : private detail ::ryu_printf::impl_base<ieee754_traits<Float>::format>
328
- {
329
- public:
330
- static constexpr auto format = ieee754_traits<Float>::format;
331
-
332
- private:
333
- using impl_base = detail::ryu_printf::impl_base<format>;
334
- using impl_base::significand_bits;
335
- using impl_base::min_exponent;
336
- using impl_base::exponent_bias;
337
- using impl_base::segment_bit_size;
338
- using impl_base::compression_factor;
339
- using fast_cache_holder = detail::ryu_printf::fast_cache_holder<format>;
340
- using cache_entry_type = typename fast_cache_holder::cache_entry_type;
341
-
342
- using carrier_uint = typename ieee754_traits<Float>::carrier_uint;
343
- static constexpr auto carrier_bits = ieee754_traits<Float>::carrier_bits;
344
-
345
- int exponent_;
346
- std::uint32_t segment_;
347
- int segment_index_; // n
348
- int exponent_index_; // k
349
- int remainder_; // r
350
- int min_segment_index_;
351
- int max_segment_index_;
352
-
353
- public:
354
- using impl_base::segment_size;
355
- using impl_base::segment_divisor;
356
-
357
- static constexpr auto max_nonzero_decimal_digits =
358
- detail::log::floor_log10_pow5 (significand_bits - min_exponent) +
359
- detail::log::floor_log10_pow2 (significand_bits) + 2 ;
360
-
361
- // Computes the first segmnet on construction.
362
- JKJ_FORCEINLINE dooly_generator (int exponent, int initial_segment_index) noexcept
363
- : exponent_{ exponent }, segment_index_{ initial_segment_index }
364
- {
365
- // 2^e * 10^(n * eta) >= 1
366
- // <=>
367
- // n >= ceil(-e * log10(2) / eta) = -floor(e * log10(2) / eta)
368
- if (exponent_ >= 0 ) {
369
- // Avoid signed division.
370
- min_segment_index_ =
371
- -int (unsigned (detail::log::floor_log10_pow2 (exponent_)) / unsigned (segment_size));
372
- max_segment_index_ = 0 ;
373
- }
374
- else {
375
- // Avoid signed division.
376
- min_segment_index_ =
377
- int (unsigned (detail::log::floor_log10_pow2 (-exponent_) / unsigned (segment_size))) + 1 ;
378
- max_segment_index_ = int (unsigned (-exponent_ + segment_size - 1 ) / unsigned (segment_size));
379
- }
380
-
381
- // We will compute the first segment.
382
-
383
- // Avoid signed division.
384
- int pow2_exponent = exponent_ + segment_index_ * segment_size;
385
- if (pow2_exponent >= 0 ) {
386
- exponent_index_ = int (unsigned (pow2_exponent) / unsigned (compression_factor));
387
- remainder_ = int (unsigned (pow2_exponent) % unsigned (compression_factor));
388
- }
389
- else {
390
- exponent_index_ = -int (unsigned (-pow2_exponent) / unsigned (compression_factor));
391
- remainder_ = int (unsigned (-pow2_exponent) % unsigned (compression_factor));
392
-
393
- if (remainder_ != 0 ) {
394
- --exponent_index_;
395
- remainder_ = compression_factor - remainder_;
396
- }
397
- }
398
-
399
- // Get the first nonzero segment.
400
- if (segment_index_ >= min_segment_index_ && segment_index_ <= max_segment_index_) {
401
- segment_ = compute_segment ();
402
- }
403
- else {
404
- segment_ = 0 ;
405
- }
406
- }
407
-
408
- std::uint32_t current_segment () const noexcept {
409
- return segment_;
410
- }
411
-
412
- int current_segment_index () const noexcept {
413
- return segment_index_;
414
- }
415
-
416
- bool has_further_nonzero_segments () const noexcept {
417
- if (segment_index_ >= max_segment_index_) {
418
- return false ;
419
- }
420
- else {
421
- // Check if there are reamining nonzero digits,
422
- // which is equivalent to that f * 2^e * 10^(n * eta) is not an integer.
423
-
424
- auto minus_pow5_exponent = -segment_index_ * segment_size;
425
- auto minus_pow2_exponent = -exponent_ + minus_pow5_exponent;
426
-
427
- return minus_pow2_exponent > 0 || minus_pow5_exponent > 0 ;
428
- }
429
- }
430
-
431
- // Returns true if there might be nonzero segments remaining,
432
- // and returns false if all following segments are zero.
433
- JKJ_FORCEINLINE bool compute_next_segment () noexcept {
434
- ++segment_index_;
435
- if (segment_index_ <= max_segment_index_) {
436
- on_increase_segment_index ();
437
- return true ;
438
- }
439
- else {
440
- segment_ = 0 ;
441
- return false ;
442
- }
443
- }
444
-
445
- private:
446
- JKJ_FORCEINLINE std::uint32_t compute_segment () const noexcept {
447
- auto const & cache = fast_cache_holder::cache[exponent_index_ +
448
- fast_cache_holder::get_starting_index_minus_min_k (segment_index_)];
449
- return multiply_shift_mod (cache, segment_bit_size + remainder_);
450
- }
451
-
452
- JKJ_FORCEINLINE void on_increase_segment_index () noexcept {
453
- assert (segment_index_ <= max_segment_index_);
454
- remainder_ += segment_size;
455
- static_assert (segment_size < compression_factor);
456
- if (remainder_ >= compression_factor) {
457
- ++exponent_index_;
458
- remainder_ -= compression_factor;
459
- }
460
- if (segment_index_ >= min_segment_index_) {
461
- segment_ = compute_segment ();
462
- }
463
- }
464
-
465
- JKJ_SAFEBUFFERS JKJ_FORCEINLINE static std::uint32_t multiply_shift_mod (
466
- cache_entry_type const & y, int shift_amount) noexcept
467
- {
468
- using namespace detail ;
469
-
470
- if constexpr (format == ieee754_format::binary32) {
471
- static_assert (value_bits<carrier_uint> <= 32 );
472
- assert (shift_amount > 0 && shift_amount <= 64 );
473
- auto shift_result = ((std::uint64_t (y[0 ]) << 32 ) | y[1 ]) >> (64 - shift_amount);
474
-
475
- return std::uint32_t (shift_result % segment_divisor);
476
- }
477
- else {
478
- static_assert (format == ieee754_format::binary64);
479
- static_assert (value_bits<carrier_uint> <= 64 );
480
- assert (shift_amount > 0 && shift_amount <= 128 );
481
- auto shift_result = shift_amount <= 64 ?
482
- wuint::uint128{ 0 , y[0 ] >> (64 - shift_amount) } :
483
- wuint::uint128{ y[0 ], y[1 ] } >> (128 - shift_amount);
484
-
485
- // Granlund-Montgomery style division by 10^9
486
- static_assert (compression_factor + segment_size - 1 <= 75 );
487
- // Note that shift_result is of shift_amount bits.
488
- // For 75-bit numbers, it is sufficient to use 76-bits, but to
489
- // make computation simpler, we will use 99-bits.
490
- // The result of multiplication fits inside 192-bit,
491
- // and the corresponding shift amount is 128 bits so we just need to
492
- // take the upper 64-bits.
493
- auto const c = wuint::uint128{ 0x4'4b82'fa09 , 0xb5a5'2cb9'8bc9'c4a7 };
494
-
495
- auto q = wuint::umul256_upper_middle64 (shift_result, c);
496
- return std::uint32_t (shift_result.low ()) - segment_divisor * std::uint32_t (q);
497
- }
498
- }
499
- };
500
308
}
501
309
502
- #include " detail/undef_macros.h"
503
310
#endif
0 commit comments