|
22 | 22 | #define USING_FLOAT_MUL 1 /* Using floating-point multiplication */
|
23 | 23 | #endif
|
24 | 24 |
|
25 |
| -#ifndef APPROX_TAIL_CMP_VAL |
26 |
| -#define APPROX_TAIL_CMP_VAL 2 /* The value should be less than or equal to 4 */ |
27 |
| -#endif |
28 |
| -static const uint32_t s_tail_cmp = (APPROX_TAIL_CMP_VAL << 1) + 1; |
29 |
| - |
30 | 25 | typedef struct {
|
31 | 26 | uint64_t f;
|
32 | 27 | int32_t e;
|
@@ -686,6 +681,84 @@ static inline diy_fp_t negative_diy_fp(int32_t e)
|
686 | 681 | return v;
|
687 | 682 | }
|
688 | 683 |
|
| 684 | +static inline void ldouble_convert(diy_fp_t *v) |
| 685 | +{ |
| 686 | + uint64_t f = v->f; |
| 687 | + int32_t e = v->e, t = v->e; |
| 688 | + diy_fp_t x; |
| 689 | + double d; |
| 690 | + |
| 691 | + e >>= 2; |
| 692 | + t -= e << 2; |
| 693 | + if (t) { |
| 694 | + f <<= t; |
| 695 | + } |
| 696 | + x = e >= 0 ? positive_diy_fp(e) : negative_diy_fp(-e); |
| 697 | + |
| 698 | + d = (double)f * x.f; |
| 699 | + if (d < 1e33) { |
| 700 | + v->f = (uint64_t)((d + 5e17) * 1e-18); |
| 701 | + v->e = e - x.e + 18; |
| 702 | + } else if (d < 1e34) { |
| 703 | + v->f = (uint64_t)((d + 5e18) * 1e-19); |
| 704 | + v->e = e - x.e + 19; |
| 705 | + } else { |
| 706 | + v->f = (uint64_t)((d + 5e19) * 1e-20); |
| 707 | + v->e = e - x.e + 20; |
| 708 | + } |
| 709 | +} |
| 710 | + |
| 711 | +static inline int32_t fill_significand(char *buffer, uint64_t digits, int32_t *ptz, int32_t *fixed) |
| 712 | +{ |
| 713 | + char *s = buffer; |
| 714 | + uint32_t q, r, q1, r1, q2, r2; |
| 715 | + |
| 716 | + *fixed = 0; |
| 717 | + q = (uint32_t)(digits / 100000000); |
| 718 | + r = (uint32_t)(digits - (uint64_t)q * 100000000); |
| 719 | + q1 = FAST_DIV10000(q); |
| 720 | + r1 = q - q1 * 10000; |
| 721 | + q2 = FAST_DIV100(q1); |
| 722 | + r2 = q1 - q2 * 100; |
| 723 | + |
| 724 | + if (q2 >= 10) { |
| 725 | + *ptz = tz_100_lut[q2]; |
| 726 | + memcpy(s, &ch_100_lut[q2<<1], 2); |
| 727 | + s += 2; |
| 728 | + } else { |
| 729 | + *ptz = 0; |
| 730 | + *s++ = q2 + '0'; |
| 731 | + } |
| 732 | + |
| 733 | + if (!r2) { |
| 734 | + *ptz += 2; |
| 735 | + memset(s, '0', 2); |
| 736 | + s += 2; |
| 737 | + } else { |
| 738 | + *ptz = tz_100_lut[r2]; |
| 739 | + memcpy(s, &ch_100_lut[r2<<1], 2); |
| 740 | + s += 2; |
| 741 | + } |
| 742 | + |
| 743 | + if (!r1) { |
| 744 | + *ptz += 4; |
| 745 | + memset(s, '0', 4); |
| 746 | + s += 4; |
| 747 | + } else { |
| 748 | + s += fill_t_4_digits(s, r1, ptz); |
| 749 | + } |
| 750 | + |
| 751 | + if (!r) { |
| 752 | + memset(s + 8, '0', 8); |
| 753 | + *ptz += 8; |
| 754 | + s += 8; |
| 755 | + } else { |
| 756 | + s += fill_t_8_digits(s, r, ptz); |
| 757 | + } |
| 758 | + |
| 759 | + return s - buffer; |
| 760 | +} |
| 761 | + |
689 | 762 | #else
|
690 | 763 |
|
691 | 764 | /*
|
@@ -775,6 +848,11 @@ typedef struct {
|
775 | 848 | int32_t e;
|
776 | 849 | } pow9x2_t;
|
777 | 850 |
|
| 851 | +#ifndef APPROX_TAIL_CMP_VAL |
| 852 | +#define APPROX_TAIL_CMP_VAL 2 /* The value should be less than or equal to 4 */ |
| 853 | +#endif |
| 854 | +static const uint32_t s_tail_cmp = (APPROX_TAIL_CMP_VAL << 1) + 1; |
| 855 | + |
778 | 856 | static inline pow9x2_t positive_diy_fp(int32_t e)
|
779 | 857 | {
|
780 | 858 | static const u32x2_t positive_base_lut[244] = {
|
@@ -960,37 +1038,10 @@ static inline pow9x2_t negative_diy_fp(int32_t e)
|
960 | 1038 | return v;
|
961 | 1039 | }
|
962 | 1040 |
|
963 |
| -#endif |
964 |
| - |
965 | 1041 | static inline void ldouble_convert(diy_fp_t *v)
|
966 | 1042 | {
|
967 | 1043 | uint64_t f = v->f;
|
968 | 1044 | int32_t e = v->e, t = v->e;
|
969 |
| - |
970 |
| -#if USING_FLOAT_MUL |
971 |
| - diy_fp_t x; |
972 |
| - double d; |
973 |
| - |
974 |
| - e >>= 2; |
975 |
| - t -= e << 2; |
976 |
| - if (t) { |
977 |
| - ++e; |
978 |
| - f >>= 4 - t; |
979 |
| - } |
980 |
| - x = e >= 0 ? positive_diy_fp(e) : negative_diy_fp(-e); |
981 |
| - |
982 |
| - d = (double)f * x.f; |
983 |
| - if (d < 1e32) { |
984 |
| - v->f = (uint64_t)(d * 1e-17); |
985 |
| - v->e = e - x.e + 17; |
986 |
| - } else if (d < 1e34) { |
987 |
| - v->f = (uint64_t)(d * 1e-18); |
988 |
| - v->e = e - x.e + 18; |
989 |
| - } else { |
990 |
| - v->f = (uint64_t)(d * 1e-19); |
991 |
| - v->e = e - x.e + 19; |
992 |
| - } |
993 |
| -#else |
994 | 1045 | pow9x2_t x;
|
995 | 1046 | uint64_t hi, lo;
|
996 | 1047 |
|
@@ -1018,7 +1069,6 @@ static inline void ldouble_convert(diy_fp_t *v)
|
1018 | 1069 | v->f = f / 10;
|
1019 | 1070 | v->e = e - x.e + 19;
|
1020 | 1071 | }
|
1021 |
| -#endif |
1022 | 1072 | }
|
1023 | 1073 |
|
1024 | 1074 | static inline int32_t fill_a_4_digits(char *buffer, uint32_t digits, int32_t *ptz)
|
@@ -1156,6 +1206,8 @@ static inline int32_t fill_significand(char *buffer, uint64_t digits, int32_t *p
|
1156 | 1206 | return s - buffer;
|
1157 | 1207 | }
|
1158 | 1208 |
|
| 1209 | +#endif |
| 1210 | + |
1159 | 1211 | static inline int32_t fill_exponent(int32_t K, char *buffer)
|
1160 | 1212 | {
|
1161 | 1213 | int32_t i = 0, k = 0;
|
|
0 commit comments