Skip to content

Commit cff64b6

Browse files
committed
optimized bc_raise
1 parent e946fd4 commit cff64b6

File tree

1 file changed

+175
-31
lines changed

1 file changed

+175
-31
lines changed

ext/bcmath/libbcmath/src/raise.c

+175-31
Original file line numberDiff line numberDiff line change
@@ -30,24 +30,164 @@
3030
*************************************************************************/
3131

3232
#include "bcmath.h"
33+
#include "convert.h"
34+
#include "private.h"
3335
#include <assert.h>
3436
#include <stdbool.h>
3537
#include <stddef.h>
3638

37-
void bc_square_ex(bc_num n1, bc_num *result, size_t scale_min) {
38-
bc_num square_ex = bc_square(n1, scale_min);
39-
bc_free_num(result);
40-
*(result) = square_ex;
39+
static inline size_t bc_multiply_vector_ex(
40+
BC_VECTOR **n1_vector, size_t n1_arr_size, BC_VECTOR *n2_vector, size_t n2_arr_size, BC_VECTOR **result_vector)
41+
{
42+
size_t result_arr_size = n1_arr_size + n2_arr_size;
43+
bc_multiply_vector(*n1_vector, n1_arr_size, n2_vector, n2_arr_size, *result_vector, result_arr_size);
44+
45+
/* Eliminate extra zeros because they increase the number of calculations. */
46+
while ((*result_vector)[result_arr_size - 1] == 0) {
47+
result_arr_size--;
48+
}
49+
50+
/* Swap n1_vector and result_vector. */
51+
BC_VECTOR *tmp = *n1_vector;
52+
*n1_vector = *result_vector;
53+
*result_vector = tmp;
54+
55+
return result_arr_size;
56+
}
57+
58+
static inline size_t bc_square_vector_ex(BC_VECTOR **base_vector, size_t base_arr_size, BC_VECTOR **result_vector)
59+
{
60+
return bc_multiply_vector_ex(base_vector, base_arr_size, *base_vector, base_arr_size, result_vector);
61+
}
62+
63+
/* Use "exponentiation by squaring". This is the fast path when the results are small. */
64+
static inline bc_num bc_fast_raise(
65+
const char *base_end, long exponent, size_t base_len, size_t power_len, size_t power_scale, size_t power_full_len)
66+
{
67+
BC_VECTOR base_vector = 0;
68+
69+
/* Convert to BC_VECTOR[] */
70+
bc_convert_to_vector(&base_vector, base_end, base_len);
71+
72+
while ((exponent & 1) == 0) {
73+
base_vector *= base_vector;
74+
exponent >>= 1;
75+
}
76+
77+
/* copy base to power */
78+
BC_VECTOR power_vector = base_vector;
79+
exponent >>= 1;
80+
81+
while (exponent > 0) {
82+
base_vector *= base_vector;
83+
if ((exponent & 1) == 1) {
84+
power_vector *= base_vector;
85+
}
86+
exponent >>= 1;
87+
}
88+
89+
bc_num power = bc_new_num_nonzeroed(power_len, power_scale);
90+
char *pptr = power->n_value;
91+
char *pend = pptr + power_full_len - 1;
92+
93+
while (pend >= pptr) {
94+
*pend-- = power_vector % BASE;
95+
power_vector /= BASE;
96+
}
97+
return power;
98+
}
99+
100+
/* Use "exponentiation by squaring". This is the standard path. */
101+
static bc_num bc_standard_raise(
102+
const char *base_end, long exponent, size_t base_len, size_t power_len, size_t power_scale, size_t power_full_len)
103+
{
104+
size_t base_arr_size = (base_len + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
105+
size_t power_arr_size = base_arr_size * exponent;
106+
107+
/* The allocated memory area is reused on a rotational basis, so the same size is required. */
108+
BC_VECTOR *buf = safe_emalloc(power_arr_size * 3, sizeof(BC_VECTOR), 0);
109+
BC_VECTOR *base_vector = buf;
110+
BC_VECTOR *power_vector = base_vector + power_arr_size;
111+
BC_VECTOR *tmp_result_vector = power_vector + power_arr_size;
112+
113+
/* Convert to BC_VECTOR[] */
114+
bc_convert_to_vector(base_vector, base_end, base_len);
115+
116+
while ((exponent & 1) == 0) {
117+
base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector);
118+
exponent >>= 1;
119+
}
120+
121+
/* copy base to power */
122+
size_t tmp_power_arr_size = base_arr_size;
123+
for (size_t i = 0; i < base_arr_size; i++) {
124+
power_vector[i] = base_vector[i];
125+
}
126+
exponent >>= 1;
127+
128+
while (exponent > 0) {
129+
base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector);
130+
if ((exponent & 1) == 1) {
131+
tmp_power_arr_size = bc_multiply_vector_ex(&power_vector, tmp_power_arr_size, base_vector, base_arr_size, &tmp_result_vector);
132+
}
133+
exponent >>= 1;
134+
}
135+
power_arr_size = tmp_power_arr_size;
136+
137+
/* Convert to bc_num */
138+
size_t calc_power_full_len = power_arr_size * BC_VECTOR_SIZE;
139+
size_t power_leading_zeros = 0;
140+
if (calc_power_full_len > power_scale) {
141+
power_len = calc_power_full_len - power_scale;
142+
power_full_len = calc_power_full_len;
143+
} else {
144+
power_len = 1;
145+
power_full_len = power_scale + 1;
146+
power_leading_zeros = power_scale - calc_power_full_len + 1;
147+
}
148+
bc_num power = bc_new_num_nonzeroed(power_len, power_scale);
149+
150+
char *pptr = power->n_value;
151+
char *pend = pptr + power_full_len - 1;
152+
153+
/* Pad with leading zeros if necessary. */
154+
while (power_leading_zeros > sizeof(uint32_t)) {
155+
bc_write_bcd_representation(0, pptr);
156+
pptr += sizeof(uint32_t);
157+
power_leading_zeros -= sizeof(uint32_t);
158+
}
159+
for (size_t i = 0; i < power_leading_zeros; i++) {
160+
*pptr++ = 0;
161+
}
162+
163+
size_t i = 0;
164+
while (i < power_arr_size - 1) {
165+
#if BC_VECTOR_SIZE == 4
166+
bc_write_bcd_representation(power_vector[i], pend - 3);
167+
pend -= 4;
168+
#else
169+
bc_write_bcd_representation(power_vector[i] / 10000, pend - 7);
170+
bc_write_bcd_representation(power_vector[i] % 10000, pend - 3);
171+
pend -= 8;
172+
#endif
173+
i++;
174+
}
175+
176+
while (pend >= pptr) {
177+
*pend-- = power_vector[i] % BASE;
178+
power_vector[i] /= BASE;
179+
}
180+
181+
efree(buf);
182+
183+
return power;
41184
}
42185

43186
/* Raise "base" to the "exponent" power. The result is placed in RESULT.
44187
Maximum exponent is LONG_MAX. If a "exponent" is not an integer,
45188
only the integer part is used. */
46189
bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) {
47-
bc_num temp, power;
48190
size_t rscale;
49-
size_t pwrscale;
50-
size_t calcscale;
51191
bool is_neg;
52192

53193
/* Special case if exponent is a zero. */
@@ -74,43 +214,47 @@ bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) {
74214
return !is_neg;
75215
}
76216

77-
/* Set initial value of temp. */
78-
power = bc_copy_num(base);
79-
pwrscale = base->n_scale;
80-
while ((exponent & 1) == 0) {
81-
pwrscale = 2 * pwrscale;
82-
bc_square_ex(power, &power, pwrscale);
83-
exponent = exponent >> 1;
217+
size_t base_len = base->n_len + base->n_scale;
218+
size_t power_len = base->n_len * exponent;
219+
size_t power_scale = base->n_scale * exponent;
220+
size_t power_full_len = power_len + power_scale;
221+
222+
sign power_sign;
223+
if (base->n_sign == MINUS && (exponent & 1) == 1) {
224+
power_sign = MINUS;
225+
} else {
226+
power_sign = PLUS;
84227
}
85-
temp = bc_copy_num(power);
86-
calcscale = pwrscale;
87-
exponent = exponent >> 1;
88228

89-
/* Do the calculation. */
90-
while (exponent > 0) {
91-
pwrscale = 2 * pwrscale;
92-
bc_square_ex(power, &power, pwrscale);
93-
if ((exponent & 1) == 1) {
94-
calcscale = pwrscale + calcscale;
95-
bc_multiply_ex(temp, power, &temp, calcscale);
96-
}
97-
exponent = exponent >> 1;
229+
const char *base_end = base->n_value + base_len - 1;
230+
231+
bc_num power;
232+
if (base_len <= BC_VECTOR_SIZE && power_full_len <= BC_VECTOR_SIZE * 2) {
233+
power = bc_fast_raise(base_end, exponent, base_len, power_len, power_scale, power_full_len);
234+
} else {
235+
power = bc_standard_raise(base_end, exponent, base_len, power_len, power_scale, power_full_len);
236+
}
237+
238+
_bc_rm_leading_zeros(power);
239+
if (bc_is_zero(power)) {
240+
power->n_sign = PLUS;
241+
power->n_scale = 0;
242+
} else {
243+
power->n_sign = power_sign;
98244
}
99245

100246
/* Assign the value. */
101247
if (is_neg) {
102-
if (bc_divide(BCG(_one_), temp, result, rscale) == false) {
103-
bc_free_num (&temp);
248+
if (bc_divide(BCG(_one_), power, result, rscale) == false) {
104249
bc_free_num (&power);
105250
return false;
106251
}
107-
bc_free_num (&temp);
252+
bc_free_num (&power);
108253
} else {
109254
bc_free_num (result);
110-
*result = temp;
255+
*result = power;
111256
(*result)->n_scale = MIN(scale, (*result)->n_scale);
112257
}
113-
bc_free_num (&power);
114258
return true;
115259
}
116260

0 commit comments

Comments
 (0)