Skip to content

Commit 1fef771

Browse files
committed
⚡️Golf sqrt and cbrt by getting a better initial estimate and removing an iteration
1 parent ff6256a commit 1fef771

1 file changed

Lines changed: 15 additions & 11 deletions

File tree

src/utils/clz/FixedPointMathLib.sol

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -774,14 +774,16 @@ library FixedPointMathLib {
774774
// n = floor(log2(x))
775775
// For x ≈ 2^n, we know sqrt(x) ≈ 2^(n/2)
776776
// We use (n+1)/2 instead of n/2 to round up slightly
777-
// This gives a better initial approximation
777+
// This gives a better initial approximation. This seed gives
778+
// ε₁ = 0.0607 after one Babylonian step for all inputs. With
779+
// ε_{n+1} ≈ ε²/2, 6 steps yield 2⁻¹⁶¹ relative error (>128 correct
780+
// bits).
778781
//
779782
// Formula: z = 2^((n+1)/2) = 2^(floor((n+1)/2))
780783
// Implemented as: z = 1 << ((n+1) >> 1)
781784
z := shl(shr(1, sub(256, clz(x))), 1)
782785

783-
/// (x/z + z) / 2
784-
z := shr(1, add(z, div(x, z)))
786+
// 6 Babylonian steps; z = (x/z + z) / 2
785787
z := shr(1, add(z, div(x, z)))
786788
z := shr(1, add(z, div(x, z)))
787789
z := shr(1, add(z, div(x, z)))
@@ -799,23 +801,25 @@ library FixedPointMathLib {
799801
/// @dev Returns the cube root of `x`, rounded down.
800802
/// Credit to bout3fiddy and pcaversaccio under AGPLv3 license:
801803
/// https://github.com/pcaversaccio/snekmate/blob/main/src/snekmate/utils/math.vy
802-
/// Formally verified by xuwinnie:
803-
/// https://github.com/vectorized/solady/blob/main/audits/xuwinnie-solady-cbrt-proof.pdf
804804
function cbrt(uint256 x) internal pure returns (uint256 z) {
805805
/// @solidity memory-safe-assembly
806806
assembly {
807-
// Initial guess z = 2^ceil((log2(x) + 2) / 3).
808-
// Since log2(x) = 255 - clz(x), the expression shl((257 - clz(x)) / 3, 1)
809-
// computes this over-estimate. Guaranteed ≥ cbrt(x) and safe for Newton-Raphson's.
810-
z := shl(div(sub(257, clz(x)), 3), 1)
811-
// Newton-Raphson's.
812-
z := div(add(add(div(x, mul(z, z)), z), z), 3)
807+
// Initial guess z ≈ ∛(3/4) · 2𐞥 where q = ⌊(257 − clz(x)) / 3⌋. The
808+
// multiplier 233/256 ≈ 0.910 ≈ ∛(3/4) balances the worst-case
809+
// over/underestimate across each octave triplet (ε_over ≈ 0.445,
810+
// ε_under ≈ −0.278), giving >85 bits of precision after 6 N-R
811+
// iterations. The `lt(0, x)` term ensures z ≥ 1 when x > 0 (the
812+
// `shr` can produce 0 for small `q`)
813+
z := add(shr(8, shl(div(sub(257, clz(x)), 3), 233)), lt(0, x))
814+
815+
// 6 Newton-Raphson iterations.
813816
z := div(add(add(div(x, mul(z, z)), z), z), 3)
814817
z := div(add(add(div(x, mul(z, z)), z), z), 3)
815818
z := div(add(add(div(x, mul(z, z)), z), z), 3)
816819
z := div(add(add(div(x, mul(z, z)), z), z), 3)
817820
z := div(add(add(div(x, mul(z, z)), z), z), 3)
818821
z := div(add(add(div(x, mul(z, z)), z), z), 3)
822+
819823
// Round down.
820824
z := sub(z, lt(div(x, mul(z, z)), z))
821825
}

0 commit comments

Comments
 (0)