Skip to content

Finish removing the BigInts from * for FD{Int128}! #94

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 33 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
a1c1711
Use Int256 to avoid BigInt in FD operations.
NHDaly Jun 12, 2024
7756238
Further reduce BigInts by skipping a `rem()` in iseven
NHDaly Jun 12, 2024
78e45dc
Fix ambiguity in _widemul(Int256, UInt256)
NHDaly Jun 12, 2024
879c602
Bump patch version number
NHDaly Jun 12, 2024
a245651
Add compat for BitIntegers
NHDaly Jun 12, 2024
4bd8c45
Finish removing the BigInts from * for FD{Int128}!
NHDaly Jun 12, 2024
4e53f3d
Support older versions of julia
NHDaly Jun 13, 2024
dfd41b1
Comments
NHDaly Jun 13, 2024
efee91b
Disable fldmod-by-const tests on older julia
NHDaly Jun 13, 2024
a03d754
Fix one other case of iseven allocating a BigInt
NHDaly Jun 13, 2024
4ed8ebf
Apply this optimization to FD{Int64} as well.
NHDaly Jun 13, 2024
3f39b8a
Adjust to run for all integer types!
NHDaly Jun 13, 2024
20c66f2
Clarify the `_unsigned(x)` methods with comments
NHDaly Jun 13, 2024
f2958ba
Apply suggestions from code review
NHDaly Jun 13, 2024
0019bb0
Update src/FixedPointDecimals.jl
NHDaly Jun 17, 2024
4a53703
Add extensive tests for multiplication correctness, to cover the new …
NHDaly Jun 19, 2024
27a3e0f
Named testsets to make failures easier to identify
NHDaly Jun 19, 2024
e4cb73b
Fix off-by-one error in rounding truncation in calculate_inverse_coeff()
NHDaly Jun 19, 2024
73f6547
Add some comments and requires
NHDaly Jun 19, 2024
4e9fdd6
Copy/pasted definition for unsigned numbers straight from the book
NHDaly Jun 19, 2024
188933d
Have `magicgu` support arbitrary integer sizes
NHDaly Jun 20, 2024
f6d375c
Use the formulas from Hacker's delight for both signed and unsigned Ints
Drvi Jun 21, 2024
b79c873
.
Drvi Jun 25, 2024
4f4d17a
.
Drvi Jun 26, 2024
eaeaddf
Restrict back to just Int128 & Int256 for custom div_by_const
NHDaly Jul 10, 2024
a2dcf56
It turns out that in newer versions of julia, you should call fldmod,…
NHDaly Jul 10, 2024
ef578e9
Reorganize the functions to be top-down
NHDaly Jul 10, 2024
8df68b5
More thorough tests for flmdod_by_const
NHDaly Jul 10, 2024
66e1ecb
Merge branch 'master' into nhd-Int128-fastmul-noallocs
NHDaly Aug 12, 2024
fd096ac
Bump patch version number
NHDaly Aug 12, 2024
e147f5c
Add _widemul unit test
NHDaly Aug 13, 2024
f830c3c
Change "unreachable" comment to an `@assert false`
NHDaly Aug 13, 2024
71ba82e
add test comment
NHDaly Sep 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "FixedPointDecimals"
uuid = "fb4d412d-6eee-574d-9565-ede6634db7b0"
authors = ["Fengyang Wang <[email protected]>", "Curtis Vogt <[email protected]>"]
version = "0.5.3"
version = "0.5.4"

[deps]
BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1"
Expand Down
46 changes: 31 additions & 15 deletions src/FixedPointDecimals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,28 @@ export checked_abs, checked_add, checked_cld, checked_div, checked_fld,

using Base: decompose, BitInteger

import BitIntegers # For 128-bit _widemul / _widen
using BitIntegers: BitIntegers, Int256, Int512, UInt256, UInt512
import Parsers

# The effects system in newer versions of Julia is necessary for the fldmod_by_const
# optimization to take affect without adverse performance impacts.
# For older versions of julia, we will fall back to the flmod implementation, which works
# very fast for all FixedDecimals of size <= 64 bits.
if isdefined(Base, Symbol("@assume_effects"))
include("fldmod-by-const.jl")
else
# We force-inline this, to allow the fldmod operation to be specialized for a constant
# second argument (converting divide-by-power-of-ten instructions with the cheaper
# multiply-by-inverse-coefficient.) Sadly this doesn't work for Int128.
if VERSION >= v"1.8.0-"
# Since julia 1.8+, the built-in fldmod optimizes for constant divisors, which is
# even better than computing the division twice.
@inline fldmod_by_const(x,y) = fldmod(x,y)
else
@inline fldmod_by_const(x,y) = (fld(x,y), mod(x,y))
end
end

# floats that support fma and are roughly IEEE-like
const FMAFloat = Union{Float16, Float32, Float64, BigFloat}

Expand Down Expand Up @@ -129,8 +148,10 @@ _widemul(x::Unsigned,y::Signed) = signed(_widen(x)) * _widen(y)

# Custom widen implementation to avoid the cost of widening to BigInt.
# FD{Int128} operations should widen to 256 bits internally, rather than to a BigInt.
_widen(::Type{Int128}) = BitIntegers.Int256
_widen(::Type{UInt128}) = BitIntegers.UInt256
_widen(::Type{Int128}) = Int256
_widen(::Type{UInt128}) = UInt256
_widen(::Type{Int256}) = Int512
_widen(::Type{UInt256}) = UInt512
_widen(t::Type) = widen(t)
_widen(x::T) where {T} = (_widen(T))(x)

Expand Down Expand Up @@ -176,15 +197,16 @@ function _round_to_nearest(quotient::T,
halfdivisor = divisor >> 1
# PERF Note: Only need the last bit to check iseven, and default iseven(Int256)
# allocates, so we truncate first.
if iseven((divisor % Int8)) && remainder == halfdivisor
_iseven(x) = (x & 0x1) == 0
if _iseven(divisor) && remainder == halfdivisor
# `:NearestTiesAway` will tie away from zero, e.g. -8.5 ->
# -9. `:NearestTiesUp` will always ties towards positive
# infinity. `:Nearest` will tie towards the nearest even
# integer.
if M == :NearestTiesAway
ifelse(quotient < zero(quotient), quotient, quotient + one(quotient))
elseif M == :Nearest
ifelse(iseven(quotient), quotient, quotient + one(quotient))
ifelse(_iseven(quotient), quotient, quotient + one(quotient))
else
quotient + one(quotient)
end
Expand All @@ -196,18 +218,12 @@ function _round_to_nearest(quotient::T,
end
_round_to_nearest(q, r, d, m=RoundNearest) = _round_to_nearest(promote(q, r, d)..., m)

# In many of our calls to fldmod, `y` is a constant (the coefficient, 10^f). However, since
# `fldmod` is sometimes not being inlined, that constant information is not available to the
# optimizer. We need an inlined version of fldmod so that the compiler can replace expensive
# divide-by-power-of-ten instructions with the cheaper multiply-by-inverse-coefficient.
@inline fldmodinline(x,y) = (fld(x,y), mod(x,y))

# multiplication rounds to nearest even representation
# TODO: can we use floating point to speed this up? after we build a
# correctness test suite.
function Base.:*(x::FD{T, f}, y::FD{T, f}) where {T, f}
powt = coefficient(FD{T, f})
quotient, remainder = fldmodinline(_widemul(x.i, y.i), powt)
quotient, remainder = fldmod_by_const(_widemul(x.i, y.i), powt)
reinterpret(FD{T, f}, _round_to_nearest(quotient, remainder, powt))
end

Expand All @@ -234,12 +250,12 @@ function Base.round(x::FD{T, f},
RoundingMode{:NearestTiesUp},
RoundingMode{:NearestTiesAway}}=RoundNearest) where {T, f}
powt = coefficient(FD{T, f})
quotient, remainder = fldmodinline(x.i, powt)
quotient, remainder = fldmod_by_const(x.i, powt)
FD{T, f}(_round_to_nearest(quotient, remainder, powt, m))
end
function Base.ceil(x::FD{T, f}) where {T, f}
powt = coefficient(FD{T, f})
quotient, remainder = fldmodinline(x.i, powt)
quotient, remainder = fldmod_by_const(x.i, powt)
if remainder > 0
FD{T, f}(quotient + one(quotient))
else
Expand Down Expand Up @@ -435,7 +451,7 @@ function Base.checked_sub(x::T, y::T) where {T<:FD}
end
function Base.checked_mul(x::FD{T,f}, y::FD{T,f}) where {T<:Integer,f}
powt = coefficient(FD{T, f})
quotient, remainder = fldmodinline(_widemul(x.i, y.i), powt)
quotient, remainder = fldmod_by_const(_widemul(x.i, y.i), powt)
v = _round_to_nearest(quotient, remainder, powt)
typemin(T) <= v <= typemax(T) || Base.Checked.throw_overflowerr_binaryop(:*, x, y)
return reinterpret(FD{T, f}, T(v))
Expand Down
131 changes: 131 additions & 0 deletions src/fldmod-by-const.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# This file includes a manual implementation of the divide-by-constant optimization for
# non-power-of-2 divisors. LLVM automatically includes this optimization, but only for
# smaller integer sizes (<=64-bits on my 64-bit machine).
#
# NOTE: We use LLVM's built-in implementation for Int64 and smaller, to keep the code
# simpler (though the code we produce is identical). We apply this optimization to (U)Int128
# and (U)Int256, which result from multiplying FD{Int64}s and FD{Int128}s.
# Before:
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int32,3}(1.234))
# 54.125 μs (0 allocations: 0 bytes) # (Unchanged)
# FixedDecimal{Int32,3}(1700943.280)
#
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int64,3}(1.234))
# 174.625 μs (0 allocations: 0 bytes)
# FixedDecimal{Int64,3}(4230510070790917.029)
#
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int128,3}(1.234))
# 2.119 ms (79986 allocations: 1.60 MiB)
# FixedDecimal{Int128,3}(-66726338547984585007169386718143307.324)
#
# After:
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int32,3}(1.234))
# 56.958 μs (0 allocations: 0 bytes) # (Unchanged)
# FixedDecimal{Int32,3}(1700943.280)
#
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int64,3}(1.234))
# 90.708 μs (0 allocations: 0 bytes)
# FixedDecimal{Int64,3}(4230510070790917.029)
#
# julia> @btime for _ in 1:10000 fd = fd * fd end setup = (fd = FixedDecimal{Int128,3}(1.234))
# 180.167 μs (0 allocations: 0 bytes)
# FixedDecimal{Int128,3}(-66726338547984585007169386718143307.324)

"""
ShouldUseCustomFldmodByConst(::Type{<:MyCustomIntType})) = true
A trait to control opt-in for the custom `fldmod_by_const` implementation. To use this for a
given integer type, you can define this overload for your integer type.
You will also need to implement some parts of the interface below, including _widen().
"""
ShouldUseCustomFldmodByConst(::Type{<:Union{Int128,UInt128}}) = true # For FD{Int64}
ShouldUseCustomFldmodByConst(::Type{<:Union{Int256,UInt256}}) = true # For FD{Int128}
ShouldUseCustomFldmodByConst(::Type) = false

@inline function fldmod_by_const(x, y)
if ShouldUseCustomFldmodByConst(typeof(x))
# For large Int types, LLVM doesn't optimize well, so we use a custom implementation
# of fldmod, which extends that optimization to those larger integer types.
d = fld_by_const(x, Val(y))
return d, (x - d * y)
else
# For other integers, LLVM might be able to correctly optimize away the division, if
# it knows it's dividing by a const.
# Since julia 1.8+, fldmod(x,y) automatically optimizes for constant divisors.
return fldmod(x, y)
end
end

# Calculate fld(x,y) when y is a Val constant.
# The implementation for fld_by_const was lifted directly from Base.fld(x,y), except that
# it uses `div_by_const` instead of `div`.
fld_by_const(x::T, y::Val{C}) where {T<:Unsigned, C} = div_by_const(x, y)
function fld_by_const(x::T, y::Val{C}) where {T<:Signed, C}
d = div_by_const(x, y)
return d - (signbit(x ⊻ C) & (d * C != x))
end

function div_by_const(x::T, ::Val{C}) where {T, C}
# These checks will be compiled away during specialization.
# While for `*(FixedDecimal, FixedDecimal)`, C will always be a power of 10, these
# checks allow this function to work for any `C > 0`, in case that's useful in the
# future.
if C == 1
return x
elseif ispow2(C)
return div(x, C) # Will already do the right thing
elseif C <= 0
throw(DomainError("C must be > 0"))
end
# Calculate the magic number and shift amount, based on Hacker's Delight, Chapter 10.
magic_number, shift = magicg(typemax(T), C)

out = _widemul(promote(x, magic_number)...)
out >>= shift
# Adding one as implied by formula (1b) in Hacker's delight, Chapter 10-4.
return (out % T) + (x < zero(T))
end

# Unsigned magic number computation + shift by constant
# See Hacker's delight, equations (26) and (27) from Chapter 10-9.
# (See also the errata on https://web.archive.org/web/20190915025154/http://www.hackersdelight.org/)
# requires nmax >= divisor > 2. divisor must not be a power of 2.
Base.@assume_effects :foldable function magicg(nmax::Unsigned, divisor)
T = typeof(nmax)
W = _widen(T)
d = W(divisor)

nc = div(W(nmax) + W(1), d) * d - W(1) # largest multiple of d <= nmax, minus 1
nbits = 8sizeof(nmax) - leading_zeros(nmax) # most significant bit
# shift must be larger than int size because we want the high bits of the wide multiplication
for p in nbits-1:2nbits-1
if W(2)^p > nc * (d - W(1) - rem(W(2)^p - W(1), d)) # (27)
m = div(W(2)^p + d - W(1) - rem(W(2)^p - W(1), d), d) # (26)
return (m, p)
end
end
@assert false """magicg bug: Unreachable reached. divisor=$divisor, nmax=$nmax.
Please report an issue to https://github.com/JuliaMath/FixedPointDecimals.jl
"""
end

# See Hacker's delight, equations (5) and (6) from Chapter 10-4.
# (See also the errata on https://web.archive.org/web/20190915025154/http://www.hackersdelight.org/)
# requires nmax >= divisor > 2. divisor must not be a power of 2.
Base.@assume_effects :foldable function magicg(nmax::Signed, divisor)
T = typeof(nmax)
W = _widen(T)
d = W(divisor)

nc = div(W(nmax) + W(1), d) * d - W(1) # largest multiple of d <= nmax, minus 1
nbits = 8sizeof(nmax) - leading_zeros(nmax) # most significant bit
# shift must be larger than int size because we want the high bits of the wide multiplication
for p in nbits-1:2nbits-1
if W(2)^p > nc * (d - rem(W(2)^p, d)) # (6)
m = div(W(2)^p + d - rem(W(2)^p, d), d) # (5)
return (m, p)
end
end
@assert false """magicg bug: Unreachable reached. divisor=$divisor, nmax=$nmax.
Please report an issue to https://github.com/JuliaMath/FixedPointDecimals.jl
"""
end
22 changes: 22 additions & 0 deletions test/FixedDecimal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1408,3 +1408,25 @@ end
@test hash(FD2(1//10)) ≠ hash(0.1)
end

@testset "_widemul" begin
_widemul = FixedPointDecimals._widemul
Int256, UInt256 = FixedPointDecimals.Int256, FixedPointDecimals.UInt256
Int512, UInt512 = FixedPointDecimals.Int512, FixedPointDecimals.UInt512

@test _widemul(UInt8(3), UInt8(2)) === widemul(UInt8(3), UInt8(2)) === UInt16(6)
@test _widemul(Int8(3), UInt8(2)) === widemul(Int8(3), UInt8(2)) === Int16(6)
@test _widemul(UInt8(3), Int8(2)) === widemul(UInt8(3), Int8(2)) === Int16(6)

@test _widemul(UInt16(3), UInt8(2)) === widemul(UInt16(3), UInt8(2)) === UInt32(6)
@test _widemul(UInt16(3), Int8(2)) === widemul(UInt16(3), Int8(2)) === Int32(6)
@test _widemul(Int16(3), UInt8(2)) === widemul(Int16(3), UInt8(2)) === Int32(6)

# Custom widenings
@test _widemul(UInt128(3), UInt128(2)) === UInt256(6)
@test _widemul(Int128(3), UInt128(2)) === Int256(6)
@test _widemul(UInt128(3), Int128(2)) === Int256(6)

@test _widemul(UInt256(3), UInt256(2)) === UInt512(6)
@test _widemul(Int256(3), UInt256(2)) === Int512(6)
@test _widemul(UInt256(3), Int256(2)) === Int512(6)
end
115 changes: 115 additions & 0 deletions test/fldmod-by-const_tests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
using Test
using FixedPointDecimals

@testset "div_by_const" begin
vals = [2432, 100, 0x1, Int32(10000), typemax(Int64), typemax(Int16), 8, Int64(2)^32]
for a_base in vals
# Only test negative numbers on `a`, since div_by_const requires b > 0.
@testset for (a, b, f) in Iterators.product((a_base, -a_base), vals, (unsigned, signed))
a, b = promote(f(a), f(b))
@test FixedPointDecimals.div_by_const(a, Val(b)) == a ÷ b
end
end
end

@testset "fldmod_by_const" begin
vals = [2432, 100, 0x1, Int32(10000), typemax(Int64), typemax(Int16), 8, Int64(2)^32]
for a_base in vals
# Only test negative numbers on `a`, since fldmod_by_const requires b > 0.
@testset for (a, b, f) in Iterators.product((a_base, -a_base), vals, (unsigned, signed))
a, b = promote(f(a), f(b))
@test FixedPointDecimals.fldmod_by_const(a, b) == fldmod(a, b)
end
end
end

# We don't actually use fldmod_by_const with 8-bit ints, but they're useful because
# we can exhaustively test every possible combination, to increase our confidence in
# the implementation.
@testset "flmdod_by_const - exhaustive 8-bit" begin
@testset for T in (Int8, UInt8)
@testset for x in typemin(T) : typemax(T)
@testset for y in typemin(T) : typemax(T)
y == 0 && continue
y == -1 && continue
@testset "fldmod($x, $y)" begin
@test fldmod(x, y) == FixedPointDecimals.fldmod_by_const(x, y)
end
end
end
end
end

# 64-bit FD multiplication uses the custom fldmod_by_const implementation.
# Test a few various different cases to try to ensure it works correctly.
@testset "fixed decimal multiplication - 64-bit" begin
@testset for P in (0,1,2,3,4)
@testset for T in (Int64, UInt64)
FD = FixedDecimal{T,P}

function test_multiplies_correctly(fd, x)
big = FixedDecimal{BigInt, P}(fd)
big_mul = big * x
# This might overflow: ...
mul = fd * x
@testset "$fd * $x" begin
# ... so we truncate big to the same size
@test big_mul.i % T == mul.i % T
end
end
num_tests = 2<<11
# Add one to avoid powers-of-2 to get hopefully better tests
epsilon = (typemax(FD) ÷ num_tests) + eps(FD)
@testset for v in typemin(FD) : epsilon : typemax(FD)
test_multiplies_correctly(v, typemin(T))
test_multiplies_correctly(v, -1)
test_multiplies_correctly(v, -eps(FD))
test_multiplies_correctly(v, 0)
test_multiplies_correctly(v, eps(FD))
test_multiplies_correctly(v, 1)
test_multiplies_correctly(v, 2)
test_multiplies_correctly(v, 3)
test_multiplies_correctly(v, typemax(T))
end
end
end
end

@testset "fixed decimal multiplication - 128-bit" begin
@testset for P in 0:37
@testset for T in (Int128, UInt128)
FD = FixedDecimal{T,P}

function test_multiplies_correctly(fd, x)
big = FixedDecimal{BigInt, P}(fd)
big_mul = big * x
# This might overflow: ...
mul = fd * x
@testset "$fd * $x" begin
# ... so we truncate big to the same size
@test big_mul.i % T == mul.i % T
end
end
vals = FD[
typemin(FD), typemax(FD),
typemin(FD) + eps(FD), typemax(FD) - eps(FD),
0.0, eps(FD), 0.1, 1.0, 2.0,
typemax(FD) ÷ 2,
]
if T <: Signed
append!(vals, vals.*-1)
end
@testset for v in vals
test_multiplies_correctly(v, typemin(T))
test_multiplies_correctly(v, -1)
test_multiplies_correctly(v, -eps(FD))
test_multiplies_correctly(v, 0)
test_multiplies_correctly(v, eps(FD))
test_multiplies_correctly(v, 1)
test_multiplies_correctly(v, 2)
test_multiplies_correctly(v, 3)
test_multiplies_correctly(v, typemax(T))
end
end
end
end
4 changes: 3 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ include(joinpath(pkg_path, "test", "utils.jl"))

@testset "FixedPointDecimals" begin
include("FixedDecimal.jl")
end # global testset
isdefined(Base, Symbol("@assume_effects")) && include("fldmod-by-const_tests.jl")
end

Loading