Skip to content

Commit 3e7e75f

Browse files
committed
Use Base.Ordering for heap, and other performance improvements
1 parent c1c07c4 commit 3e7e75f

9 files changed

+260
-239
lines changed

benchmark/bench_heap.jl

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,12 @@ heaptypes = [BinaryHeap, MutableBinaryHeap]
2626
aexps = [1,3]
2727
datatypes = [Int, Float64]
2828
baseorderings = Dict(
29-
"Min" => DataStructures.LessThan,
30-
#"Max" => DataStructures.GreaterThan,
29+
"Min" => Base.ForwardOrdering,
30+
#"Max" => Base.ReverseOrdering,
3131
)
3232
fastfloatorderings = Dict(
33-
# These will be enabled upon reordering change
34-
#"FastMin" => DataStructures.FasterForward(),
35-
#"FastMax" => DataStructures.FasterReverse(),
33+
"Min" => DataStructures.FasterForward,
34+
"Max" => DataStructures.FasterReverse,
3635
)
3736

3837
for heap in heaptypes
@@ -41,7 +40,8 @@ for heap in heaptypes
4140
Random.seed!(0)
4241
a = rand(dt, 10^aexp)
4342

44-
orderings = baseorderings
43+
# Dict types to force use of abstract type if containing single value
44+
orderings = Dict{String, DataType}(baseorderings)
4545
if dt == Float64
4646
# swap to faster ordering operation
4747
for (k,v) in orderings
@@ -66,38 +66,22 @@ for heap in heaptypes
6666
end
6767
end
6868

69-
# Quick check to ensure no Float regressions with Min/Max convenience functions
70-
# These don't fit in well with the above loop, since ordering is hardcoded.
71-
heapalias = Dict(
72-
"BinaryMinHeap" => BinaryMinHeap,
73-
"BinaryMaxHeap" => BinaryMaxHeap,
74-
"BinaryMinMaxHeap" => BinaryMinMaxHeap, # <- no alias issue
75-
)
76-
for (heapname, heap) in heapalias
77-
for aexp in aexps
78-
for dt in [Float64]
79-
Random.seed!(0)
80-
a = rand(dt, 10^aexp)
81-
prepath = [heapname]
82-
postpath = [string(dt), "10^"*string(aexp)]
83-
suite[vcat(prepath, ["make"], postpath)] =
84-
@benchmarkable $(heap)($a)
85-
suite[vcat(prepath, ["push"], postpath)] =
86-
@benchmarkable push_heap(h, $a) setup=(h=$(heap){$dt}())
87-
suite[vcat(prepath, ["pop"], postpath)] =
88-
@benchmarkable pop_heap(h) setup=(h=$(heap)($a))
89-
end
90-
end
91-
end
69+
fast_extreme_orderings = Dict(
70+
nsmallest => DataStructures.FasterForward(),
71+
nlargest => DataStructures.FasterReverse(),
72+
)
9273

9374
for func in [nlargest, nsmallest]
75+
fastord = fast_extreme_orderings[func]
9476
for aexp in [4]
9577
Random.seed!(0);
9678
a = rand(10^aexp);
9779
for nexp in [2]
9880
n = 10^nexp
99-
suite[[string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
81+
suite[["Slow " * string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
10082
@benchmarkable $(func)($n, $a)
83+
suite[[string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
84+
@benchmarkable DataStructures.nextreme($fastord, $n, $a)
10185
end
10286
end
10387
end

docs/src/heaps.md

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ All heaps in this package are derived from `AbstractHeap`, and provide
77
the following interface:
88

99
```julia
10-
# Let h be a heap, i be a handle, and v be a value.
10+
# Let `h` be a heap, `v` be a value, and `n` be an integer size
1111

1212
length(h) # returns the number of elements
1313

@@ -19,13 +19,16 @@ top(h) # return the top value of a heap
1919

2020
pop!(h) # removes the top value, and returns it
2121

22+
sizehint!(h, n) # reserve capacity for at least `n` elements
2223
```
2324

2425
Mutable heaps (values can be changed after being pushed to a heap) are
2526
derived from `AbstractMutableHeap <: AbstractHeap`, and additionally
2627
provides the following interface:
2728

2829
```julia
30+
# Let `h` be a heap, `i` be a handle, and `v` be a value.
31+
2932
i = push!(h, v) # adds a value to the heap and returns a handle to v
3033

3134
update!(h, i, v) # updates the value of an element (referred to by the handle i)
@@ -54,6 +57,21 @@ h = MutableBinaryMinHeap([1,4,3,2])
5457
h = MutableBinaryMaxHeap([1,4,3,2]) # create a mutable min/max heap from a vector
5558
```
5659

60+
Heaps may be constructed with a custom ordering. One use case for custom orderings
61+
is to achieve faster performance with `Float` elements with the risk of random ordering
62+
if any elements are `NaN`. The provided `DataStructures.FasterForward` and
63+
`DataStructures.FasterReverse` orderings are optimized for this purpose.
64+
Custom orderings may also be used for defining the order of structs as heap elements.
65+
```julia
66+
h = BinaryHeap{Float64, DataStructures.FasterForward}() # faster min heap
67+
h = BinaryHeap{Float64, DataStructures.FasterReverse}() # faster max heap
68+
69+
h = MutableBinaryHeap{Float64, DataStructures.FasterForward}() # faster mutable min heap
70+
h = MutableBinaryHeap{Float64, DataStructures.FasterReverse}() # faster mutable max heap
71+
72+
h = BinaryHeap{MyStruct, MyStructOrdering}() # heap containing custom struct
73+
```
74+
5775
## Min-max heaps
5876
Min-max heaps maintain the minimum _and_ the maximum of a set,
5977
allowing both to be retrieved in constant (`O(1)`) time.
@@ -97,5 +115,9 @@ nlargest(3, [0,21,-12,68,-25,14]) # => [68,21,14]
97115
nsmallest(3, [0,21,-12,68,-25,14]) # => [-25,-12,0]
98116
```
99117

100-
`nlargest(n, a)` is equivalent to `sort(a, lt = >)[1:min(n, end)]`, and
101-
`nsmallest(n, a)` is equivalent to `sort(a, lt = <)[1:min(n, end)]`.
118+
Note that if the array contains floats and is free of NaN values,
119+
then the following alternatives may be used to achieve a 2x performance boost.
120+
```julia
121+
DataStructures.nextreme(DataStructures.FasterReverse(), n, a) # faster nlargest(n, a)
122+
DataStructures.nextreme(DataStructures.FasterForward(), n, a) # faster nsmallest(n, a)
123+
```

src/heaps.jl

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -55,28 +55,30 @@ abstract type AbstractMutableHeap{VT,HT} <: AbstractHeap{VT} end
5555

5656
abstract type AbstractMinMaxHeap{VT} <: AbstractHeap{VT} end
5757

58-
# comparer
59-
60-
struct LessThan
61-
end
62-
63-
struct GreaterThan
64-
end
65-
66-
compare(c::LessThan, x, y) = x < y
67-
compare(c::GreaterThan, x, y) = x > y
68-
6958
# heap implementations
7059

7160
include("heaps/binary_heap.jl")
7261
include("heaps/mutable_binary_heap.jl")
73-
include("heaps/arrays_as_heaps.jl")
7462
include("heaps/minmax_heap.jl")
7563

7664
# generic functions
7765

7866
Base.eltype(::Type{<:AbstractHeap{T}}) where T = T
7967

68+
#=
69+
Note that extract_all! and extract_all_rev! are slower than
70+
sorting the array of values in-place.
71+
Leaving these functions here for use in testing.
72+
=#
73+
74+
"""
75+
extract_all!(h)
76+
77+
returns an array of heap elements in sorted order (heap head at first index).
78+
79+
Note that sorting the heap's internal array of elements in-place is faster;
80+
however, this function adds some convenience and works for mutable heaps too.
81+
"""
8082
function extract_all!(h::AbstractHeap{VT}) where VT
8183
n = length(h)
8284
r = Vector{VT}(undef, n)
@@ -86,6 +88,14 @@ function extract_all!(h::AbstractHeap{VT}) where VT
8688
r
8789
end
8890

91+
"""
92+
extract_all_rev!(h)
93+
94+
returns an array of heap elements in reverse sorted order (heap head at last index).
95+
96+
Note that sorting the heap's internal array of elements in-place is faster;
97+
however, this function adds some convenience and works for mutable heaps too.
98+
"""
8999
function extract_all_rev!(h::AbstractHeap{VT}) where VT
90100
n = length(h)
91101
r = Vector{VT}(undef, n)
@@ -97,50 +107,65 @@ end
97107

98108
# Array functions using heaps
99109

100-
function nextreme(comp::Comp, n::Int, arr::AbstractVector{T}) where {T, Comp}
110+
"""
111+
nextreme(ord, n, arr)
112+
113+
Return a sorted array of the `n` values of `arr` that come first when ordered by `ord`.
114+
"""
115+
function nextreme(ord::Base.Ordering, n::Int, arr::AbstractVector{T}) where T
101116
if n <= 0
102117
return T[] # sort(arr)[1:n] returns [] for n <= 0
103118
elseif n >= length(arr)
104-
return sort(arr, lt = (x, y) -> compare(comp, y, x))
119+
return sort(arr, order = ord)
105120
end
106121

107-
buffer = BinaryHeap{T,Comp}()
122+
rev = Base.ReverseOrdering(ord)
108123

109-
for i = 1 : n
110-
@inbounds xi = arr[i]
111-
push!(buffer, xi)
112-
end
124+
buffer = heapify(arr[1:n], rev)
113125

114126
for i = n + 1 : length(arr)
115127
@inbounds xi = arr[i]
116-
if compare(comp, top(buffer), xi)
117-
# This could use a pushpop method
118-
pop!(buffer)
119-
push!(buffer, xi)
128+
if Base.lt(rev, buffer[1], xi)
129+
buffer[1] = xi
130+
percolate_down!(buffer, 1, rev)
120131
end
121132
end
122133

123-
return extract_all_rev!(buffer)
134+
return sort!(buffer, order = ord)
124135
end
125136

126137
"""
127138
nlargest(n, arr)
128139
129140
Return the `n` largest elements of the array `arr`.
130141
131-
Equivalent to `sort(arr, lt = >)[1:min(n, end)]`
142+
Equivalent to:
143+
sort(arr, order = Base.Reverse)[1:min(n, end)]
144+
145+
Note that if `arr` contains floats and is free of NaN values,
146+
then the following alternative may be used to achieve 2x performance.
147+
DataStructures.nextreme(DataStructures.FasterReverse(), n, arr)
148+
This faster version is equivalent to:
149+
sort(arr, lt = >)[1:min(n, end)]
132150
"""
133-
function nlargest(n::Int, arr::AbstractVector{T}) where T
134-
return nextreme(LessThan(), n, arr)
151+
function nlargest(n::Int, arr::AbstractVector)
152+
return nextreme(Base.Reverse, n, arr)
135153
end
136154

137155
"""
138156
nsmallest(n, arr)
139157
140158
Return the `n` smallest elements of the array `arr`.
141159
142-
Equivalent to `sort(arr, lt = <)[1:min(n, end)]`
160+
Equivalent to:
161+
sort(arr, order = Base.Forward)[1:min(n, end)]
162+
163+
Note that if `arr` contains floats and is free of NaN values,
164+
then the following alternative may be used to achieve 2x performance.
165+
DataStructures.nextreme(DataStructures.FasterForward(), n, arr)
166+
This faster version is equivalent to:
167+
sort(arr, lt = <)[1:min(n, end)]
143168
"""
144-
function nsmallest(n::Int, arr::AbstractVector{T}) where T
145-
return nextreme(GreaterThan(), n, arr)
169+
function nsmallest(n::Int, arr::AbstractVector)
170+
return nextreme(Base.Forward, n, arr)
146171
end

src/heaps/arrays_as_heaps.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ function percolate_up!(xs::AbstractArray, i::Integer, x=xs[i], o::Ordering=Forwa
4444
xs[i] = x
4545
end
4646

47-
percolate_up!(xs::AbstractArray{T}, i::Integer, o::Ordering) where {T} = percolate_up!(xs, i, xs[i], o)
47+
percolate_up!(xs::AbstractArray, i::Integer, o::Ordering) = percolate_up!(xs, i, xs[i], o)
4848

4949
"""
5050
heappop!(v, [ord])
@@ -69,12 +69,12 @@ For efficiency, this function does not check that the array is indeed heap-order
6969
"""
7070
function heappush!(xs::AbstractArray, x, o::Ordering=Forward)
7171
push!(xs, x)
72-
percolate_up!(xs, length(xs), x, o)
72+
percolate_up!(xs, length(xs), o)
7373
xs
7474
end
7575

7676

77-
# Turn an arbitrary array into a binary min-heap in linear time.
77+
# Turn an arbitrary array into a binary min-heap (by default) in linear time.
7878
"""
7979
heapify!(v, ord::Ordering=Forward)
8080
@@ -111,6 +111,7 @@ julia> heapify(a, Base.Order.Reverse)
111111
2
112112
```
113113
"""
114+
# Todo, benchmarking shows copy(xs) outperforms copyto!(similar(xs), xs) for 10^6 Float64
114115
heapify(xs::AbstractArray, o::Ordering=Forward) = heapify!(copyto!(similar(xs), xs), o)
115116

116117
"""

0 commit comments

Comments
 (0)