Skip to content

Commit 635f1db

Browse files
committed
Use Base.Ordering for heap, and other performance improvements
1 parent cce2313 commit 635f1db

10 files changed

+273
-243
lines changed

benchmark/bench_heap.jl

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,12 @@ heaptypes = [BinaryHeap, MutableBinaryHeap]
2626
aexps = [1,3]
2727
datatypes = [Int, Float64]
2828
baseorderings = Dict(
29-
"Min" => DataStructures.LessThan,
30-
#"Max" => DataStructures.GreaterThan,
29+
"Min" => Base.ForwardOrdering,
30+
#"Max" => Base.ReverseOrdering,
3131
)
3232
fastfloatorderings = Dict(
33-
# These will be enabled upon reordering change
34-
#"FastMin" => DataStructures.FasterForward(),
35-
#"FastMax" => DataStructures.FasterReverse(),
33+
"Min" => DataStructures.FasterForward,
34+
"Max" => DataStructures.FasterReverse,
3635
)
3736

3837
for heap in heaptypes
@@ -41,7 +40,8 @@ for heap in heaptypes
4140
Random.seed!(0)
4241
a = rand(dt, 10^aexp)
4342

44-
orderings = baseorderings
43+
# Dict types to force use of abstract type if containing single value
44+
orderings = Dict{String, DataType}(baseorderings)
4545
if dt == Float64
4646
# swap to faster ordering operation
4747
for (k,v) in orderings
@@ -66,38 +66,22 @@ for heap in heaptypes
6666
end
6767
end
6868

69-
# Quick check to ensure no Float regressions with Min/Max convenience functions
70-
# These don't fit in well with the above loop, since ordering is hardcoded.
71-
heapalias = Dict(
72-
"BinaryMinHeap" => BinaryMinHeap,
73-
"BinaryMaxHeap" => BinaryMaxHeap,
74-
"BinaryMinMaxHeap" => BinaryMinMaxHeap, # <- no alias issue
75-
)
76-
for (heapname, heap) in heapalias
77-
for aexp in aexps
78-
for dt in [Float64]
79-
Random.seed!(0)
80-
a = rand(dt, 10^aexp)
81-
prepath = [heapname]
82-
postpath = [string(dt), "10^"*string(aexp)]
83-
suite[vcat(prepath, ["make"], postpath)] =
84-
@benchmarkable $(heap)($a)
85-
suite[vcat(prepath, ["push"], postpath)] =
86-
@benchmarkable push_heap(h, $a) setup=(h=$(heap){$dt}())
87-
suite[vcat(prepath, ["pop"], postpath)] =
88-
@benchmarkable pop_heap(h) setup=(h=$(heap)($a))
89-
end
90-
end
91-
end
69+
fast_extreme_orderings = Dict(
70+
nsmallest => DataStructures.FasterForward(),
71+
nlargest => DataStructures.FasterReverse(),
72+
)
9273

9374
for func in [nlargest, nsmallest]
75+
fastord = fast_extreme_orderings[func]
9476
for aexp in [4]
9577
Random.seed!(0);
9678
a = rand(10^aexp);
9779
for nexp in [2]
9880
n = 10^nexp
99-
suite[[string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
81+
suite[["Slow " * string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
10082
@benchmarkable $(func)($n, $a)
83+
suite[[string(func), "a=rand(10^"*string(aexp)*")", "n=10^"*string(nexp)]] =
84+
@benchmarkable DataStructures.nextreme($fastord, $n, $a)
10185
end
10286
end
10387
end

docs/src/heaps.md

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,32 @@ All heaps in this package are derived from `AbstractHeap`, and provide
77
the following interface:
88

99
```julia
10-
# Let h be a heap, i be a handle, and v be a value.
10+
# Let `h` be a heap, `v` be a value, and `n` be an integer size
1111

12-
length(h) # returns the number of elements
12+
length(h) # returns the number of elements
1313

14-
isempty(h) # returns whether the heap is empty
14+
isempty(h) # returns whether the heap is empty
1515

16-
push!(h, v) # add a value to the heap
16+
push!(h, v) # add a value to the heap
1717

18-
top(h) # return the top value of a heap
18+
top(h) # return the top value of a heap
1919

20-
pop!(h) # removes the top value, and returns it
20+
pop!(h) # removes the top value, and returns it
2121

22+
extract_all!(h) # removes all elements and returns sorted array
23+
24+
extract_all_rev!(h) # removes all elements and returns reverse sorted array
25+
26+
sizehint!(h, n) # reserve capacity for at least `n` elements
2227
```
2328

2429
Mutable heaps (values can be changed after being pushed to a heap) are
2530
derived from `AbstractMutableHeap <: AbstractHeap`, and additionally
2631
provides the following interface:
2732

2833
```julia
34+
# Let `h` be a heap, `i` be a handle, and `v` be a value.
35+
2936
i = push!(h, v) # adds a value to the heap and returns a handle to v
3037

3138
update!(h, i, v) # updates the value of an element (referred to by the handle i)
@@ -54,6 +61,21 @@ h = MutableBinaryMinHeap([1,4,3,2])
5461
h = MutableBinaryMaxHeap([1,4,3,2]) # create a mutable min/max heap from a vector
5562
```
5663

64+
Heaps may be constructed with a custom ordering. One use case for custom orderings
65+
is to achieve faster performance with floating-point elements, at the risk of arbitrary ordering
66+
if any elements are `NaN`. The provided `DataStructures.FasterForward` and
67+
`DataStructures.FasterReverse` orderings are optimized for this purpose.
68+
Custom orderings may also be used for defining the order of structs as heap elements.
69+
```julia
70+
h = BinaryHeap{Float64, DataStructures.FasterForward}() # faster min heap
71+
h = BinaryHeap{Float64, DataStructures.FasterReverse}() # faster max heap
72+
73+
h = MutableBinaryHeap{Float64, DataStructures.FasterForward}() # faster mutable min heap
74+
h = MutableBinaryHeap{Float64, DataStructures.FasterReverse}() # faster mutable max heap
75+
76+
h = BinaryHeap{MyStruct, MyStructOrdering}() # heap containing custom struct
77+
```
78+
5779
## Min-max heaps
5880
Min-max heaps maintain the minimum _and_ the maximum of a set,
5981
allowing both to be retrieved in constant (`O(1)`) time.
@@ -97,5 +119,9 @@ nlargest(3, [0,21,-12,68,-25,14]) # => [68,21,14]
97119
nsmallest(3, [0,21,-12,68,-25,14]) # => [-25,-12,0]
98120
```
99121

100-
`nlargest(n, a)` is equivalent to `sort(a, lt = >)[1:min(n, end)]`, and
101-
`nsmallest(n, a)` is equivalent to `sort(a, lt = <)[1:min(n, end)]`.
122+
Note that if the array contains floats and is free of NaN values,
123+
then the following alternatives may be used to achieve a 2x performance boost.
124+
```julia
125+
DataStructures.nextreme(DataStructures.FasterReverse(), n, a) # faster nlargest(n, a)
126+
DataStructures.nextreme(DataStructures.FasterForward(), n, a) # faster nsmallest(n, a)
127+
```

src/DataStructures.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ module DataStructures
3535

3636
export FenwickTree, length, inc!, dec!, incdec!, prefixsum
3737

38-
export AbstractHeap, compare, extract_all!
38+
export AbstractHeap, compare, extract_all!, extract_all_rev!
3939
export BinaryHeap, BinaryMinHeap, BinaryMaxHeap, nlargest, nsmallest
4040
export MutableBinaryHeap, MutableBinaryMinHeap, MutableBinaryMaxHeap
4141
export heapify!, heapify, heappop!, heappush!, isheap

src/heaps.jl

Lines changed: 49 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -55,28 +55,24 @@ abstract type AbstractMutableHeap{VT,HT} <: AbstractHeap{VT} end
5555

5656
abstract type AbstractMinMaxHeap{VT} <: AbstractHeap{VT} end
5757

58-
# comparer
59-
60-
struct LessThan
61-
end
62-
63-
struct GreaterThan
64-
end
65-
66-
compare(c::LessThan, x, y) = x < y
67-
compare(c::GreaterThan, x, y) = x > y
68-
6958
# heap implementations
7059

7160
include("heaps/binary_heap.jl")
7261
include("heaps/mutable_binary_heap.jl")
73-
include("heaps/arrays_as_heaps.jl")
7462
include("heaps/minmax_heap.jl")
7563

7664
# generic functions
7765

7866
Base.eltype(::Type{<:AbstractHeap{T}}) where T = T
7967

68+
"""
69+
extract_all!(h)
70+
71+
Remove all elements from the heap and return them as an array in sorted order (heap head at first index).
72+
73+
Note that for simple heaps (not mutable or minmax)
74+
sorting the internal array of elements in-place is faster.
75+
"""
8076
function extract_all!(h::AbstractHeap{VT}) where VT
8177
n = length(h)
8278
r = Vector{VT}(undef, n)
@@ -86,6 +82,14 @@ function extract_all!(h::AbstractHeap{VT}) where VT
8682
return r
8783
end
8884

85+
"""
86+
extract_all_rev!(h)
87+
88+
Remove all elements from the heap and return them as an array in reverse sorted order (heap head at last index).
89+
90+
Note that for simple heaps (not mutable or minmax)
91+
sorting the internal array of elements in-place is faster.
92+
"""
8993
function extract_all_rev!(h::AbstractHeap{VT}) where VT
9094
n = length(h)
9195
r = Vector{VT}(undef, n)
@@ -97,50 +101,65 @@ end
97101

98102
# Array functions using heaps
99103

100-
function nextreme(comp::Comp, n::Int, arr::AbstractVector{T}) where {T, Comp}
104+
"""
105+
nextreme(ord, n, arr)
106+
107+
Return an array of the first `n` values of `arr` when sorted by `ord`.
108+
"""
109+
function nextreme(ord::Base.Ordering, n::Int, arr::AbstractVector{T}) where T
101110
if n <= 0
102111
return T[] # sort(arr)[1:n] returns [] for n <= 0
103112
elseif n >= length(arr)
104-
return sort(arr, lt = (x, y) -> compare(comp, y, x))
113+
return sort(arr, order = ord)
105114
end
106115

107-
buffer = BinaryHeap{T,Comp}()
116+
rev = Base.ReverseOrdering(ord)
108117

109-
for i = 1 : n
110-
@inbounds xi = arr[i]
111-
push!(buffer, xi)
112-
end
118+
buffer = heapify(arr[1:n], rev)
113119

114120
for i = n + 1 : length(arr)
115121
@inbounds xi = arr[i]
116-
if compare(comp, top(buffer), xi)
117-
# This could use a pushpop method
118-
pop!(buffer)
119-
push!(buffer, xi)
122+
if Base.lt(rev, buffer[1], xi)
123+
buffer[1] = xi
124+
percolate_down!(buffer, 1, rev)
120125
end
121126
end
122127

123-
return extract_all_rev!(buffer)
128+
return sort!(buffer, order = ord)
124129
end
125130

126131
"""
127132
nlargest(n, arr)
128133
129134
Return the `n` largest elements of the array `arr`.
130135
131-
Equivalent to `sort(arr, lt = >)[1:min(n, end)]`
136+
Equivalent to:
137+
sort(arr, order = Base.Reverse)[1:min(n, end)]
138+
139+
Note that if `arr` contains floats and is free of NaN values,
140+
then the following alternative may be used to achieve 2x performance.
141+
DataStructures.nextreme(DataStructures.FasterReverse(), n, arr)
142+
This faster version is equivalent to:
143+
sort(arr, lt = >)[1:min(n, end)]
132144
"""
133-
function nlargest(n::Int, arr::AbstractVector{T}) where T
134-
return nextreme(LessThan(), n, arr)
145+
function nlargest(n::Int, arr::AbstractVector)
146+
return nextreme(Base.Reverse, n, arr)
135147
end
136148

137149
"""
138150
nsmallest(n, arr)
139151
140152
Return the `n` smallest elements of the array `arr`.
141153
142-
Equivalent to `sort(arr, lt = <)[1:min(n, end)]`
154+
Equivalent to:
155+
sort(arr, order = Base.Forward)[1:min(n, end)]
156+
157+
Note that if `arr` contains floats and is free of NaN values,
158+
then the following alternative may be used to achieve 2x performance.
159+
DataStructures.nextreme(DataStructures.FasterForward(), n, arr)
160+
This faster version is equivalent to:
161+
sort(arr, lt = <)[1:min(n, end)]
143162
"""
144-
function nsmallest(n::Int, arr::AbstractVector{T}) where T
145-
return nextreme(GreaterThan(), n, arr)
163+
function nsmallest(n::Int, arr::AbstractVector)
164+
return nextreme(Base.Forward, n, arr)
146165
end

src/heaps/arrays_as_heaps.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ function percolate_up!(xs::AbstractArray, i::Integer, x=xs[i], o::Ordering=Forwa
4444
xs[i] = x
4545
end
4646

47-
percolate_up!(xs::AbstractArray{T}, i::Integer, o::Ordering) where {T} = percolate_up!(xs, i, xs[i], o)
47+
percolate_up!(xs::AbstractArray, i::Integer, o::Ordering) = percolate_up!(xs, i, xs[i], o)
4848

4949
"""
5050
heappop!(v, [ord])
@@ -69,12 +69,12 @@ For efficiency, this function does not check that the array is indeed heap-order
6969
"""
7070
function heappush!(xs::AbstractArray, x, o::Ordering=Forward)
7171
push!(xs, x)
72-
percolate_up!(xs, length(xs), x, o)
72+
percolate_up!(xs, length(xs), o)
7373
return xs
7474
end
7575

7676

77-
# Turn an arbitrary array into a binary min-heap in linear time.
77+
# Turn an arbitrary array into a binary min-heap (by default) in linear time.
7878
"""
7979
heapify!(v, ord::Ordering=Forward)
8080
@@ -111,6 +111,7 @@ julia> heapify(a, Base.Order.Reverse)
111111
2
112112
```
113113
"""
114+
# TODO: benchmarking shows `copy(xs)` outperforms `copyto!(similar(xs), xs)` for 10^6 Float64 elements
114115
heapify(xs::AbstractArray, o::Ordering=Forward) = heapify!(copyto!(similar(xs), xs), o)
115116

116117
"""

0 commit comments

Comments
 (0)