Skip to content

Commit 886cace

Browse files
JackDevine authored and quinnj committed
Add unique! (#20619)
1 parent 316da9e commit 886cace

File tree

6 files changed

+132
-0
lines changed

6 files changed

+132
-0
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ Library improvements
7777
* `logspace` now accepts a `base` keyword argument to specify the base of the logarithmic
7878
range. The base defaults to 10 ([#22310]).
7979

80+
* Added `unique!`, an in-place version of `unique` ([#20549]).
81+
8082
Compiler/Runtime improvements
8183
-----------------------------
8284

base/exports.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,7 @@ export
719719
symdiff,
720720
union!,
721721
union,
722+
unique!,
722723
unique,
723724
values,
724725
valtype,

base/set.jl

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,94 @@ function unique(f::Callable, C)
202202
out
203203
end
204204

205+
# If A is not grouped, then we will need to keep track of all of the elements that we have
# seen so far.
#
# Moves the first occurrence of every distinct element (membership is decided by the
# `Set`, i.e. by `isequal`/`hash`) to the front of `A`, preserving order, then shrinks
# `A` to the number of distinct elements and returns it. An empty `A` is returned
# unchanged (it is resized to length 0).
function _unique!(A::AbstractVector)
    seen = Set{eltype(A)}()
    lo = first(eachindex(A))
    w = lo - 1                  # index of the last slot written so far (none yet)
    for x in A
        if x ∉ seen
            push!(seen, x)
            w += 1
            A[w] = x            # the write position never passes the read position
        end
    end
    return resize!(A, w - lo + 1)
end
220+
221+
# If A is grouped, so that each unique element is in a contiguous group, then only the most
# recently kept element has to be remembered. Each time a new group starts, its first
# element is written to the next free slot at the front of A; after the whole vector has
# been scanned, A is shrunk to the number of groups that were found.
function _groupedunique!(A::AbstractVector)
    isempty(A) && return A
    lo = first(eachindex(A))
    kept = first(A)
    w = lo                      # slot holding the most recently kept element
    for x in A
        isequal(x, kept) && continue
        w += 1
        A[w] = x
        kept = x
    end
    return resize!(A, w - lo + 1)
end
239+
240+
"""
    unique!(A::AbstractVector)

Remove duplicate items from `A` in place, as determined by [`isequal`](@ref), and return
the modified `A`. The elements that remain keep the order in which they first occur. If
the order of the returned data does not matter, calling `(sort!(A); unique!(A))` will be
much more efficient, as long as the elements of `A` can be sorted.

```jldoctest
julia> unique!([1, 1, 1])
1-element Array{Int64,1}:
 1

julia> A = [7, 3, 2, 3, 7, 5];

julia> unique!(A)
4-element Array{Int64,1}:
 7
 3
 2
 5

julia> B = [7, 6, 42, 6, 7, 42];

julia> sort!(B);  # unique! is able to process sorted data much more efficiently.

julia> unique!(B)
3-element Array{Int64,1}:
  6
  7
 42
```
"""
function unique!(A::Union{AbstractVector{<:Real}, AbstractVector{<:AbstractString},
                          AbstractVector{<:Symbol}})
    isempty(A) && return A
    # Input sorted in either direction has its duplicates in contiguous runs, so the
    # grouped routine can be used; otherwise fall back to the general one.
    grouped = issorted(A) || issorted(A, rev=true)
    return grouped ? _groupedunique!(A) : _unique!(A)
end
283+
# issorted fails for some element types, so the method above has to be restricted to
# elements with isless/< defined. This method handles every other input: it never
# checks for sortedness and always uses the general routine.
function unique!(A)
    return isempty(A) ? A : _unique!(A)
end
292+
205293
"""
206294
allunique(itr) -> Bool
207295

doc/src/stdlib/collections.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ Base.eltype
7979
Base.indexin
8080
Base.findin
8181
Base.unique
82+
Base.unique!
8283
Base.allunique
8384
Base.reduce(::Any, ::Any, ::Any)
8485
Base.reduce(::Any, ::Any)

test/TestHelpers.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ _offset(out, ::Tuple{}, ::Tuple{}) = out
224224
indsoffset(r::Range) = first(r) - 1
225225
indsoffset(i::Integer) = 0
226226

227+
# Resize the wrapped parent vector to `nl` elements and return the wrapper itself, so
# that `resize!(A, n) === A` holds for offset vectors as well.
function Base.resize!(A::OffsetVector, nl::Integer)
    resize!(A.parent, nl)
    return A
end
228+
227229
end
228230

229231
end

test/sets.jl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

33
# Set tests
4+
isdefined(Main, :TestHelpers) || @eval Main include("TestHelpers.jl")
5+
using TestHelpers.OAs
46

57
# Construction, collect
68
@test ===(typeof(Set([1,2,3])), Set{Int})
@@ -221,6 +223,42 @@ u = unique([1,1,2])
221223
@test @inferred(unique(x for x in 1:1)) == [1]
222224
@test unique(x for x in Any[1,1.0])::Vector{Real} == [1]
223225

226+
# unique!
@testset "unique!" begin
    # unsorted input: first occurrences are kept, in order
    u = [1,1,3,2,1]
    unique!(u)
    @test u == [1,3,2]

    # empty input, both untyped and typed
    @test unique!([]) == []
    @test unique!(Float64[]) == Float64[]

    # ascending and descending sorted input; the argument itself must be returned
    u = [1,2,2,3,5,5]
    @test unique!(u) === u
    @test u == [1,2,3,5]
    u = [6,5,5,3,3,2,1]
    @test unique!(u) === u
    @test u == [6,5,3,2,1]

    # vectors whose indices do not start at 1
    u = OffsetArray([1,2,2,3,5,5], -1)
    @test unique!(u) === u
    @test u == OffsetArray([1,2,3,5], -1)
    u = OffsetArray([5,5,4,4,2,2,0,-1,-1], -1)
    @test unique!(u) === u
    @test u == OffsetArray([5,4,2,0,-1], -1)
    u = OffsetArray(["w","we","w",5,"r",5,5], -1)
    @test unique!(u) === u
    @test u == OffsetArray(["w","we",5,"r"], -1)

    # 0.0 and -0.0 are not `isequal`, so both must survive; `isequal` is used for the
    # comparison because `==` cannot tell them apart
    u = [0.0,-0.0,1.0,2]
    @test unique!(u) === u
    @test isequal(u, [0.0,-0.0,1.0,2.0])

    # NaN is `isequal` to itself, so the two NaNs collapse into one
    u = [1,NaN,NaN,3]
    @test unique!(u) === u
    @test length(u) == 3
    @test u[1] == 1
    @test isnan(u[2])
    @test u[3] == 3

    # mixed element types
    u = [5,"w","we","w","r",5,"w"]
    unique!(u)
    @test u == [5,"w","we","r"]

    # unsorted input with duplicates that are not adjacent
    u = [1,2,5,1,3,2]
    @test unique!(u) === u
    @test u == [1,2,5,3]
end
261+
224262
# allunique
225263
@test allunique([])
226264
@test allunique(Set())

0 commit comments

Comments
 (0)