Skip to content

Commit d971131

Browse files
CI: Store Passes in results.json + various improvements. (#57686)
- Adds option to enable saving Test.Pass results to DefaultTestSet (started as #57690 but more complete here) and enable it when Base.runtests is called when CI=true, to match when reports are saved - Stops attributing testset duration to tests from that testset, it doesn't make sense. - Make the test name resemble (as far as possible) the original test call. Making the name independent of test outcome should make it easier to group tests and identify flaky ones. - store repeat counts as tags, rather than in the name - moves the save to inside the test workers, where all test info is available, and the save can be done in parallel to save time , then jsons are made at the end and uploaded as an artifact in a tarball - speed improvements to make saving time acceptable - fixes a couple of tests that seem to get a little flakier with this Note on the WeakRef test fix: If a test like `@test x == y` passes, the `Pass` object retains `x`. Previously to this the Pass object was still made, but must have been GC-ed immediately. This PR remakes the Pass object when appending it to DefaultTestSet without the captured data. I think that just delayed the GC a little to make the test flaky, so I moved the `@test` to outside the function to avoid capturing `x`. Also buildkite had to fix an issue to make tests show up properly on our jobs: https://forum.buildkite.community/t/test-failures-showing-up-in-test-digest-but-not-on-jobs/4306
1 parent fefe379 commit d971131

File tree

7 files changed

+178
-62
lines changed

7 files changed

+178
-62
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
/julia-*
1212
/source-dist.tmp
1313
/source-dist.tmp1
14+
/test/results_*.json
15+
/test/results_*.dat
1416

1517
*.expmap
1618
*.exe

stdlib/Mmap/test/runtests.jl

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ close(s)
4747
@test_throws ErrorException mmap(file, Vector{Ref}) # must be bit-type
4848
GC.gc(); GC.gc()
4949

50+
file = tempname() # new name to reduce chance of issues due slow windows fs
5051
s = open(f->f,file,"w")
5152
@test mmap(file) == Vector{UInt8}() # requested len=0 on empty file
5253
@test mmap(file,Vector{UInt8},0) == Vector{UInt8}()
@@ -191,6 +192,7 @@ m = mmap(file,Vector{UInt8},2,6)
191192
@test_throws BoundsError m[3]
192193
finalize(m); m = nothing; GC.gc()
193194

195+
file = tempname() # new name to reduce chance of issues due slow windows fs
194196
s = open(file, "w")
195197
write(s, [0xffffffffffffffff,
196198
0xffffffffffffffff,

stdlib/Test/src/Test.jl

+16-2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ const DISPLAY_FAILED = (
4343

4444
const FAIL_FAST = Ref{Bool}(false)
4545

46+
const record_passes = OncePerProcess{Bool}() do
47+
return Base.get_bool_env("JULIA_TEST_RECORD_PASSES", false)
48+
end
49+
4650
#-----------------------------------------------------------------------
4751

4852
# Backtrace utility functions
@@ -1100,8 +1104,18 @@ struct FailFastError <: Exception end
11001104

11011105
# For a broken result, simply store the result
11021106
record(ts::DefaultTestSet, t::Broken) = (push!(ts.results, t); t)
1103-
# For a passed result, do not store the result since it uses a lot of memory
1104-
record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
1107+
# For a passed result, do not store the result since it uses a lot of memory, unless
1108+
# `record_passes()` is true. i.e. set env var `JULIA_TEST_RECORD_PASSES=true` before running any testsets
1109+
function record(ts::DefaultTestSet, t::Pass)
1110+
ts.n_passed += 1
1111+
if record_passes()
1112+
# throw away the captured data so it can be GC-ed
1113+
t_nodata = Pass(t.test_type, t.orig_expr, nothing, t.value, t.source, t.message_only)
1114+
push!(ts.results, t_nodata)
1115+
return t_nodata
1116+
end
1117+
return t
1118+
end
11051119

11061120
# For the other result types, immediately print the error message
11071121
# but do not terminate. Print a backtrace.

test/buildkitetestjson.jl

+143-50
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ module BuildkiteTestJSON
77

88
using Test
99
using Dates
10+
using Serialization
1011

11-
export write_testset_json_files
12+
export serialize_testset_result_file, write_testset_json_files
1213

1314
# Bootleg JSON writer
1415

@@ -64,14 +65,12 @@ function result_dict(testset::Test.DefaultTestSet, prefix::String="")
6465
"id" => Base.UUID(rand(UInt128)),
6566
"scope" => scope,
6667
"tags" => Dict{String,String}(
67-
"job_label" => get(ENV, "BUILDKITE_LABEL", "unknown"),
68-
"job_id" => get(ENV, "BUILDKITE_JOB_ID", "unknown"),
69-
"job_group" => get(ENV, "BUILDKITE_GROUP_LABEL", "unknown"),
7068
"os" => string(Sys.KERNEL),
7169
"arch" => string(Sys.ARCH),
7270
"julia_version" => string(VERSION),
7371
"testset" => testset.description,
7472
),
73+
# note we drop some of this from common_data before merging into individual results
7574
"history" => if !isnothing(testset.time_end)
7675
Dict{String,Any}(
7776
"start_at" => testset.time_start,
@@ -86,14 +85,33 @@ end
8685
# Test paths on runners are often in deep directories, so just make them contain enough information
8786
# to be able to identify the file. Also convert Windows-style paths to Unix-style paths so tests can
8887
# be grouped by file.
88+
const generalize_file_paths_cache = Dict{AbstractString,AbstractString}()
89+
const norm_build_root_path = normpath(Sys.BUILD_ROOT_PATH)
90+
const bindir_dir = dirname(Sys.BINDIR)
91+
const pathsep = Sys.iswindows() ? '\\' : '/'
8992
function generalize_file_paths(path::AbstractString)
90-
pathsep = Sys.iswindows() ? '\\' : '/'
91-
path = replace(path,
92-
string(Sys.STDLIB, pathsep) => "",
93-
string(normpath(Sys.BUILD_ROOT_PATH), pathsep) => "",
94-
string(dirname(Sys.BINDIR), pathsep) => ""
95-
)
96-
return Sys.iswindows() ? replace(path, "\\" => "/") : path
93+
return get!(generalize_file_paths_cache, path) do
94+
path = replace(path,
95+
Sys.STDLIB => "stdlib",
96+
string(norm_build_root_path, pathsep) => "",
97+
string(bindir_dir, pathsep) => ""
98+
)
99+
@static if Sys.iswindows()
100+
return replace(path, "\\" => "/")
101+
else
102+
return path
103+
end
104+
end
105+
end
106+
107+
# raw_file_path,line => file,location
108+
const location_cache = Dict{Tuple{Symbol,Int},Tuple{String,String}}()
109+
function get_location(file::Symbol, line::Int)
110+
return get!(location_cache, (file, line)) do
111+
_file = generalize_file_paths(string(file))
112+
_location = string(_file, ":", line)
113+
return _file, _location
114+
end
97115
end
98116

99117
# passed, failed, skipped, or unknown
@@ -111,28 +129,65 @@ function get_status(result)
111129
end
112130
end
113131

114-
function result_dict(result::Test.Result)
132+
# An attempt to reconstruct the test call.
133+
# Note we can't know if broken or skip was via the broken/skip macros or kwargs.
134+
const TEST_TYPE_MAP = Dict(
135+
:test => "@test",
136+
:test_nonbool => "@test",
137+
:test_error => "@test",
138+
:test_interrupted => "@test",
139+
:test_unbroken => "@test_broken",
140+
:skipped => "@test_skip",
141+
:test_throws => "@test_throws",
142+
:test_throws_wrong => "@test_throws",
143+
:test_throws_nothing => "@test_throws"
144+
)
145+
function get_test_call_str(result)
146+
prefix = get(TEST_TYPE_MAP, result.test_type, nothing)
147+
prefix === nothing && return error("Unknown test type $(repr(result.test_type))")
148+
return prefix == "@test_throws" ? "@test_throws $(result.data) $(result.orig_expr)" : "$prefix $(result.orig_expr)"
149+
end
150+
151+
get_rid(rdata) = (rdata["location"], rdata["result"], haskey(rdata, "failure_expanded") ? hash(rdata["failure_expanded"]) : UInt64(0))
152+
153+
const ResultCountDict = Dict{Tuple{String,String,UInt64},Int}
154+
155+
function is_duplicate_pass(result::Test.Pass, location, status, result_counts::ResultCountDict)
156+
rid = (location, status, UInt64(0))
157+
count = get(result_counts, rid, nothing)
158+
if count !== nothing
159+
result_counts[rid] = count + 1
160+
return true
161+
end
162+
return false
163+
end
164+
is_duplicate_pass(result::Test.Result, location, status, result_counts::ResultCountDict) = false
165+
166+
function result_dict(result::Test.Result, result_counts::ResultCountDict)
115167
file, line = if !hasproperty(result, :source) || isnothing(result.source)
116-
"unknown", 0
168+
:unknown, 0
117169
else
118-
something(result.source.file, "unknown"), result.source.line
170+
something(result.source.file, :unknown), result.source.line
119171
end
120-
file = generalize_file_paths(string(file))
121-
172+
file, location = get_location(file, line)
122173
status = get_status(result)
123174

124-
result_show = sprint(show, result; context=:color => false)
125-
firstline = split(result_show, '\n')[1]
126-
primary_reason = split(firstline, " at ")[1]
175+
# Early exit for passed tests before more expensive operations
176+
if is_duplicate_pass(result, location, status, result_counts)
177+
return nothing
178+
end
127179

128180
data = Dict{String,Any}(
129-
"name" => "$(primary_reason). Expression: $(result.orig_expr)",
130-
"location" => string(file, ':', line),
131-
"file_name" => file,
132-
"result" => status)
181+
"location" => location,
182+
"result" => status,
183+
"name" => get_test_call_str(result),
184+
"file_name" => file)
133185

134-
job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
135186
if result isa Test.Fail || result isa Test.Error
187+
job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
188+
result_show = sprint(show, result; context=:color => false)
189+
firstline = split(result_show, '\n')[1]
190+
# put the job label at the end here because of the way buildkite UI is laid out
136191
data["failure_reason"] = generalize_file_paths(firstline) * " | $job_label"
137192
err_trace = split(result_show, "\nStacktrace:\n", limit=2)
138193
if length(err_trace) == 2
@@ -142,49 +197,87 @@ function result_dict(result::Test.Result)
142197
data["failure_expanded"] = [Dict{String,Any}("expanded" => split(result_show, '\n'), "backtrace" => [])]
143198
end
144199
end
145-
return data
200+
201+
rid = get_rid(data)
202+
duplicate = haskey(result_counts, rid)
203+
204+
if duplicate
205+
result_counts[rid] += 1
206+
return nothing
207+
else
208+
result_counts[rid] = 1
209+
return data
210+
end
211+
end
212+
213+
function collect_results!(results::Vector{Dict{String,Any}}, result::Test.Result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
214+
rdata = result_dict(result, result_counts)
215+
if rdata !== nothing # nothing if it's a duplicate that's been counted
216+
push!(results, merge(common_data, rdata))
217+
end
218+
end
219+
function collect_results!(results::Vector{Dict{String,Any}}, result::Test.DefaultTestSet, common_data::Dict{String,Any}, result_counts::ResultCountDict)
220+
collect_results!(results, result, common_data["scope"])
221+
end
222+
function collect_results!(results::Vector{Dict{String,Any}}, result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
223+
return nothing
146224
end
147225

148226
function collect_results!(results::Vector{Dict{String,Any}}, testset::Test.DefaultTestSet, prefix::String="")
149227
common_data = result_dict(testset, prefix)
228+
# testset duration is not relevant for individual test results
229+
common_data["history"]["duration"] = 0.0 # required field
230+
delete!(common_data["history"], "end_at")
150231
result_offset = length(results) + 1
151-
result_counts = Dict{Tuple{String,String},Int}()
152-
get_rid(rdata) = (rdata["location"], rdata["result"])
153-
for (i, result) in enumerate(testset.results)
154-
if result isa Test.Result
155-
rdata = result_dict(result)
156-
rid = get_rid(rdata)
157-
if haskey(result_counts, rid)
158-
result_counts[rid] += 1
159-
else
160-
result_counts[rid] = 1
161-
push!(results, merge(common_data, rdata))
162-
end
163-
elseif result isa Test.DefaultTestSet
164-
collect_results!(results, result, common_data["scope"])
165-
end
232+
result_counts = ResultCountDict()
233+
234+
for result in testset.results
235+
collect_results!(results, result, common_data, result_counts)
166236
end
167-
# Modify names to hold `result_counts`
168-
for i in result_offset:length(results)
169-
result = results[i]
237+
# Add a tag for count of each result
238+
for result in results[result_offset:end]
170239
rid = get_rid(result)
171-
if get(result_counts, rid, 0) > 1
172-
result["name"] = replace(result["name"], r"^([^:]):" =>
173-
SubstitutionString("\\1 (x$(result_counts[rid])):"))
174-
end
240+
result["tags"]["count"] = string(get(result_counts, rid, 1))
175241
end
176242
return results
177243
end
178244

179-
function write_testset_json_files(dir::String, testset::Test.DefaultTestSet)
245+
function serialize_testset_result_file(dir::String, testset::Test.DefaultTestSet)
246+
data = Dict{String,Any}[]
247+
t = @elapsed collect_results!(data, testset)
248+
if t > 20 # most are << 5s
249+
@warn "Collating test result data was slow: $t seconds" collated_results=length(data)
250+
end
251+
name = replace(testset.description, r"[^a-zA-Z0-9]" => "_")
252+
res_file = joinpath(dir, "results_$(name).dat")
253+
t = @elapsed Serialization.serialize(res_file, data)
254+
if t > 10
255+
@warn "Serializing test result data was slow: $t seconds" file = res_file size = Base.format_bytes(filesize(res_file))
256+
end
257+
return res_file
258+
end
259+
260+
# deserilalizes the results files and writes them to collated JSON files of 5000 max results
261+
function write_testset_json_files(dir::String)
180262
data = Dict{String,Any}[]
181-
collect_results!(data, testset)
263+
read_files = String[]
264+
for res_dat in filter!(x -> occursin(r"^results.*\.dat$", x), readdir(dir))
265+
res_file = joinpath(dir, res_dat)
266+
append!(data, Serialization.deserialize(res_file))
267+
@debug "Loaded $(basename(res_file)) ($(Base.format_bytes(filesize(res_file))))"
268+
push!(read_files, res_file)
269+
end
182270
files = String[]
183271
# Buildkite is limited to 5000 results per file https://buildkite.com/docs/test-analytics/importing-json
184272
for (i, chunk) in enumerate(Iterators.partition(data, 5000))
185-
res_file = joinpath(dir, "results_$i.json")
273+
res_file = joinpath(dir, "results_$(lpad(i, 3, '0')).json")
186274
open(io -> json_repr(io, chunk), res_file, "w")
187275
push!(files, res_file)
276+
@debug "Saved $(basename(res_file)) ($(length(chunk)) results, $(Base.format_bytes(filesize(res_file))))"
277+
end
278+
for res_file in read_files
279+
rm(res_file)
280+
@debug "Deleted $(basename(res_file))"
188281
end
189282
return files
190283
end

test/core.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -2852,15 +2852,15 @@ mutable struct Obj; x; end
28522852
push!(wr, WeakRef(x))
28532853
nothing
28542854
end
2855-
@noinline test_wr(r, wr) = @test r[1] == wr[1].value
2855+
@noinline test_wr(r, wr) = r[1] == wr[1].value
28562856
function test_wr()
28572857
# we need to be very careful here that we never
28582858
# use the value directly in this function, so we aren't dependent
28592859
# on optimizations deleting the root for it before reaching the test
28602860
ref = []
28612861
wref = []
28622862
mk_wr(ref, wref)
2863-
test_wr(ref, wref)
2863+
@test test_wr(ref, wref)
28642864
GC.gc()
28652865
test_wr(ref, wref)
28662866
empty!(ref)

test/runtests.jl

+2-7
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ include("choosetests.jl")
1313
include("testenv.jl")
1414
include("buildkitetestjson.jl")
1515

16-
using .BuildkiteTestJSON
17-
1816
(; tests, net_on, exit_on_error, use_revise, seed) = choosetests(ARGS)
1917
tests = unique(tests)
2018

@@ -344,6 +342,8 @@ cd(@__DIR__) do
344342
end
345343
end
346344

345+
BuildkiteTestJSON.write_testset_json_files(@__DIR__)
346+
347347
#=
348348
` Construct a testset on the master node which will hold results from all the
349349
test files run on workers and on node1. The loop goes through the results,
@@ -418,11 +418,6 @@ cd(@__DIR__) do
418418
Test.pop_testset()
419419
end
420420

421-
if Base.get_bool_env("CI", false)
422-
@info "Writing test result data to $(@__DIR__)"
423-
write_testset_json_files(@__DIR__, o_ts)
424-
end
425-
426421
Test.TESTSET_PRINT_ENABLE[] = true
427422
println()
428423
# o_ts.verbose = true # set to true to show all timings when successful

0 commit comments

Comments
 (0)