Skip to content

Commit 8301633

Browse files
speed up saving results.json files
also rm tags we now set during upload see JuliaCI/julia-buildkite#435
1 parent adfefa4 commit 8301633

File tree

1 file changed

+141
-50
lines changed

1 file changed

+141
-50
lines changed

test/buildkitetestjson.jl

+141-50
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ module BuildkiteTestJSON
77

88
using Test
99
using Dates
10+
using Serialization
1011

11-
export write_testset_json_files
12+
export serialize_testset_result_file, write_testset_json_files
1213

1314
# Bootleg JSON writer
1415

@@ -64,14 +65,12 @@ function result_dict(testset::Test.DefaultTestSet, prefix::String="")
6465
"id" => Base.UUID(rand(UInt128)),
6566
"scope" => scope,
6667
"tags" => Dict{String,String}(
67-
"job_label" => get(ENV, "BUILDKITE_LABEL", "unknown"),
68-
"job_id" => get(ENV, "BUILDKITE_JOB_ID", "unknown"),
69-
"job_group" => get(ENV, "BUILDKITE_GROUP_LABEL", "unknown"),
7068
"os" => string(Sys.KERNEL),
7169
"arch" => string(Sys.ARCH),
7270
"julia_version" => string(VERSION),
7371
"testset" => testset.description,
7472
),
73+
# note we drop some of this from common_data before merging into individual results
7574
"history" => if !isnothing(testset.time_end)
7675
Dict{String,Any}(
7776
"start_at" => testset.time_start,
@@ -86,14 +85,33 @@ end
8685
# Test paths on runners are often in deep directories, so just make them contain enough information
8786
# to be able to identify the file. Also convert Windows-style paths to Unix-style paths so tests can
8887
# be grouped by file.
88+
const generalize_file_paths_cache = Dict{AbstractString,AbstractString}()
89+
const norm_build_root_path = normpath(Sys.BUILD_ROOT_PATH)
90+
const bindir_dir = dirname(Sys.BINDIR)
91+
const pathsep = Sys.iswindows() ? '\\' : '/'
8992
function generalize_file_paths(path::AbstractString)
90-
pathsep = Sys.iswindows() ? '\\' : '/'
91-
path = replace(path,
92-
string(Sys.STDLIB, pathsep) => "",
93-
string(normpath(Sys.BUILD_ROOT_PATH), pathsep) => "",
94-
string(dirname(Sys.BINDIR), pathsep) => ""
95-
)
96-
return Sys.iswindows() ? replace(path, "\\" => "/") : path
93+
return get!(generalize_file_paths_cache, path) do
94+
path = replace(path,
95+
Sys.STDLIB => "stdlib",
96+
string(norm_build_root_path, pathsep) => "",
97+
string(bindir_dir, pathsep) => ""
98+
)
99+
@static if Sys.iswindows()
100+
return replace(path, "\\" => "/")
101+
else
102+
return path
103+
end
104+
end
105+
end
106+
107+
# raw_file_path,line => file,location
108+
const location_cache = Dict{Tuple{Symbol,Int},Tuple{String,String}}()
109+
# Map a raw `(file, line)` source position to `(file, location)`, where `file` is the
# path shortened by `generalize_file_paths` and `location` is "file:line".
# Results are memoised in `location_cache`, so each distinct position is formatted once.
function get_location(file::Symbol, line::Int)
    key = (file, line)
    cached = get(location_cache, key, nothing)
    cached === nothing || return cached
    short_file = generalize_file_paths(string(file))
    entry = (short_file, string(short_file, ":", line))
    location_cache[key] = entry
    return entry
end
98116

99117
# passed, failed, skipped, or unknown
@@ -111,28 +129,65 @@ function get_status(result)
111129
end
112130
end
113131

114-
function result_dict(result::Test.Result)
132+
# An attempt to reconstruct the test call.
133+
# Note we can't know if broken or skip was via the broken/skip macros or kwargs.
134+
# Lookup from the `test_type` symbol stored on a test result to the macro name used
# when rendering the result's "name".
const TEST_TYPE_MAP = Dict{Symbol,String}(
    # plain @test outcomes
    :test => "@test",
    :test_nonbool => "@test",
    :test_error => "@test",
    :test_interrupted => "@test",
    # broken / skipped
    :test_unbroken => "@test_broken",
    :skipped => "@test_skip",
    # @test_throws outcomes
    :test_throws => "@test_throws",
    :test_throws_wrong => "@test_throws",
    :test_throws_nothing => "@test_throws",
)

# Build a string resembling the original test call from `result`.
# Reads `result.test_type` and `result.orig_expr` (plus `result.data` for the
# `@test_throws` family). Throws via `error` when the test type is not in `TEST_TYPE_MAP`.
function get_test_call_str(result)
    haskey(TEST_TYPE_MAP, result.test_type) ||
        return error("Unknown test type $(repr(result.test_type))")
    prefix = TEST_TYPE_MAP[result.test_type]
    if prefix == "@test_throws"
        # include the expected exception (held in `data`) ahead of the expression
        return "@test_throws $(result.data) $(result.orig_expr)"
    else
        return "$prefix $(result.orig_expr)"
    end
end
150+
151+
# Identity key used to detect repeated results: the result's location, its status, and
# a hash of the expanded failure details when present (`UInt64(0)` otherwise).
function get_rid(rdata)
    location = rdata["location"]
    status = rdata["result"]
    if haskey(rdata, "failure_expanded")
        return (location, status, hash(rdata["failure_expanded"]))
    else
        return (location, status, UInt64(0))
    end
end
152+
153+
# Occurrence counter keyed by (location, status, failure-details hash).
const ResultCountDict = Dict{Tuple{String,String,UInt64},Int}

# Cheap duplicate check for passing results. A pass carries no failure details, so its
# key always ends in `UInt64(0)`. When that key has already been recorded, bump its
# count and report `true`; otherwise leave `result_counts` untouched and report `false`.
function is_duplicate_pass(result::Test.Pass, location, status, result_counts::ResultCountDict)
    key = (location, status, UInt64(0))
    haskey(result_counts, key) || return false
    result_counts[key] += 1
    return true
end

# Any other kind of result is never short-circuited here.
function is_duplicate_pass(result::Test.Result, location, status, result_counts::ResultCountDict)
    return false
end
165+
166+
function result_dict(result::Test.Result, result_counts::ResultCountDict)
115167
file, line = if !hasproperty(result, :source) || isnothing(result.source)
116-
"unknown", 0
168+
:unknown, 0
117169
else
118-
something(result.source.file, "unknown"), result.source.line
170+
something(result.source.file, :unknown), result.source.line
119171
end
120-
file = generalize_file_paths(string(file))
121-
172+
file, location = get_location(file, line)
122173
status = get_status(result)
123174

124-
result_show = sprint(show, result; context=:color => false)
125-
firstline = split(result_show, '\n')[1]
126-
primary_reason = split(firstline, " at ")[1]
175+
# Early exit for passed tests before more expensive operations
176+
if is_duplicate_pass(result, location, status, result_counts)
177+
return nothing
178+
end
127179

128180
data = Dict{String,Any}(
129-
"name" => "$(primary_reason). Expression: $(result.orig_expr)",
130-
"location" => string(file, ':', line),
131-
"file_name" => file,
132-
"result" => status)
181+
"location" => location,
182+
"result" => status,
183+
"name" => get_test_call_str(result),
184+
"file_name" => file)
133185

134-
job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
135186
if result isa Test.Fail || result isa Test.Error
187+
job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
188+
result_show = sprint(show, result; context=:color => false)
189+
firstline = split(result_show, '\n')[1]
190+
# put the job label at the end here because of the way buildkite UI is laid out
136191
data["failure_reason"] = generalize_file_paths(firstline) * " | $job_label"
137192
err_trace = split(result_show, "\nStacktrace:\n", limit=2)
138193
if length(err_trace) == 2
@@ -142,49 +197,85 @@ function result_dict(result::Test.Result)
142197
data["failure_expanded"] = [Dict{String,Any}("expanded" => split(result_show, '\n'), "backtrace" => [])]
143198
end
144199
end
145-
return data
200+
201+
rid = get_rid(data)
202+
duplicate = haskey(result_counts, rid)
203+
204+
if duplicate
205+
result_counts[rid] += 1
206+
return nothing
207+
else
208+
result_counts[rid] = 1
209+
return data
210+
end
211+
end
212+
213+
# Record a single test result in `results`.
#
# `result_dict` returns `nothing` for a duplicate that it has already counted in
# `result_counts`; such results are not pushed. Otherwise the per-result data is merged
# over the testset-level `common_data` and appended. Returns `results`.
function collect_results!(results::Vector{Dict{String,Any}}, result::Test.Result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
    rdata = result_dict(result, result_counts)
    rdata === nothing && return results # duplicate that's been counted
    entry = merge(common_data, rdata)
    # `merge` copies only the top-level pairs, so without this every entry of a testset
    # would share one "tags" Dict and a per-result tag written later (e.g. "count")
    # would overwrite the value for all sibling results.
    tags = get(entry, "tags", nothing)
    if tags isa AbstractDict
        entry["tags"] = copy(tags)
    end
    push!(results, entry)
    return results
end

# A nested testset is collected through the testset-level method, passing the enclosing
# testset's "scope" as the prefix. `result_counts` is not forwarded.
function collect_results!(results::Vector{Dict{String,Any}}, result::Test.DefaultTestSet, common_data::Dict{String,Any}, result_counts::ResultCountDict)
    return collect_results!(results, result, common_data["scope"])
end
147223

148224
function collect_results!(results::Vector{Dict{String,Any}}, testset::Test.DefaultTestSet, prefix::String="")
149225
common_data = result_dict(testset, prefix)
226+
# testset duration is not relevant for individual test results
227+
common_data["history"]["duration"] = 0.0 # required field
228+
delete!(common_data["history"], "end_at")
150229
result_offset = length(results) + 1
151-
result_counts = Dict{Tuple{String,String},Int}()
152-
get_rid(rdata) = (rdata["location"], rdata["result"])
153-
for (i, result) in enumerate(testset.results)
154-
if result isa Test.Result
155-
rdata = result_dict(result)
156-
rid = get_rid(rdata)
157-
if haskey(result_counts, rid)
158-
result_counts[rid] += 1
159-
else
160-
result_counts[rid] = 1
161-
push!(results, merge(common_data, rdata))
162-
end
163-
elseif result isa Test.DefaultTestSet
164-
collect_results!(results, result, common_data["scope"])
165-
end
230+
result_counts = ResultCountDict()
231+
232+
for result in testset.results
233+
collect_results!(results, result, common_data, result_counts)
166234
end
167-
# Modify names to hold `result_counts`
168-
for i in result_offset:length(results)
169-
result = results[i]
235+
# Add a tag for count of each result
236+
for result in results[result_offset:end]
170237
rid = get_rid(result)
171-
if get(result_counts, rid, 0) > 1
172-
result["name"] = replace(result["name"], r"^([^:]):" =>
173-
SubstitutionString("\\1 (x$(result_counts[rid])):"))
174-
end
238+
result["tags"]["count"] = string(get(result_counts, rid, 1))
175239
end
176240
return results
177241
end
178242

179-
function write_testset_json_files(dir::String, testset::Test.DefaultTestSet)
243+
# Collect every result of `testset` and serialize the collection to
# `results_<name>.dat` inside `dir`, where `<name>` is the testset description with all
# non-alphanumeric characters replaced by underscores. Both phases are timed and a
# warning is logged when either is unexpectedly slow. Returns the path of the file written.
function serialize_testset_result_file(dir::String, testset::Test.DefaultTestSet)
    collected = Dict{String,Any}[]
    t = @elapsed collect_results!(collected, testset)
    # most are << 5s
    t > 20 && @warn "Collating test result data was slow: $t seconds" collated_results=length(collected)

    name = replace(testset.description, r"[^a-zA-Z0-9]" => "_")
    res_file = joinpath(dir, "results_$(name).dat")

    t = @elapsed Serialization.serialize(res_file, collected)
    t > 10 && @warn "Serializing test result data was slow: $t seconds" file = res_file size = Base.format_bytes(filesize(res_file))

    return res_file
end
257+
258+
# Deserializes the result files and writes them to collated JSON files of at most 5000 results each
259+
function write_testset_json_files(dir::String)
180260
data = Dict{String,Any}[]
181-
collect_results!(data, testset)
261+
read_files = String[]
262+
for res_dat in filter!(x -> occursin(r"^results.*\.dat$", x), readdir(dir))
263+
res_file = joinpath(dir, res_dat)
264+
append!(data, Serialization.deserialize(res_file))
265+
@debug "Loaded $(basename(res_file)) ($(Base.format_bytes(filesize(res_file))))"
266+
push!(read_files, res_file)
267+
end
182268
files = String[]
183269
# Buildkite is limited to 5000 results per file https://buildkite.com/docs/test-analytics/importing-json
184270
for (i, chunk) in enumerate(Iterators.partition(data, 5000))
185-
res_file = joinpath(dir, "results_$i.json")
271+
res_file = joinpath(dir, "results_$(lpad(i, 3, '0')).json")
186272
open(io -> json_repr(io, chunk), res_file, "w")
187273
push!(files, res_file)
274+
@debug "Saved $(basename(res_file)) ($(length(chunk)) results, $(Base.format_bytes(filesize(res_file))))"
275+
end
276+
for res_file in read_files
277+
rm(res_file)
278+
@debug "Deleted $(basename(res_file))"
188279
end
189280
return files
190281
end

0 commit comments

Comments
 (0)