Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI: Store Passes in results.json + various improvements. #57686

Merged
merged 6 commits into from
Mar 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
/julia-*
/source-dist.tmp
/source-dist.tmp1
/test/results_*.json
/test/results_*.dat

*.expmap
*.exe
Expand Down
2 changes: 2 additions & 0 deletions stdlib/Mmap/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ close(s)
@test_throws ErrorException mmap(file, Vector{Ref}) # must be bit-type
GC.gc(); GC.gc()

file = tempname() # new name to reduce the chance of issues due to a slow Windows filesystem
s = open(f->f,file,"w")
@test mmap(file) == Vector{UInt8}() # requested len=0 on empty file
@test mmap(file,Vector{UInt8},0) == Vector{UInt8}()
Expand Down Expand Up @@ -191,6 +192,7 @@ m = mmap(file,Vector{UInt8},2,6)
@test_throws BoundsError m[3]
finalize(m); m = nothing; GC.gc()

file = tempname() # new name to reduce the chance of issues due to a slow Windows filesystem
s = open(file, "w")
write(s, [0xffffffffffffffff,
0xffffffffffffffff,
Expand Down
18 changes: 16 additions & 2 deletions stdlib/Test/src/Test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ const DISPLAY_FAILED = (

const FAIL_FAST = Ref{Bool}(false)

# Whether passing results should be stored in testsets. The value is read from the
# `JULIA_TEST_RECORD_PASSES` environment variable (default `false`) through a
# `OncePerProcess` wrapper; call it as `record_passes()`.
const record_passes = OncePerProcess{Bool}() do
    Base.get_bool_env("JULIA_TEST_RECORD_PASSES", false)
end

#-----------------------------------------------------------------------

# Backtrace utility functions
Expand Down Expand Up @@ -1100,8 +1104,18 @@ struct FailFastError <: Exception end

# Broken results are always kept: append the result to the testset's `results`
# and hand the same result back to the caller.
function record(ts::DefaultTestSet, t::Broken)
    push!(ts.results, t)
    return t
end
# For a passed result, do not store the result since it uses a lot of memory
record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
# Passing results are normally only counted, because storing them uses a lot of memory.
# When `record_passes()` is true (i.e. the env var `JULIA_TEST_RECORD_PASSES=true` was set
# before running any testsets) a copy of the result with its `data` field replaced by
# `nothing` is stored and returned instead.
function record(ts::DefaultTestSet, t::Pass)
    ts.n_passed += 1
    # Common case: count the pass and return the original result untouched.
    record_passes() || return t
    # Drop the captured data so it can be GC-ed; every other field is carried over.
    stripped = Pass(t.test_type, t.orig_expr, nothing, t.value, t.source, t.message_only)
    push!(ts.results, stripped)
    return stripped
end

# For the other result types, immediately print the error message
# but do not terminate. Print a backtrace.
Expand Down
193 changes: 143 additions & 50 deletions test/buildkitetestjson.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ module BuildkiteTestJSON

using Test
using Dates
using Serialization

export write_testset_json_files
export serialize_testset_result_file, write_testset_json_files

# Bootleg JSON writer

Expand Down Expand Up @@ -64,14 +65,12 @@ function result_dict(testset::Test.DefaultTestSet, prefix::String="")
"id" => Base.UUID(rand(UInt128)),
"scope" => scope,
"tags" => Dict{String,String}(
"job_label" => get(ENV, "BUILDKITE_LABEL", "unknown"),
"job_id" => get(ENV, "BUILDKITE_JOB_ID", "unknown"),
"job_group" => get(ENV, "BUILDKITE_GROUP_LABEL", "unknown"),
"os" => string(Sys.KERNEL),
"arch" => string(Sys.ARCH),
"julia_version" => string(VERSION),
"testset" => testset.description,
),
# note we drop some of this from common_data before merging into individual results
"history" => if !isnothing(testset.time_end)
Dict{String,Any}(
"start_at" => testset.time_start,
Expand All @@ -86,14 +85,33 @@ end
# Test paths on runners are often in deep directories, so just make them contain enough information
# to be able to identify the file. Also convert Windows-style paths to Unix-style paths so tests can
# be grouped by file.
# Memoizes `generalize_file_paths`: raw path => shortened path.
const generalize_file_paths_cache = Dict{AbstractString,AbstractString}()
# Prefixes removed from reported paths, computed once at load time.
const norm_build_root_path = normpath(Sys.BUILD_ROOT_PATH)
const bindir_dir = dirname(Sys.BINDIR)
const pathsep = Sys.iswindows() ? '\\' : '/'

"""
    generalize_file_paths(path::AbstractString)

Return a shortened, machine-independent form of `path`.

The `Sys.STDLIB` directory is replaced by `"stdlib"`, and the build root and the parent
of `Sys.BINDIR` (each followed by the path separator) are removed. On Windows,
backslashes are converted to forward slashes. Results are memoized in
`generalize_file_paths_cache`, keyed by the input path.
"""
function generalize_file_paths(path::AbstractString)
    return get!(generalize_file_paths_cache, path) do
        # Bind the result to a new local instead of reassigning the captured `path`,
        # which would force the closure to box it.
        shortened = replace(path,
            Sys.STDLIB => "stdlib",
            string(norm_build_root_path, pathsep) => "",
            string(bindir_dir, pathsep) => "",
        )
        @static if Sys.iswindows()
            replace(shortened, "\\" => "/")
        else
            shortened
        end
    end
end

# Cache mapping (raw file path, line) => (generalized file, "file:line" location)
const location_cache = Dict{Tuple{Symbol,Int},Tuple{String,String}}()

# Return the generalized file name and the `file:line` location string for a source
# position, computing them only the first time a given (file, line) pair is seen.
function get_location(file::Symbol, line::Int)
    key = (file, line)
    return get!(location_cache, key) do
        short_file = generalize_file_paths(string(file))
        (short_file, string(short_file, ":", line))
    end
end

# passed, failed, skipped, or unknown
Expand All @@ -111,28 +129,65 @@ function get_status(result)
end
end

function result_dict(result::Test.Result)
# An attempt to reconstruct the test call.
# Note we can't know if broken or skip was via the broken/skip macros or kwargs.
const TEST_TYPE_MAP = Dict{Symbol,String}(
    :test => "@test",
    :test_nonbool => "@test",
    :test_error => "@test",
    :test_interrupted => "@test",
    :test_unbroken => "@test_broken",
    :skipped => "@test_skip",
    :test_throws => "@test_throws",
    :test_throws_wrong => "@test_throws",
    :test_throws_nothing => "@test_throws",
)

"""
    get_test_call_str(result)

Return a string approximating the macro call that produced `result`, based on
`result.test_type` and `result.orig_expr`.

For `@test_throws` results the value of `result.data` is placed between the macro name
and the expression. When `result.data` is `nothing` (for example a recorded pass whose
data was dropped) it is omitted, so the name does not contain a literal `nothing`.

Throws an `ErrorException` if `result.test_type` is not a key of `TEST_TYPE_MAP`.
"""
function get_test_call_str(result)
    prefix = get(TEST_TYPE_MAP, result.test_type, nothing)
    prefix === nothing && error("Unknown test type $(repr(result.test_type))")
    # Only `@test_throws` results read `data`; other result types may not have that field.
    prefix == "@test_throws" || return "$prefix $(result.orig_expr)"
    data = result.data
    if data === nothing
        return "@test_throws $(result.orig_expr)"
    end
    return "@test_throws $(data) $(result.orig_expr)"
end

# Build the identity of a result entry: its location, its status, and a hash of the
# expanded failure output when present (`UInt64(0)` otherwise).
function get_rid(rdata)
    failure_hash = if haskey(rdata, "failure_expanded")
        hash(rdata["failure_expanded"])
    else
        UInt64(0)
    end
    return (rdata["location"], rdata["result"], failure_hash)
end

const ResultCountDict = Dict{Tuple{String,String,UInt64},Int}

# Return `true` (and bump the stored count) when a pass with the same location and status
# has already been counted in `result_counts`; return `false` without touching the counts
# otherwise. Passes are keyed with a zero failure hash.
function is_duplicate_pass(result::Test.Pass, location, status, result_counts::ResultCountDict)
    key = (location, status, UInt64(0))
    haskey(result_counts, key) || return false
    result_counts[key] += 1
    return true
end

# Any other result type is never treated as a duplicate pass.
function is_duplicate_pass(result::Test.Result, location, status, result_counts::ResultCountDict)
    return false
end

function result_dict(result::Test.Result, result_counts::ResultCountDict)
file, line = if !hasproperty(result, :source) || isnothing(result.source)
"unknown", 0
:unknown, 0
else
something(result.source.file, "unknown"), result.source.line
something(result.source.file, :unknown), result.source.line
end
file = generalize_file_paths(string(file))

file, location = get_location(file, line)
status = get_status(result)

result_show = sprint(show, result; context=:color => false)
firstline = split(result_show, '\n')[1]
primary_reason = split(firstline, " at ")[1]
# Early exit for passed tests before more expensive operations
if is_duplicate_pass(result, location, status, result_counts)
return nothing
end

data = Dict{String,Any}(
"name" => "$(primary_reason). Expression: $(result.orig_expr)",
"location" => string(file, ':', line),
"file_name" => file,
"result" => status)
"location" => location,
"result" => status,
"name" => get_test_call_str(result),
"file_name" => file)

job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
if result isa Test.Fail || result isa Test.Error
job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
result_show = sprint(show, result; context=:color => false)
firstline = split(result_show, '\n')[1]
# put the job label at the end here because of the way buildkite UI is laid out
data["failure_reason"] = generalize_file_paths(firstline) * " | $job_label"
err_trace = split(result_show, "\nStacktrace:\n", limit=2)
if length(err_trace) == 2
Expand All @@ -142,49 +197,87 @@ function result_dict(result::Test.Result)
data["failure_expanded"] = [Dict{String,Any}("expanded" => split(result_show, '\n'), "backtrace" => [])]
end
end
return data

rid = get_rid(data)
duplicate = haskey(result_counts, rid)

if duplicate
result_counts[rid] += 1
return nothing
else
result_counts[rid] = 1
return data
end
end

# Append the entry for a single test result to `results`, unless `result_dict` reports it
# as an already-counted duplicate (by returning `nothing`).
function collect_results!(results::Vector{Dict{String,Any}}, result::Test.Result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
    rdata = result_dict(result, result_counts)
    rdata === nothing && return nothing # a duplicate that has been counted
    entry = merge(common_data, rdata)
    # `merge` is shallow, so without this every entry of a testset would share the one
    # `tags` Dict from `common_data`, and a per-result tag (such as the `count` tag that
    # is written after collection) would overwrite the value on all sibling entries.
    if haskey(entry, "tags")
        entry["tags"] = copy(entry["tags"])
    end
    return push!(results, entry)
end

# A nested testset: recurse, passing the parent's scope as the prefix.
function collect_results!(results::Vector{Dict{String,Any}}, result::Test.DefaultTestSet, common_data::Dict{String,Any}, result_counts::ResultCountDict)
    collect_results!(results, result, common_data["scope"])
end

# Anything else stored in a testset's results is ignored.
function collect_results!(results::Vector{Dict{String,Any}}, result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
    return nothing
end

"""
    collect_results!(results, testset::Test.DefaultTestSet, prefix::String="")

Append one entry per distinct result of `testset` (recursing into nested testsets) to
`results` and return `results`.

Duplicate results are counted rather than repeated; each entry produced directly by this
testset receives a `"count"` tag holding how many times it occurred.
"""
function collect_results!(results::Vector{Dict{String,Any}}, testset::Test.DefaultTestSet, prefix::String="")
    common_data = result_dict(testset, prefix)
    # testset duration is not relevant for individual test results
    common_data["history"]["duration"] = 0.0 # required field
    delete!(common_data["history"], "end_at")
    result_counts = ResultCountDict()

    # Remember which entries were produced by this testset's own results. Entries appended
    # by nested testsets were already tagged during recursion with their own counts and
    # must not be re-tagged using this testset's `result_counts`.
    own_indices = Int[]
    for result in testset.results
        n_before = length(results)
        collect_results!(results, result, common_data, result_counts)
        if result isa Test.Result && length(results) > n_before
            push!(own_indices, lastindex(results))
        end
    end

    # Add a tag for count of each result
    for i in own_indices
        entry = results[i]
        # Entries are built by a shallow merge with `common_data`, so `tags` may be one
        # Dict shared by every entry of this testset; write the count into a private copy.
        tags = copy(entry["tags"])
        tags["count"] = string(get(result_counts, get_rid(entry), 1))
        entry["tags"] = tags
    end
    return results
end

function write_testset_json_files(dir::String, testset::Test.DefaultTestSet)
"""
    serialize_testset_result_file(dir::String, testset::Test.DefaultTestSet)

Collect the results of `testset` with `collect_results!` and serialize them to
`results_<name>.dat` in `dir`, where `<name>` is the testset description with every
character outside `a-zA-Z0-9` replaced by `_`. Return the path of the written file.

A warning is logged when collation takes more than 20 seconds or serialization takes
more than 10 seconds.
"""
function serialize_testset_result_file(dir::String, testset::Test.DefaultTestSet)
    collated = Dict{String,Any}[]
    t = @elapsed collect_results!(collated, testset)
    # most are << 5s
    if t > 20
        @warn "Collating test result data was slow: $t seconds" collated_results=length(collated)
    end

    name = replace(testset.description, r"[^a-zA-Z0-9]" => "_")
    res_file = joinpath(dir, "results_$(name).dat")

    t = @elapsed Serialization.serialize(res_file, collated)
    if t > 10
        @warn "Serializing test result data was slow: $t seconds" file = res_file size = Base.format_bytes(filesize(res_file))
    end
    return res_file
end

"""
    write_testset_json_files(dir::String)

Deserialize every `results*.dat` file found in `dir`, write the combined results to
collated JSON files of at most 5000 results each (`results_001.json`, `results_002.json`,
...), delete the `.dat` files that were read, and return the paths of the JSON files.
"""
function write_testset_json_files(dir::String)
    data = Dict{String,Any}[]
    read_files = String[]
    for res_dat in filter!(x -> occursin(r"^results.*\.dat$", x), readdir(dir))
        res_file = joinpath(dir, res_dat)
        append!(data, Serialization.deserialize(res_file))
        @debug "Loaded $(basename(res_file)) ($(Base.format_bytes(filesize(res_file))))"
        push!(read_files, res_file)
    end
    files = String[]
    # Buildkite is limited to 5000 results per file https://buildkite.com/docs/test-analytics/importing-json
    for (i, chunk) in enumerate(Iterators.partition(data, 5000))
        # zero-padded index in the file name
        res_file = joinpath(dir, "results_$(lpad(i, 3, '0')).json")
        open(io -> json_repr(io, chunk), res_file, "w")
        push!(files, res_file)
        @debug "Saved $(basename(res_file)) ($(length(chunk)) results, $(Base.format_bytes(filesize(res_file))))"
    end
    # The serialized inputs are removed only after every JSON file has been written.
    for res_file in read_files
        rm(res_file)
        @debug "Deleted $(basename(res_file))"
    end
    return files
end
Expand Down
4 changes: 2 additions & 2 deletions test/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2852,15 +2852,15 @@ mutable struct Obj; x; end
push!(wr, WeakRef(x))
nothing
end
@noinline test_wr(r, wr) = @test r[1] == wr[1].value
@noinline test_wr(r, wr) = r[1] == wr[1].value
function test_wr()
# we need to be very careful here that we never
# use the value directly in this function, so we aren't dependent
# on optimizations deleting the root for it before reaching the test
ref = []
wref = []
mk_wr(ref, wref)
test_wr(ref, wref)
@test test_wr(ref, wref)
GC.gc()
test_wr(ref, wref)
empty!(ref)
Expand Down
9 changes: 2 additions & 7 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ include("choosetests.jl")
include("testenv.jl")
include("buildkitetestjson.jl")

using .BuildkiteTestJSON

(; tests, net_on, exit_on_error, use_revise, seed) = choosetests(ARGS)
tests = unique(tests)

Expand Down Expand Up @@ -344,6 +342,8 @@ cd(@__DIR__) do
end
end

BuildkiteTestJSON.write_testset_json_files(@__DIR__)

#=
` Construct a testset on the master node which will hold results from all the
test files run on workers and on node1. The loop goes through the results,
Expand Down Expand Up @@ -418,11 +418,6 @@ cd(@__DIR__) do
Test.pop_testset()
end

if Base.get_bool_env("CI", false)
@info "Writing test result data to $(@__DIR__)"
write_testset_json_files(@__DIR__, o_ts)
end

Test.TESTSET_PRINT_ENABLE[] = true
println()
# o_ts.verbose = true # set to true to show all timings when successful
Expand Down
Loading