diff --git a/.gitignore b/.gitignore
index 80bdd67619454..7975fa91a69d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,8 @@
 /julia-*
 /source-dist.tmp
 /source-dist.tmp1
+/test/results_*.json
+/test/results_*.dat
 
 *.expmap
 *.exe
diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl
index 03e4b48d95f7a..f4b6abb147fb7 100644
--- a/stdlib/Mmap/test/runtests.jl
+++ b/stdlib/Mmap/test/runtests.jl
@@ -47,6 +47,7 @@ close(s)
 @test_throws ErrorException mmap(file, Vector{Ref}) # must be bit-type
 GC.gc(); GC.gc()
 
+file = tempname() # new name to reduce chance of issues due slow windows fs
 s = open(f->f,file,"w")
 @test mmap(file) == Vector{UInt8}() # requested len=0 on empty file
 @test mmap(file,Vector{UInt8},0) == Vector{UInt8}()
@@ -191,6 +192,7 @@ m = mmap(file,Vector{UInt8},2,6)
 @test_throws BoundsError m[3]
 finalize(m); m = nothing; GC.gc()
 
+file = tempname() # new name to reduce chance of issues due slow windows fs
 s = open(file, "w")
 write(s, [0xffffffffffffffff,
           0xffffffffffffffff,
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index e8d670b3d7d00..f3bdc5c2defd2 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -43,6 +43,10 @@ const DISPLAY_FAILED = (
 
 const FAIL_FAST = Ref{Bool}(false)
 
+const record_passes = OncePerProcess{Bool}() do
+    return Base.get_bool_env("JULIA_TEST_RECORD_PASSES", false)
+end
+
 #-----------------------------------------------------------------------
 
 # Backtrace utility functions
@@ -1100,8 +1104,18 @@ struct FailFastError <: Exception end
 
 # For a broken result, simply store the result
 record(ts::DefaultTestSet, t::Broken) = (push!(ts.results, t); t)
-# For a passed result, do not store the result since it uses a lot of memory
-record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
+# For a passed result, do not store the result since it uses a lot of memory, unless
+# `record_passes()` is true. i.e. set env var `JULIA_TEST_RECORD_PASSES=true` before running any testsets
+function record(ts::DefaultTestSet, t::Pass)
+    ts.n_passed += 1
+    if record_passes()
+        # throw away the captured data so it can be GC-ed
+        t_nodata = Pass(t.test_type, t.orig_expr, nothing, t.value, t.source, t.message_only)
+        push!(ts.results, t_nodata)
+        return t_nodata
+    end
+    return t
+end
 
 # For the other result types, immediately print the error message
 # but do not terminate. Print a backtrace.
diff --git a/test/buildkitetestjson.jl b/test/buildkitetestjson.jl
index 3333ca3c826d2..e9f969483c2aa 100644
--- a/test/buildkitetestjson.jl
+++ b/test/buildkitetestjson.jl
@@ -7,8 +7,9 @@ module BuildkiteTestJSON
 
 using Test
 using Dates
+using Serialization
 
-export write_testset_json_files
+export serialize_testset_result_file, write_testset_json_files
 
 # Bootleg JSON writer
 
@@ -64,14 +65,12 @@ function result_dict(testset::Test.DefaultTestSet, prefix::String="")
         "id" => Base.UUID(rand(UInt128)),
         "scope" => scope,
         "tags" => Dict{String,String}(
-            "job_label" => get(ENV, "BUILDKITE_LABEL", "unknown"),
-            "job_id" => get(ENV, "BUILDKITE_JOB_ID", "unknown"),
-            "job_group" => get(ENV, "BUILDKITE_GROUP_LABEL", "unknown"),
             "os" => string(Sys.KERNEL),
             "arch" => string(Sys.ARCH),
             "julia_version" => string(VERSION),
             "testset" => testset.description,
         ),
+        # note we drop some of this from common_data before merging into individual results
         "history" => if !isnothing(testset.time_end)
             Dict{String,Any}(
                 "start_at" => testset.time_start,
@@ -86,14 +85,33 @@ end
 # Test paths on runners are often in deep directories, so just make them contain enough information
 # to be able to identify the file. Also convert Windows-style paths to Unix-style paths so tests can
 # be grouped by file.
+const generalize_file_paths_cache = Dict{AbstractString,AbstractString}()
+const norm_build_root_path = normpath(Sys.BUILD_ROOT_PATH)
+const bindir_dir = dirname(Sys.BINDIR)
+const pathsep = Sys.iswindows() ? '\\' : '/'
 function generalize_file_paths(path::AbstractString)
-    pathsep = Sys.iswindows() ? '\\' : '/'
-    path = replace(path,
-        string(Sys.STDLIB, pathsep) => "",
-        string(normpath(Sys.BUILD_ROOT_PATH), pathsep) => "",
-        string(dirname(Sys.BINDIR), pathsep) => ""
-    )
-    return Sys.iswindows() ? replace(path, "\\" => "/") : path
+    return get!(generalize_file_paths_cache, path) do
+        path = replace(path,
+            Sys.STDLIB => "stdlib",
+            string(norm_build_root_path, pathsep) => "",
+            string(bindir_dir, pathsep) => ""
+        )
+        @static if Sys.iswindows()
+            return replace(path, "\\" => "/")
+        else
+            return path
+        end
+    end
+end
+
+# raw_file_path,line => file,location
+const location_cache = Dict{Tuple{Symbol,Int},Tuple{String,String}}()
+function get_location(file::Symbol, line::Int)
+    return get!(location_cache, (file, line)) do
+        _file = generalize_file_paths(string(file))
+        _location = string(_file, ":", line)
+        return _file, _location
+    end
 end
 
 # passed, failed, skipped, or unknown
@@ -111,28 +129,65 @@ function get_status(result)
     end
 end
 
-function result_dict(result::Test.Result)
+# An attempt to reconstruct the test call.
+# Note we can't know if broken or skip was via the broken/skip macros or kwargs.
+const TEST_TYPE_MAP = Dict(
+    :test => "@test",
+    :test_nonbool => "@test",
+    :test_error => "@test",
+    :test_interrupted => "@test",
+    :test_unbroken => "@test_broken",
+    :skipped => "@test_skip",
+    :test_throws => "@test_throws",
+    :test_throws_wrong => "@test_throws",
+    :test_throws_nothing => "@test_throws"
+)
+function get_test_call_str(result)
+    prefix = get(TEST_TYPE_MAP, result.test_type, nothing)
+    prefix === nothing && return error("Unknown test type $(repr(result.test_type))")
+    return prefix == "@test_throws" ? "@test_throws $(result.data) $(result.orig_expr)" : "$prefix $(result.orig_expr)"
+end
+
+get_rid(rdata) = (rdata["location"], rdata["result"], haskey(rdata, "failure_expanded") ? hash(rdata["failure_expanded"]) : UInt64(0))
+
+const ResultCountDict = Dict{Tuple{String,String,UInt64},Int}
+
+function is_duplicate_pass(result::Test.Pass, location, status, result_counts::ResultCountDict)
+    rid = (location, status, UInt64(0))
+    count = get(result_counts, rid, nothing)
+    if count !== nothing
+        result_counts[rid] = count + 1
+        return true
+    end
+    return false
+end
+is_duplicate_pass(result::Test.Result, location, status, result_counts::ResultCountDict) = false
+
+function result_dict(result::Test.Result, result_counts::ResultCountDict)
     file, line = if !hasproperty(result, :source) || isnothing(result.source)
-        "unknown", 0
+        :unknown, 0
     else
-        something(result.source.file, "unknown"), result.source.line
+        something(result.source.file, :unknown), result.source.line
     end
-    file = generalize_file_paths(string(file))
-
+    file, location = get_location(file, line)
     status = get_status(result)
 
-    result_show = sprint(show, result; context=:color => false)
-    firstline = split(result_show, '\n')[1]
-    primary_reason = split(firstline, " at ")[1]
+    # Early exit for passed tests before more expensive operations
+    if is_duplicate_pass(result, location, status, result_counts)
+        return nothing
+    end
 
     data = Dict{String,Any}(
-        "name" => "$(primary_reason). Expression: $(result.orig_expr)",
-        "location" => string(file, ':', line),
-        "file_name" => file,
-        "result" => status)
+        "location" => location,
+        "result" => status,
+        "name" => get_test_call_str(result),
+        "file_name" => file)
 
-    job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
     if result isa Test.Fail || result isa Test.Error
+        job_label = replace(get(ENV, "BUILDKITE_LABEL", "job label not found"), r":\w+:\s*" => "")
+        result_show = sprint(show, result; context=:color => false)
+        firstline = split(result_show, '\n')[1]
+        # put the job label at the end here because of the way buildkite UI is laid out
         data["failure_reason"] = generalize_file_paths(firstline) * " | $job_label"
         err_trace = split(result_show, "\nStacktrace:\n", limit=2)
         if length(err_trace) == 2
@@ -142,49 +197,87 @@ function result_dict(result::Test.Result)
             data["failure_expanded"] = [Dict{String,Any}("expanded" => split(result_show, '\n'), "backtrace" => [])]
         end
     end
-    return data
+
+    rid = get_rid(data)
+    duplicate = haskey(result_counts, rid)
+
+    if duplicate
+        result_counts[rid] += 1
+        return nothing
+    else
+        result_counts[rid] = 1
+        return data
+    end
+end
+
+function collect_results!(results::Vector{Dict{String,Any}}, result::Test.Result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
+    rdata = result_dict(result, result_counts)
+    if rdata !== nothing # nothing if it's a duplicate that's been counted
+        push!(results, merge(common_data, rdata))
+    end
+end
+function collect_results!(results::Vector{Dict{String,Any}}, result::Test.DefaultTestSet, common_data::Dict{String,Any}, result_counts::ResultCountDict)
+    collect_results!(results, result, common_data["scope"])
+end
+function collect_results!(results::Vector{Dict{String,Any}}, result, common_data::Dict{String,Any}, result_counts::ResultCountDict)
+    return nothing
 end
 
 function collect_results!(results::Vector{Dict{String,Any}}, testset::Test.DefaultTestSet, prefix::String="")
     common_data = result_dict(testset, prefix)
+    # testset duration is not relevant for individual test results
+    common_data["history"]["duration"] = 0.0 # required field
+    delete!(common_data["history"], "end_at")
     result_offset = length(results) + 1
-    result_counts = Dict{Tuple{String,String},Int}()
-    get_rid(rdata) = (rdata["location"], rdata["result"])
-    for (i, result) in enumerate(testset.results)
-        if result isa Test.Result
-            rdata = result_dict(result)
-            rid = get_rid(rdata)
-            if haskey(result_counts, rid)
-                result_counts[rid] += 1
-            else
-                result_counts[rid] = 1
-                push!(results, merge(common_data, rdata))
-            end
-        elseif result isa Test.DefaultTestSet
-            collect_results!(results, result, common_data["scope"])
-        end
+    result_counts = ResultCountDict()
+
+    for result in testset.results
+        collect_results!(results, result, common_data, result_counts)
     end
-    # Modify names to hold `result_counts`
-    for i in result_offset:length(results)
-        result = results[i]
+    # Add a tag for count of each result
+    for result in results[result_offset:end]
         rid = get_rid(result)
-        if get(result_counts, rid, 0) > 1
-            result["name"] = replace(result["name"], r"^([^:]):" =>
-                SubstitutionString("\\1 (x$(result_counts[rid])):"))
-        end
+        result["tags"]["count"] = string(get(result_counts, rid, 1))
     end
     return results
 end
 
-function write_testset_json_files(dir::String, testset::Test.DefaultTestSet)
+function serialize_testset_result_file(dir::String, testset::Test.DefaultTestSet)
+    data = Dict{String,Any}[]
+    t = @elapsed collect_results!(data, testset)
+    if t > 20 # most are << 5s
+        @warn "Collating test result data was slow: $t seconds" collated_results=length(data)
+    end
+    name = replace(testset.description, r"[^a-zA-Z0-9]" => "_")
+    res_file = joinpath(dir, "results_$(name).dat")
+    t = @elapsed Serialization.serialize(res_file, data)
+    if t > 10
+        @warn "Serializing test result data was slow: $t seconds" file = res_file size = Base.format_bytes(filesize(res_file))
+    end
+    return res_file
+end
+
+# deserilalizes the results files and writes them to collated JSON files of 5000 max results
+function write_testset_json_files(dir::String)
     data = Dict{String,Any}[]
-    collect_results!(data, testset)
+    read_files = String[]
+    for res_dat in filter!(x -> occursin(r"^results.*\.dat$", x), readdir(dir))
+        res_file = joinpath(dir, res_dat)
+        append!(data, Serialization.deserialize(res_file))
+        @debug "Loaded $(basename(res_file)) ($(Base.format_bytes(filesize(res_file))))"
+        push!(read_files, res_file)
+    end
     files = String[]
     # Buildkite is limited to 5000 results per file https://buildkite.com/docs/test-analytics/importing-json
     for (i, chunk) in enumerate(Iterators.partition(data, 5000))
-        res_file = joinpath(dir, "results_$i.json")
+        res_file = joinpath(dir, "results_$(lpad(i, 3, '0')).json")
         open(io -> json_repr(io, chunk), res_file, "w")
         push!(files, res_file)
+        @debug "Saved $(basename(res_file)) ($(length(chunk)) results, $(Base.format_bytes(filesize(res_file))))"
+    end
+    for res_file in read_files
+        rm(res_file)
+        @debug "Deleted $(basename(res_file))"
     end
     return files
 end
diff --git a/test/core.jl b/test/core.jl
index 7bf34da3076d9..0e5ccc745ade9 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -2852,7 +2852,7 @@ mutable struct Obj; x; end
         push!(wr, WeakRef(x))
         nothing
     end
-    @noinline test_wr(r, wr) = @test r[1] == wr[1].value
+    @noinline test_wr(r, wr) = r[1] == wr[1].value
     function test_wr()
         # we need to be very careful here that we never
         # use the value directly in this function, so we aren't dependent
@@ -2860,7 +2860,7 @@ mutable struct Obj; x; end
         ref = []
         wref = []
         mk_wr(ref, wref)
-        test_wr(ref, wref)
+        @test test_wr(ref, wref)
         GC.gc()
         test_wr(ref, wref)
         empty!(ref)
diff --git a/test/runtests.jl b/test/runtests.jl
index 1872c888333da..60139a4691bd7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -13,8 +13,6 @@ include("choosetests.jl")
 include("testenv.jl")
 include("buildkitetestjson.jl")
 
-using .BuildkiteTestJSON
-
 (; tests, net_on, exit_on_error, use_revise, seed) = choosetests(ARGS)
 tests = unique(tests)
 
@@ -344,6 +342,8 @@ cd(@__DIR__) do
         end
     end
 
+    BuildkiteTestJSON.write_testset_json_files(@__DIR__)
+
     #=
 `   Construct a testset on the master node which will hold results from all the
     test files run on workers and on node1. The loop goes through the results,
@@ -418,11 +418,6 @@ cd(@__DIR__) do
         Test.pop_testset()
     end
 
-    if Base.get_bool_env("CI", false)
-        @info "Writing test result data to $(@__DIR__)"
-        write_testset_json_files(@__DIR__, o_ts)
-    end
-
     Test.TESTSET_PRINT_ENABLE[] = true
     println()
     # o_ts.verbose = true # set to true to show all timings when successful
diff --git a/test/testdefs.jl b/test/testdefs.jl
index eb0bf570b11fd..35ffe580b3a16 100644
--- a/test/testdefs.jl
+++ b/test/testdefs.jl
@@ -2,12 +2,15 @@
 
 using Test, Random
 
+include("buildkitetestjson.jl")
+
 function runtests(name, path, isolate=true; seed=nothing)
     old_print_setting = Test.TESTSET_PRINT_ENABLE[]
     Test.TESTSET_PRINT_ENABLE[] = false
     # remove all hint_handlers, so that errorshow tests are not changed by which packages have been loaded on this worker already
     # packages that call register_error_hint should also call this again, and then re-add any hooks they want to test
     empty!(Base.Experimental._hint_handlers)
+    withenv("JULIA_TEST_RECORD_PASSES" => Base.get_bool_env("CI", false)) do
     try
         if isolate
             # Simple enough to type and random enough so that no one will hard
@@ -30,7 +33,13 @@ function runtests(name, path, isolate=true; seed=nothing)
             original_env = copy(ENV)
             original_project = Base.active_project()
 
-            Base.include(m, "$path.jl")
+            try
+                Base.include(m, "$path.jl")
+            finally
+                if Base.get_bool_env("CI", false)
+                    BuildkiteTestJSON.serialize_testset_result_file(@__DIR__, Test.get_testset())
+                end
+            end
 
             if Base.DEPOT_PATH != original_depot_path
                 msg = "The `$(name)` test set mutated Base.DEPOT_PATH and did not restore the original values"
@@ -98,6 +107,7 @@ function runtests(name, path, isolate=true; seed=nothing)
         ex isa TestSetException || rethrow()
         return Any[ex]
     end
+    end # withenv
 end
 
 # looking in . messes things up badly