Skip to content

Commit 665817b

Browse files
committed
[Profile] add builtin Allocs.print and formatting equivalents to Profile.print
Defines a converter for Allocs to the types implemented by Profile for printing, allowing flamegraph report generation of allocations with just the stdlib tooling. Refs: #42768
1 parent 5db9dbd commit 665817b

File tree

5 files changed

+246
-27
lines changed

5 files changed

+246
-27
lines changed

stdlib/Profile/docs/src/index.md

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as `
106106

107107
```@docs
108108
Profile.Allocs.clear
109+
Profile.Allocs.print
109110
Profile.Allocs.fetch
110111
Profile.Allocs.start
111112
Profile.Allocs.stop

stdlib/Profile/src/Allocs.jl

+208-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
module Allocs
22

3+
global print # Allocs.print is separate from both Base.print and Profile.print
4+
5+
using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree
36
using Base.StackTraces: StackTrace, StackFrame, lookup
47
using Base: InterpreterIP
58

@@ -138,7 +141,7 @@ end
138141
# Without this, the Alloc's stacktrace prints for lines and lines and lines...
139142
function Base.show(io::IO, a::Alloc)
140143
stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : ""
141-
print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
144+
Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
142145
end
143146

144147
const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
@@ -216,4 +219,208 @@ function stacktrace_memoized(
216219
return stack
217220
end
218221

222+
function warning_empty()
223+
@warn """
224+
There were no samples collected.
225+
Run your program longer (perhaps by running it multiple times),
226+
or adjust the frequency of samples to record every event with
227+
the `sample_rate=1.0` kwarg."""
228+
end
229+
230+
231+
print(; kwargs...) =
232+
Profile.print(stdout, fetch(); kwargs...)
233+
print(io::IO; kwargs...) =
234+
Profile.print(io, fetch(); kwargs...)
235+
print(io::IO, data::AllocResults; kwargs...) =
236+
Profile.print(io, data; kwargs...)
237+
238+
"""
239+
Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...)
240+
241+
Prints profiling results to `io` (by default, `stdout`). If you do not
242+
supply a `data` vector, the internal buffer of accumulated backtraces
243+
will be used.
244+
"""
245+
function Profile.print(io::IO,
246+
data::AllocResults,
247+
;
248+
format = :tree,
249+
C = false,
250+
#combine = true,
251+
maxdepth::Int = typemax(Int),
252+
mincount::Int = 0,
253+
noisefloor = 0,
254+
sortedby::Symbol = :filefuncline,
255+
groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
256+
recur::Symbol = :off,
257+
)
258+
259+
pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur)
260+
Profile.print(io, data, pf, format)
261+
return
262+
end
263+
264+
"""
265+
Profile.Allocs.print([io::IO = stdout,] data::AllocResults=fetch(); kwargs...)
266+
267+
Prints profiling results to `io`.
268+
269+
See `Profile.Allocs.print([io], data)` for an explanation of the valid keyword arguments.
270+
"""
271+
Profile.print(data::AllocResults; kwargs...) =
272+
Profile.print(stdout, data; kwargs...)
273+
274+
function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol)
275+
cols::Int = Base.displaysize(io)[2]
276+
fmt.recur (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
277+
data = data.allocs
278+
if format === :tree
279+
tree(io, data, cols, fmt)
280+
elseif format === :flat
281+
fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
282+
flat(io, data, cols, fmt)
283+
else
284+
throw(ArgumentError("output format $(repr(format)) not recognized"))
285+
end
286+
nothing
287+
end
288+
289+
290+
function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T
291+
lilist = StackFrame[]
292+
n = Int[]
293+
m = Int[]
294+
lilist_idx = Dict{T, Int}()
295+
recursive = Set{T}()
296+
totalbytes = 0
297+
for r in data
298+
first = true
299+
empty!(recursive)
300+
nb = r.size # or 1 for counting
301+
totalbytes += nb
302+
for frame in r.stacktrace
303+
!C && frame.from_c && continue
304+
key = (T === UInt64 ? ip : frame)
305+
idx = get!(lilist_idx, key, length(lilist) + 1)
306+
if idx > length(lilist)
307+
push!(recursive, key)
308+
push!(lilist, frame)
309+
push!(n, nb)
310+
push!(m, 0)
311+
elseif !(key in recursive)
312+
push!(recursive, key)
313+
n[idx] += nb
314+
end
315+
if first
316+
m[idx] += nb
317+
first = false
318+
end
319+
end
320+
end
321+
@assert length(lilist) == length(n) == length(m) == length(lilist_idx)
322+
return (lilist, n, m, totalbytes)
323+
end
324+
325+
function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
326+
fmt.combine || error(ArgumentError("combine=false"))
327+
lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
328+
filenamemap = Dict{Symbol,String}()
329+
if isempty(lilist)
330+
warning_empty()
331+
return true
332+
end
333+
print_flat(io, lilist, n, m, cols, filenamemap, fmt)
334+
Base.println(io, "Total snapshots: ", length(data))
335+
Base.println(io, "Total bytes: ", totalbytes)
336+
return false
337+
end
338+
339+
function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T}
340+
tops = Vector{StackFrameTree{T}}()
341+
build = Dict{T, StackFrameTree{T}}()
342+
for r in all
343+
first = true
344+
nb = r.size # or 1 for counting
345+
root.recur = 0
346+
root.count += nb
347+
parent = root
348+
for i in reverse(eachindex(r.stacktrace))
349+
frame = r.stacktrace[i]
350+
key = (T === UInt64 ? ip : frame)
351+
if (recur === :flat && !frame.from_c) || recur === :flatc
352+
# see if this frame already has a parent
353+
this = get!(build, frame, parent)
354+
if this !== parent
355+
# Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree
356+
push!(tops, parent)
357+
parent = this
358+
end
359+
end
360+
!C && frame.from_c && continue
361+
this = get!(StackFrameTree{T}, parent.down, key)
362+
if recur === :off || this.recur == 0
363+
this.frame = frame
364+
this.up = parent
365+
this.count += nb
366+
this.recur = 1
367+
else
368+
this.count_recur += 1
369+
end
370+
parent = this
371+
end
372+
parent.overhead += nb
373+
if recur !== :off
374+
# We mark all visited nodes to so we'll only count those branches
375+
# once for each backtrace. Reset that now for the next backtrace.
376+
empty!(build)
377+
push!(tops, parent)
378+
for top in tops
379+
while top.recur != 0
380+
top.max_recur < top.recur && (top.max_recur = top.recur)
381+
top.recur = 0
382+
top = top.up
383+
end
384+
end
385+
empty!(tops)
386+
end
387+
let this = parent
388+
while this !== root
389+
this.flat_count += nb
390+
this = this.up
391+
end
392+
end
393+
end
394+
function cleanup!(node::StackFrameTree)
395+
stack = [node]
396+
while !isempty(stack)
397+
node = pop!(stack)
398+
node.recur = 0
399+
empty!(node.builder_key)
400+
empty!(node.builder_value)
401+
append!(stack, values(node.down))
402+
end
403+
nothing
404+
end
405+
cleanup!(root)
406+
return root
407+
end
408+
409+
function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
410+
fmt.combine || error(ArgumentError("combine=false"))
411+
if fmt.combine
412+
root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur)
413+
else
414+
root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur)
415+
end
416+
print_tree(io, root, cols, fmt, false)
417+
if isempty(root.down)
418+
warning_empty()
419+
return true
420+
end
421+
Base.println(io, "Total snapshots: ", length(data))
422+
Base.println(io, "Total bytes: ", root.count)
423+
return false
424+
end
425+
219426
end

stdlib/Profile/src/Profile.jl

+6-6
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ function print(io::IO,
220220

221221
pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
222222
if groupby === :none
223-
print(io, data, lidict, pf, format, threads, tasks, false)
223+
print_group(io, data, lidict, pf, format, threads, tasks, false)
224224
else
225225
if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
226226
error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
@@ -244,7 +244,7 @@ function print(io::IO,
244244
printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
245245
for threadid in threadids
246246
printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
247-
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
247+
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
248248
nosamples && (any_nosamples = true)
249249
println(io)
250250
end
@@ -262,7 +262,7 @@ function print(io::IO,
262262
printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
263263
for taskid in taskids
264264
printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
265-
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
265+
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
266266
nosamples && (any_nosamples = true)
267267
println(io)
268268
end
@@ -274,7 +274,7 @@ function print(io::IO,
274274
isempty(taskids) && (any_nosamples = true)
275275
for taskid in taskids
276276
printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
277-
nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
277+
nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true)
278278
nosamples && (any_nosamples = true)
279279
println(io)
280280
end
@@ -284,7 +284,7 @@ function print(io::IO,
284284
isempty(threadids) && (any_nosamples = true)
285285
for threadid in threadids
286286
printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
287-
nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
287+
nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
288288
nosamples && (any_nosamples = true)
289289
println(io)
290290
end
@@ -306,7 +306,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument
306306
print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
307307
print(stdout, data, lidict; kwargs...)
308308

309-
function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
309+
function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
310310
format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
311311
is_subsection::Bool = false)
312312
cols::Int = Base.displaysize(io)[2]

stdlib/Profile/test/allocs.jl

+21
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
using Test
22
using Profile: Allocs
33

4+
Allocs.clear()
5+
let iobuf = IOBuffer()
6+
for format in (:tree, :flat)
7+
Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true)
8+
end
9+
end
10+
411
@testset "alloc profiler doesn't segfault" begin
512
res = Allocs.@profile sample_rate=1.0 begin
613
# test the allocations during compilation
@@ -13,6 +20,20 @@ using Profile: Allocs
1320
@test first_alloc.size > 0
1421
@test length(first_alloc.stacktrace) > 0
1522
@test length(string(first_alloc.type)) > 0
23+
24+
# test printing options
25+
for options in ((format=:tree, C=true),
26+
(format=:tree, maxdepth=2),
27+
(format=:flat, C=true),
28+
(),
29+
(format=:flat, sortedby=:count),
30+
(format=:tree, recur=:flat),
31+
)
32+
iobuf = IOBuffer()
33+
Allocs.print(iobuf; options...)
34+
str = String(take!(iobuf))
35+
@test !isempty(str)
36+
end
1637
end
1738

1839
@testset "alloc profiler works when there are multiple tasks on multiple threads" begin

stdlib/Profile/test/runtests.jl

+10-20
Original file line numberDiff line numberDiff line change
@@ -38,28 +38,18 @@ let r = Profile.retrieve()
3838
end
3939
end
4040

41-
let iobuf = IOBuffer()
42-
Profile.print(iobuf, format=:tree, C=true)
43-
str = String(take!(iobuf))
44-
@test !isempty(str)
45-
truncate(iobuf, 0)
46-
Profile.print(iobuf, format=:tree, maxdepth=2)
47-
str = String(take!(iobuf))
48-
@test !isempty(str)
49-
truncate(iobuf, 0)
50-
Profile.print(iobuf, format=:flat, C=true)
51-
str = String(take!(iobuf))
52-
@test !isempty(str)
53-
truncate(iobuf, 0)
54-
Profile.print(iobuf)
55-
@test !isempty(String(take!(iobuf)))
56-
truncate(iobuf, 0)
57-
Profile.print(iobuf, format=:flat, sortedby=:count)
58-
@test !isempty(String(take!(iobuf)))
59-
Profile.print(iobuf, format=:tree, recur=:flat)
41+
# test printing options
42+
for options in ((format=:tree, C=true),
43+
(format=:tree, maxdepth=2),
44+
(format=:flat, C=true),
45+
(),
46+
(format=:flat, sortedby=:count),
47+
(format=:tree, recur=:flat),
48+
)
49+
iobuf = IOBuffer()
50+
Profile.print(iobuf; options...)
6051
str = String(take!(iobuf))
6152
@test !isempty(str)
62-
truncate(iobuf, 0)
6353
end
6454

6555
@testset "Profile.print() groupby options" begin

0 commit comments

Comments
 (0)