Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/cmd/compile/internal/ir/symtab.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ type symsStruct struct {
InterfaceSwitch *obj.LSym
MallocGC *obj.LSym
MallocGCTiny *obj.LSym
MallocGCSmallNoScan [27]*obj.LSym
MallocGCSmallScanNoHeader [27]*obj.LSym
MallocGCSmallNoScan [11]*obj.LSym
MallocGCSmallScanNoHeader [11]*obj.LSym
Memmove *obj.LSym
Memequal *obj.LSym
Msanread *obj.LSym
Expand Down
7 changes: 2 additions & 5 deletions src/cmd/compile/internal/ssagen/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -806,11 +806,8 @@ func (s *state) specializedMallocSym(size int64, hasPointers bool) *obj.LSym {
if !s.sizeSpecializedMallocEnabled() {
return nil
}
ptrSize := s.config.PtrSize
ptrBits := ptrSize * 8
minSizeForMallocHeader := ptrSize * ptrBits
heapBitsInSpan := size <= minSizeForMallocHeader
if !heapBitsInSpan {
const specializedMallocMax = 128 // This must match the constant in mkmalloc.
if size > specializedMallocMax {
return nil
}
divRoundUp := func(n, a uintptr) uintptr { return (n + a - 1) / a }
Expand Down
10 changes: 6 additions & 4 deletions src/runtime/_mkmalloc/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ const (
maxPtrSize = max(4, 8)
maxPtrBits = 8 * maxPtrSize

// Maximum size smallScanNoHeader would be called for, which is the
// maximum value gc.MinSizeForMallocHeader can have on any platform.
// gc.MinSizeForMallocHeader is defined as goarch.PtrSize * goarch.PtrBits.
smallScanNoHeaderMax = maxPtrSize * maxPtrBits
// Maximum size to generate size specialized functions for.
// We've seen very limited benefit for specialized functions for larger
// size classes, and with the wrapper they are sometimes slower
// than the non-specialized functions.
// This must match the constant in the compiler.
specializedMallocMax = 128
)
98 changes: 87 additions & 11 deletions src/runtime/_mkmalloc/mkmalloc.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ var stdout = flag.Bool("stdout", false, "write sizeclasses source to stdout inst

func makeSizeToSizeClass(classes []class) []uint8 {
sc := uint8(0)
ret := make([]uint8, smallScanNoHeaderMax+1)
ret := make([]uint8, benchmarkMax+1)
for i := range ret {
if i > classes[sc].size {
sc++
Expand Down Expand Up @@ -63,6 +63,12 @@ func main() {
if err := os.WriteFile(tablefile, mustFormat(generateTable(sizeToSizeClass)), 0666); err != nil {
log.Fatal(err)
}

benchmarkFile := "../malloc_bench_generated_test.go"
if err := os.WriteFile(benchmarkFile, mustFormat(append(inline(benchmarkConfig(classes, sizeToSizeClass)), []byte(generateTopBenchmark(classes, sizeToSizeClass))...)), 0666); err != nil {
log.Fatal(err)
}

}

// withLineNumbers returns b with line numbers added to help debugging.
Expand Down Expand Up @@ -140,10 +146,10 @@ func smallNoScanSCFuncName(sc, scMax uint8) string {
func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generatorConfig {
config := generatorConfig{file: "../malloc_stubs.go"}

// Only generate specialized functions for sizes that don't have
// a header on 64-bit platforms. (They may have a header on 32-bit, but
// we will fall back to the non-specialized versions in that case)
scMax := sizeToSizeClass[smallScanNoHeaderMax]
// Only generate specialized functions for sizes up to specializedMallocMax.
// We've noticed limited benefit (or sometimes worse performance) for specialized
// functions for larger sizes, and having too many functions causes icache issues.
scMax := sizeToSizeClass[specializedMallocMax]

str := fmt.Sprint

Expand Down Expand Up @@ -626,27 +632,27 @@ func replaceWithAssignment(cursor *astutil.Cursor, lhs, rhs []ast.Expr, tok toke

// generateTable generates the file with the jump tables for the specialized malloc functions.
func generateTable(sizeToSizeClass []uint8) []byte {
scMax := sizeToSizeClass[smallScanNoHeaderMax]
scMax := sizeToSizeClass[specializedMallocMax]

var b bytes.Buffer
fmt.Fprintln(&b, `// Code generated by mkmalloc.go; DO NOT EDIT.
fmt.Fprintf(&b, `// Code generated by mkmalloc.go; DO NOT EDIT.
//go:build !plan9

package runtime

import "unsafe"

var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`)
var mallocScanTable = [129]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`)

for i := range uintptr(smallScanNoHeaderMax + 1) {
for i := range uintptr(specializedMallocMax + 1) {
fmt.Fprintf(&b, "%s,\n", smallScanNoHeaderSCFuncName(sizeToSizeClass[i], scMax))
}

fmt.Fprintln(&b, `
}

var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`)
for i := range uintptr(smallScanNoHeaderMax + 1) {
var mallocNoScanTable = [129]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`)
for i := range uintptr(specializedMallocMax + 1) {
if i < 16 {
fmt.Fprintf(&b, "%s,\n", "mallocPanic")
} else {
Expand All @@ -659,3 +665,73 @@ var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsaf

return b.Bytes()
}

// benchmarkMax is the largest allocation size to generate benchmarks for.
// It covers all potentially small sizes (sizes for which smallScanNoHeader
// would be called): gc.MinSizeForMallocHeader is defined as
// goarch.PtrSize * goarch.PtrBits, so its maximum value across all
// platforms is maxPtrSize * maxPtrBits.
const benchmarkMax = maxPtrSize * maxPtrBits

// benchmarkConfig produces an inlining config to stamp out microbenchmarks.
func benchmarkConfig(classes []class, sizeToSizeClass []uint8) generatorConfig {
	cfg := generatorConfig{file: "../malloc_stubs_test.go"}

	// Cover every size class up to benchmarkMax.
	scMax := sizeToSizeClass[benchmarkMax]

	for sc := uint8(1); sc <= scMax; sc++ {
		size := classes[sc].size
		// For each size class, stamp out a noscan variant followed by
		// a scan variant.
		for _, noscan := range []bool{true, false} {
			kind := "Scan"
			if noscan {
				kind = "Noscan"
			}
			cfg.specs = append(cfg.specs, spec{
				templateFunc: "benchmarkStub",
				name:         fmt.Sprintf("benchmarkMallocgc%s%d", kind, size),
				ops: []op{
					{subBasicLit, "size_", fmt.Sprint(size)},
					{foldCondition, "noscan_", fmt.Sprint(noscan)},
				},
			})
		}
	}

	// Sizes below tinySize get dedicated tiny-allocator benchmarks.
	for size := 1; size < tinySize; size++ {
		cfg.specs = append(cfg.specs, spec{
			templateFunc: "benchmarkStubTiny",
			name:         fmt.Sprintf("benchmarkMallocgcTiny%d", size),
			ops: []op{
				{subBasicLit, "size_", fmt.Sprint(size)},
				{foldCondition, "noscan_", fmt.Sprint(true)},
			},
		})
	}

	return cfg
}

// generateTopBenchmark returns the source text of the top-level
// BenchmarkMallocgc function, which runs each generated size-specific
// sub-benchmark under the scan=noscan and scan=scan groups.
func generateTopBenchmark(classes []class, sizeToSizeClass []uint8) string {
	scMax := sizeToSizeClass[benchmarkMax]

	// subRun renders one b.Run line invoking the benchmark stub whose
	// name is stem followed by the size.
	subRun := func(stem string, size any) string {
		return fmt.Sprintf(`b.Run("size=%d", %s%d)`+"\n", size, stem, size)
	}

	src := `func BenchmarkMallocgc(b *testing.B) {
b.Run("scan=noscan", func(b *testing.B) {
`
	// Sizes below tinySize use the tiny-allocator benchmarks.
	for size := 1; size < tinySize; size++ {
		src += subRun("benchmarkMallocgcTiny", size)
	}
	// NOTE(review): the noscan loop starts at size class 2, presumably
	// because class 1 sizes are already covered by the tiny benchmarks
	// above — confirm against the size-class table.
	for sc := uint8(2); sc <= scMax; sc++ {
		src += subRun("benchmarkMallocgcNoscan", classes[sc].size)
	}
	src += `})
b.Run("scan=scan", func(b *testing.B) {
`
	for sc := uint8(1); sc <= scMax; sc++ {
		src += subRun("benchmarkMallocgcScan", classes[sc].size)
	}
	src += `})
}`

	return src
}
4 changes: 4 additions & 0 deletions src/runtime/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2114,3 +2114,7 @@ func GetScanAlloc() uintptr {
c := getMCache(getg().m)
return c.scanAlloc
}

// MallocGC exposes the runtime-internal mallocgc allocator for tests,
// forwarding size, typ, and needzero unchanged and returning the
// resulting allocation pointer.
func MallocGC(size uintptr, typ *abi.Type, needzero bool) unsafe.Pointer {
	return mallocgc(size, typ, needzero)
}
9 changes: 6 additions & 3 deletions src/runtime/malloc.go
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
return unsafe.Pointer(&zerobase)
}

if sizeSpecializedMallocEnabled && heapBitsInSpan(size) {
if sizeSpecializedMallocEnabled && size < uintptr(len(mallocNoScanTable)) {
if typ == nil || !typ.Pointers() {
if size >= maxTinySize {
return mallocNoScanTable[size](size, typ, needzero)
Expand Down Expand Up @@ -1120,15 +1120,18 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
var x unsafe.Pointer
var elemsize uintptr
if sizeSpecializedMallocEnabled {
// we know that heapBitsInSpan is false.
if size <= maxSmallSize-gc.MallocHeaderSize {
if typ == nil || !typ.Pointers() {
x, elemsize = mallocgcSmallNoscan(size, typ, needzero)
} else {
if !needzero {
throw("objects with pointers must be zeroed")
}
x, elemsize = mallocgcSmallScanHeader(size, typ)
if heapBitsInSpan(size) {
x, elemsize = mallocgcSmallScanNoHeader(size, typ)
} else {
x, elemsize = mallocgcSmallScanHeader(size, typ)
}
}
} else {
x, elemsize = mallocgcLarge(size, typ, needzero)
Expand Down
Loading
Loading