Skip to content

Box::new() doesn't optimize out stack usage #58570

@upsuper

Description

@upsuper

If you give the compiler the following code:

#![feature(box_syntax)]

/// Returns a heap-allocated array of one million zeroed `i32`s, built with the
/// `box` expression syntax (enabled by `#![feature(box_syntax)]`).
pub fn foo() -> Box<[i32; 1_000_000]> {
    box [0; 1_000_000]
}

/// Returns the same boxed array of one million zeroed `i32`s as `foo`, but
/// passes the array value to `Box::new` instead of using `box` syntax.
pub fn bar() -> Box<[i32; 1_000_000]> {
    Box::new([0; 1_000_000])
}

The compiler produces the following output (from the playground) in release mode:

Details
playground::foo: # @playground::foo
# %bb.0:
	pushq	%rbx
	movl	$4000000, %edi          # imm = 0x3D0900
	movl	$4, %esi
	callq	*__rust_alloc@GOTPCREL(%rip)
	testq	%rax, %rax
	je	.LBB0_1
# %bb.2:
	movq	%rax, %rbx
	movl	$4000000, %edx          # imm = 0x3D0900
	movq	%rax, %rdi
	xorl	%esi, %esi
	callq	*memset@GOTPCREL(%rip)
	movq	%rbx, %rax
	popq	%rbx
	retq

.LBB0_1:
	movl	$4000000, %edi          # imm = 0x3D0900
	movl	$4, %esi
	callq	*alloc::alloc::handle_alloc_error@GOTPCREL(%rip)
	ud2
                                        # -- End function

playground::bar: # @playground::bar
# %bb.0:
	pushq	%rbx
	movl	$4000000, %eax          # imm = 0x3D0900
	callq	__rust_probestack
	subq	%rax, %rsp
	movq	%rsp, %rdi
	movl	$4000000, %edx          # imm = 0x3D0900
	xorl	%esi, %esi
	callq	*memset@GOTPCREL(%rip)
	movl	$4000000, %edi          # imm = 0x3D0900
	movl	$4, %esi
	callq	*__rust_alloc@GOTPCREL(%rip)
	testq	%rax, %rax
	je	.LBB1_1
# %bb.2:
	movq	%rax, %rbx
	movq	%rsp, %rsi
	movl	$4000000, %edx          # imm = 0x3D0900
	movq	%rax, %rdi
	callq	*memcpy@GOTPCREL(%rip)
	movq	%rbx, %rax
	addq	$4000000, %rsp          # imm = 0x3D0900
	popq	%rbx
	retq

.LBB1_1:
	movl	$4000000, %edi          # imm = 0x3D0900
	movl	$4, %esi
	callq	*alloc::alloc::handle_alloc_error@GOTPCREL(%rip)
	ud2
                                        # -- End function

It can be seen clearly that foo does the allocation first and then runs memset directly on the heap buffer, while bar reserves 4,000,000 bytes on the stack, runs memset on that stack buffer before allocating, and then memcpys the contents into the box.

This is suboptimal: the 4 MB stack temporary costs an extra copy and can overflow the stack. Users should be able to expect Box::new() to have no stack allocation.

It seems that Box::new() is implemented via box syntax, and it even has #[inline(always)], so the two functions shouldn't really differ here...

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions