|
| 1 | +From d914f133f8d6770cfd230e1b55aef67c16d19473 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Elliot Saba < [email protected]> |
| 3 | +Date: Tue, 7 Sep 2021 10:10:49 -0400 |
| 4 | +Subject: [PATCH] [X86] Don't clobber EBX in stackprobes |
| 5 | + |
| 6 | +On X86, the stackprobe emission code chooses the `R11D` register, which
| 7 | +does not exist on i686. Its encoding wraps around to `EBX`, a
| 8 | +callee-saved register that the stack probing prologue does not save and
| 9 | +restore, so the register is clobbered for the callers.
| 10 | + |
| 11 | +We fix this by explicitly using `EAX` as the stack probe register. |
| 12 | + |
| 13 | +Reviewed By: pengfei |
| 14 | + |
| 15 | +Differential Revision: https://reviews.llvm.org/D109203 |
| 16 | +--- |
| 17 | + llvm/lib/Target/X86/X86FrameLowering.cpp | 8 +- |
| 18 | + llvm/test/CodeGen/X86/stack-clash-large.ll | 97 +++++++++++++--------- |
| 19 | + 2 files changed, 64 insertions(+), 41 deletions(-) |
| 20 | + |
| 21 | +diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp |
| 22 | +index 4cde7971e597..86cb86b19d62 100644 |
| 23 | +--- a/llvm/lib/Target/X86/X86FrameLowering.cpp |
| 24 | ++++ b/llvm/lib/Target/X86/X86FrameLowering.cpp |
| 25 | +@@ -671,7 +671,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( |
| 26 | + MF.insert(MBBIter, testMBB); |
| 27 | + MF.insert(MBBIter, tailMBB); |
| 28 | + |
| 29 | +- Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; |
| 30 | ++ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 |
| 31 | ++ : Is64Bit ? X86::R11D |
| 32 | ++ : X86::EAX; |
| 33 | + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) |
| 34 | + .addReg(StackPtr) |
| 35 | + .setMIFlag(MachineInstr::FrameSetup); |
| 36 | +@@ -1092,7 +1094,9 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, |
| 37 | + MF.insert(MBBIter, bodyMBB); |
| 38 | + MF.insert(MBBIter, footMBB); |
| 39 | + const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
| 40 | +- Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; |
| 41 | ++ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 |
| 42 | ++ : Is64Bit ? X86::R11D |
| 43 | ++ : X86::EAX; |
| 44 | + |
| 45 | + // Setup entry block |
| 46 | + { |
| 47 | +diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll |
| 48 | +index 9129e4ed40fd..00c7843b54f5 100644 |
| 49 | +--- a/llvm/test/CodeGen/X86/stack-clash-large.ll |
| 50 | ++++ b/llvm/test/CodeGen/X86/stack-clash-large.ll |
| 51 | +@@ -1,45 +1,64 @@ |
| 52 | +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp |
| 53 | +-; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s |
| 54 | +-; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s |
| 55 | ++; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s |
| 56 | ++; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s |
| 57 | ++; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s |
| 58 | + |
| 59 | + define i32 @foo() local_unnamed_addr #0 { |
| 60 | +-; CHECK-X86-64-LABEL: foo: |
| 61 | +-; CHECK-X86-64: # %bb.0: |
| 62 | +-; CHECK-X86-64-NEXT: movq %rsp, %r11 |
| 63 | +-; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000 |
| 64 | +-; CHECK-X86-64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| 65 | +-; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 |
| 66 | +-; CHECK-X86-64-NEXT: movq $0, (%rsp) |
| 67 | +-; CHECK-X86-64-NEXT: cmpq %r11, %rsp |
| 68 | +-; CHECK-X86-64-NEXT: jne .LBB0_1 |
| 69 | +-; CHECK-X86-64-NEXT: # %bb.2: |
| 70 | +-; CHECK-X86-64-NEXT: subq $2248, %rsp # imm = 0x8C8 |
| 71 | +-; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888 |
| 72 | +-; CHECK-X86-64-NEXT: movl $1, 264(%rsp) |
| 73 | +-; CHECK-X86-64-NEXT: movl $1, 28664(%rsp) |
| 74 | +-; CHECK-X86-64-NEXT: movl -128(%rsp), %eax |
| 75 | +-; CHECK-X86-64-NEXT: addq $71880, %rsp # imm = 0x118C8 |
| 76 | +-; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8 |
| 77 | +-; CHECK-X86-64-NEXT: retq |
| 78 | ++; CHECK-X64-LABEL: foo: |
| 79 | ++; CHECK-X64: # %bb.0: |
| 80 | ++; CHECK-X64-NEXT: movq %rsp, %r11 |
| 81 | ++; CHECK-X64-NEXT: subq $69632, %r11 # imm = 0x11000 |
| 82 | ++; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| 83 | ++; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000 |
| 84 | ++; CHECK-X64-NEXT: movq $0, (%rsp) |
| 85 | ++; CHECK-X64-NEXT: cmpq %r11, %rsp |
| 86 | ++; CHECK-X64-NEXT: jne .LBB0_1 |
| 87 | ++; CHECK-X64-NEXT: # %bb.2: |
| 88 | ++; CHECK-X64-NEXT: subq $2248, %rsp # imm = 0x8C8 |
| 89 | ++; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888 |
| 90 | ++; CHECK-X64-NEXT: movl $1, 264(%rsp) |
| 91 | ++; CHECK-X64-NEXT: movl $1, 28664(%rsp) |
| 92 | ++; CHECK-X64-NEXT: movl -128(%rsp), %eax |
| 93 | ++; CHECK-X64-NEXT: addq $71880, %rsp # imm = 0x118C8 |
| 94 | ++; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 |
| 95 | ++; CHECK-X64-NEXT: retq |
| 96 | + ; |
| 97 | +-; CHECK-X86-32-LABEL: foo: |
| 98 | +-; CHECK-X86-32: # %bb.0: |
| 99 | +-; CHECK-X86-32-NEXT: movl %esp, %r11d |
| 100 | +-; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000 |
| 101 | +-; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| 102 | +-; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 |
| 103 | +-; CHECK-X86-32-NEXT: movl $0, (%esp) |
| 104 | +-; CHECK-X86-32-NEXT: cmpl %r11d, %esp |
| 105 | +-; CHECK-X86-32-NEXT: jne .LBB0_1 |
| 106 | +-; CHECK-X86-32-NEXT: # %bb.2: |
| 107 | +-; CHECK-X86-32-NEXT: subl $2380, %esp # imm = 0x94C |
| 108 | +-; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016 |
| 109 | +-; CHECK-X86-32-NEXT: movl $1, 392(%esp) |
| 110 | +-; CHECK-X86-32-NEXT: movl $1, 28792(%esp) |
| 111 | +-; CHECK-X86-32-NEXT: movl (%esp), %eax |
| 112 | +-; CHECK-X86-32-NEXT: addl $72012, %esp # imm = 0x1194C |
| 113 | +-; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4 |
| 114 | +-; CHECK-X86-32-NEXT: retl |
| 115 | ++; CHECK-X86-LABEL: foo: |
| 116 | ++; CHECK-X86: # %bb.0: |
| 117 | ++; CHECK-X86-NEXT: movl %esp, %eax |
| 118 | ++; CHECK-X86-NEXT: subl $69632, %eax # imm = 0x11000 |
| 119 | ++; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| 120 | ++; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000 |
| 121 | ++; CHECK-X86-NEXT: movl $0, (%esp) |
| 122 | ++; CHECK-X86-NEXT: cmpl %eax, %esp |
| 123 | ++; CHECK-X86-NEXT: jne .LBB0_1 |
| 124 | ++; CHECK-X86-NEXT: # %bb.2: |
| 125 | ++; CHECK-X86-NEXT: subl $2380, %esp # imm = 0x94C |
| 126 | ++; CHECK-X86-NEXT: .cfi_def_cfa_offset 72016 |
| 127 | ++; CHECK-X86-NEXT: movl $1, 392(%esp) |
| 128 | ++; CHECK-X86-NEXT: movl $1, 28792(%esp) |
| 129 | ++; CHECK-X86-NEXT: movl (%esp), %eax |
| 130 | ++; CHECK-X86-NEXT: addl $72012, %esp # imm = 0x1194C |
| 131 | ++; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 |
| 132 | ++; CHECK-X86-NEXT: retl |
| 133 | ++; |
| 134 | ++; CHECK-X32-LABEL: foo: |
| 135 | ++; CHECK-X32: # %bb.0: |
| 136 | ++; CHECK-X32-NEXT: movl %esp, %r11d |
| 137 | ++; CHECK-X32-NEXT: subl $69632, %r11d # imm = 0x11000 |
| 138 | ++; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| 139 | ++; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000 |
| 140 | ++; CHECK-X32-NEXT: movq $0, (%esp) |
| 141 | ++; CHECK-X32-NEXT: cmpl %r11d, %esp |
| 142 | ++; CHECK-X32-NEXT: jne .LBB0_1 |
| 143 | ++; CHECK-X32-NEXT: # %bb.2: |
| 144 | ++; CHECK-X32-NEXT: subl $2248, %esp # imm = 0x8C8 |
| 145 | ++; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888 |
| 146 | ++; CHECK-X32-NEXT: movl $1, 264(%esp) |
| 147 | ++; CHECK-X32-NEXT: movl $1, 28664(%esp) |
| 148 | ++; CHECK-X32-NEXT: movl -128(%esp), %eax |
| 149 | ++; CHECK-X32-NEXT: addl $71880, %esp # imm = 0x118C8 |
| 150 | ++; CHECK-X32-NEXT: .cfi_def_cfa_offset 8 |
| 151 | ++; CHECK-X32-NEXT: retq |
| 152 | + %a = alloca i32, i64 18000, align 16 |
| 153 | + %b0 = getelementptr inbounds i32, i32* %a, i64 98 |
| 154 | + %b1 = getelementptr inbounds i32, i32* %a, i64 7198 |
| 155 | +-- |
| 156 | +2.33.0 |
| 157 | + |
0 commit comments