diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index bbef65700b2a5..6d2e51cb4bd92 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,41 @@ class MCPlusBuilder {
     return getNoRegister();
   }
 
+  /// Returns the register containing an address safely materialized by `Inst`
+  /// under the Pointer Authentication threat model.
+  ///
+  /// Returns the register `Inst` writes to if:
+  /// 1. the register is a materialized address, and
+  /// 2. the register has been materialized safely, i.e. cannot be attacker-
+  ///    controlled, under the Pointer Authentication threat model.
+  ///
+  /// If the instruction does not write to any register satisfying the above
+  /// two conditions, NoRegister is returned.
+  ///
+  /// The Pointer Authentication threat model assumes an attacker is able to
+  /// modify any writable memory, but not executable code (due to W^X).
+  virtual MCPhysReg
+  getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return getNoRegister();
+  }
+
+  /// Analyzes if this instruction can safely perform address arithmetic
+  /// under the Pointer Authentication threat model.
+  ///
+  /// If an (OutReg, InReg) pair is returned, then after Inst is executed,
+  /// OutReg is as trusted as InReg is.
+  ///
+  /// The arithmetic instruction is considered safe if OutReg is not attacker-
+  /// controlled, provided InReg and executable code are not. Please note that
+  /// registers other than InReg, as well as any memory writable by the
+  /// process, should be considered attacker-controlled.
+  virtual std::optional<std::pair<MCPhysReg, MCPhysReg>>
+  analyzeAddressArithmeticsForPtrAuth(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return std::make_pair(getNoRegister(), getNoRegister());
+  }
+
   virtual bool isTerminator(const MCInst &Inst) const;
 
   virtual bool isNoop(const MCInst &Inst) const {
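To make the contract of these two hooks concrete, here is a minimal sketch of a conservative target-specific override. The class name is hypothetical and not part of this patch (the real AArch64 implementation appears further below), and the snippet assumes the MCPlusBuilder.h declarations above are in scope. The key property is that NoRegister and std::nullopt are always correct, conservative answers, so targets can opt in incrementally:

// Illustrative sketch only -- a hypothetical target, not part of this change.
class HypotheticalTargetMCPlusBuilder : public MCPlusBuilder {
public:
  using MCPlusBuilder::MCPlusBuilder;

  MCPhysReg
  getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const override {
    // Report a destination register only when the value written to it cannot
    // depend on attacker-controlled registers or writable memory.
    return getNoRegister(); // conservative: nothing is considered safe
  }

  std::optional<std::pair<MCPhysReg, MCPhysReg>>
  analyzeAddressArithmeticsForPtrAuth(const MCInst &Inst) const override {
    // Report (OutReg, InReg) only when OutReg ends up exactly as trustworthy
    // as InReg, e.g. for register-plus-constant address updates.
    return std::nullopt; // conservative: no safe address arithmetic
  }
};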
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..00846247fdc21 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,49 @@ class PacRetAnalysis
     });
   }
 
+  BitVector getClobberedRegs(const MCInst &Point) const {
+    BitVector Clobbered(NumRegs, false);
+    // Assume a call can clobber all registers, including callee-saved
+    // registers. There's a good chance that callee-saved registers will be
+    // saved on the stack at some point during execution of the callee.
+    // Therefore they should also be considered as potentially modified by an
+    // attacker/written to.
+    // Also, not all functions may respect the AAPCS ABI rules about
+    // caller/callee-saved registers.
+    if (BC.MIB->isCall(Point))
+      Clobbered.set();
+    else
+      BC.MIB->getClobberedRegs(Point, Clobbered);
+    return Clobbered;
+  }
+
+  // Returns all registers that can be treated as if they are written by an
+  // authentication instruction.
+  SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
+                                                const State &Cur) const {
+    SmallVector<MCPhysReg> Regs;
+    const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+    // A signed pointer can be authenticated, or
+    ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
+    if (AutReg && *AutReg != NoReg)
+      Regs.push_back(*AutReg);
+
+    // ... a safe address can be materialized, or
+    MCPhysReg NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point);
+    if (NewAddrReg != NoReg)
+      Regs.push_back(NewAddrReg);
+
+    // ... an address can be updated in a safe manner, producing the result
+    // which is as trusted as the input address.
+    if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
+      if (Cur.SafeToDerefRegs[DstAndSrc->second])
+        Regs.push_back(DstAndSrc->first);
+    }
+
+    return Regs;
+  }
+
   State computeNext(const MCInst &Point, const State &Cur) {
     PacStatePrinter P(BC);
     LLVM_DEBUG({
@@ -355,19 +398,16 @@ class PacRetAnalysis
       return State();
     }
 
+    // First, compute various properties of the instruction, taking the state
+    // before its execution into account, if necessary.
+
+    BitVector Clobbered = getClobberedRegs(Point);
+    SmallVector<MCPhysReg> NewSafeToDerefRegs =
+        getRegsMadeSafeToDeref(Point, Cur);
+
+    // Then, compute the state after this instruction is executed.
     State Next = Cur;
-    BitVector Clobbered(NumRegs, false);
-    // Assume a call can clobber all registers, including callee-saved
-    // registers. There's a good chance that callee-saved registers will be
-    // saved on the stack at some point during execution of the callee.
-    // Therefore they should also be considered as potentially modified by an
-    // attacker/written to.
-    // Also, not all functions may respect the AAPCS ABI rules about
-    // caller/callee-saved registers.
-    if (BC.MIB->isCall(Point))
-      Clobbered.set();
-    else
-      BC.MIB->getClobberedRegs(Point, Clobbered);
+
     Next.SafeToDerefRegs.reset(Clobbered);
     // Keep track of this instruction if it writes to any of the registers we
     // need to track that for:
@@ -375,17 +415,18 @@ class PacRetAnalysis
       if (Clobbered[Reg])
        lastWritingInsts(Next, Reg) = {&Point};
 
-    ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
-    if (AutReg && *AutReg != BC.MIB->getNoRegister()) {
-      // The sub-registers of *AutReg are also trusted now, but not its
-      // super-registers (as they retain untrusted register units).
-      BitVector AuthenticatedSubregs =
-          BC.MIB->getAliases(*AutReg, /*OnlySmaller=*/true);
-      for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) {
-        Next.SafeToDerefRegs.set(Reg);
-        if (RegsToTrackInstsFor.isTracked(Reg))
-          lastWritingInsts(Next, Reg).clear();
-      }
+    // After accounting for clobbered registers in general, override the state
+    // according to authentication and other *special cases* of clobbering.
+
+    // The sub-registers are also safe-to-dereference now, but not their
+    // super-registers (as they retain untrusted register units).
+    BitVector NewSafeSubregs(NumRegs);
+    for (MCPhysReg SafeReg : NewSafeToDerefRegs)
+      NewSafeSubregs |= BC.MIB->getAliases(SafeReg, /*OnlySmaller=*/true);
+    for (MCPhysReg Reg : NewSafeSubregs.set_bits()) {
+      Next.SafeToDerefRegs.set(Reg);
+      if (RegsToTrackInstsFor.isTracked(Reg))
+        lastWritingInsts(Next, Reg).clear();
     }
 
     LLVM_DEBUG({
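The order of operations in computeNext() above matters: the registers made safe are computed against the state before the instruction, all clobbered registers are then dropped, and only afterwards are the newly-safe registers (and their sub-registers) re-added. That is what lets an instruction clobber its own output and still leave it safe-to-dereference ("autia x0, x1", or "add x0, x0, #8" when x0 was already safe). A simplified, self-contained model of this transfer function, using standard-library containers instead of the real State/BitVector/MCPlusBuilder types and ignoring sub-register aliasing, could look like this:

// Simplified model of the transfer function -- illustrative only, not the
// scanner's real code. Registers are plain ids; the state is the set of
// safe-to-dereference registers.
#include <optional>
#include <set>
#include <utility>
#include <vector>

using Reg = unsigned;
using RegSet = std::set<Reg>;

struct Insn {
  std::vector<Reg> Clobbered;                   // registers the insn may write
  std::vector<Reg> MadeSafe;                    // written safely (AUT*, ADR, ADRP)
  std::optional<std::pair<Reg, Reg>> SafeArith; // (Out, In) for safe arithmetic
};

RegSet computeNext(const Insn &I, const RegSet &Cur) {
  // 1. Decide what becomes safe, using the state *before* the instruction.
  std::vector<Reg> NewSafe = I.MadeSafe;
  if (I.SafeArith && Cur.count(I.SafeArith->second))
    NewSafe.push_back(I.SafeArith->first);

  // 2. Pessimistically drop every clobbered register...
  RegSet Next = Cur;
  for (Reg R : I.Clobbered)
    Next.erase(R);

  // 3. ...then re-add the safely written ones, so an instruction that
  // clobbers its own output (e.g. "add x0, x0, #8") keeps it safe.
  Next.insert(NewSafe.begin(), NewSafe.end());
  return Next;
}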
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 2a648baa4d514..f8ae7ce0aa129 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -304,6 +304,43 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
   }
 
+  MCPhysReg
+  getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const override {
+    switch (Inst.getOpcode()) {
+    case AArch64::ADR:
+    case AArch64::ADRP:
+      // These instructions produce an address value based on the information
+      // encoded into the instruction itself (which should reside in read-only
+      // code memory) and the value of the PC register (that is, the location
+      // of this instruction), so the produced value is not attacker-controlled.
+      return Inst.getOperand(0).getReg();
+    default:
+      return getNoRegister();
+    }
+  }
+
+  std::optional<std::pair<MCPhysReg, MCPhysReg>>
+  analyzeAddressArithmeticsForPtrAuth(const MCInst &Inst) const override {
+    switch (Inst.getOpcode()) {
+    default:
+      return std::nullopt;
+    case AArch64::ADDXri:
+    case AArch64::SUBXri:
+      // The immediate addend is encoded into the instruction itself, so it is
+      // not attacker-controlled under the Pointer Authentication threat model.
+      return std::make_pair(Inst.getOperand(0).getReg(),
+                            Inst.getOperand(1).getReg());
+    case AArch64::ORRXrs:
+      // "mov Xd, Xm" is equivalent to "orr Xd, XZR, Xm, lsl #0"
+      if (Inst.getOperand(1).getReg() != AArch64::XZR ||
+          Inst.getOperand(3).getImm() != 0)
+        return std::nullopt;
+
+      return std::make_pair(Inst.getOperand(0).getReg(),
+                            Inst.getOperand(2).getReg());
+    }
+  }
+
   bool isADRP(const MCInst &Inst) const override {
     return Inst.getOpcode() == AArch64::ADRP;
   }
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 01b7cec3272e6..d506ec13f4895 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -141,24 +141,9 @@ f_nonx30_ret_ok:
         stp x29, x30, [sp, #-16]!
         mov x29, sp
         bl g
-        add x0, x0, #3
         ldp x29, x30, [sp], #16
-        // FIXME: Should the scanner understand that an authenticated register (below x30,
-        // after the autiasp instruction), is OK to be moved to another register
-        // and then that register being used to return?
-        // This respects that pac-ret hardening intent, but the scanner currently
-        // will produce a false positive for this.
-        // Is it worthwhile to make the scanner more complex for this case?
-        // So far, scanning many millions of instructions across a linux distro,
-        // I haven't encountered such an example.
-        // The ".if 0" block below tests this case and currently fails.
-.if 0
         autiasp
         mov x16, x30
-.else
-        mov x16, x30
-        autia x16, sp
-.endif
 // CHECK-NOT: function f_nonx30_ret_ok
         ret x16
         .size f_nonx30_ret_ok, .-f_nonx30_ret_ok
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
new file mode 100644
index 0000000000000..b4dd53a5e3c8d
--- /dev/null
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
@@ -0,0 +1,244 @@
+// -Wl,--no-relax prevents converting ADRP+ADD pairs into NOP+ADR.
+// RUN: %clang %cflags -march=armv8.3-a -Wl,--no-relax %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while the "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mention the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail calls for testing
+// if a register is considered safe when detection of signing oracles is
+// implemented, as that is a more traditional usage of PC-relative constants.
+// Moreover, using PAC instructions would improve test robustness, as
+// handling of *calls* can be influenced by what BOLT classifies as a
+// tail call, for example.
+
+        .text
+
+// Define a function that is reachable by ADR instruction.
+ .type sym,@function +sym: + ret + .size sym, .-sym + + .globl good_adr + .type good_adr,@function +good_adr: +// CHECK-NOT: good_adr + adr x0, sym + br x0 + .size good_adr, .-good_adr + + .globl good_adrp + .type good_adrp,@function +good_adrp: +// CHECK-NOT: good_adrp + adrp x0, sym + br x0 + .size good_adrp, .-good_adrp + + .globl good_adrp_add + .type good_adrp_add,@function +good_adrp_add: +// CHECK-NOT: good_adrp_add + adrp x0, sym + add x0, x0, :lo12:sym + br x0 + .size good_adrp_add, .-good_adrp_add + + .globl good_adrp_add_with_const_offset + .type good_adrp_add_with_const_offset,@function +good_adrp_add_with_const_offset: +// CHECK-NOT: good_adrp_add_with_const_offset + adrp x0, sym + add x0, x0, :lo12:sym + add x0, x0, #8 + br x0 + .size good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset + + .globl bad_adrp_with_nonconst_offset + .type bad_adrp_with_nonconst_offset,@function +bad_adrp_with_nonconst_offset: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, x1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{.*}} +// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL + adrp x0, sym + add x0, x0, x1 + br x0 + .size bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset + + .globl bad_split_adrp + .type bad_split_adrp,@function +bad_split_adrp: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}} +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW + cbz x2, 1f + adrp x0, sym +1: + add x0, x0, :lo12:sym + br x0 + .size bad_split_adrp, .-bad_split_adrp + +// Materialization of absolute addresses is not handled, as it is not expected +// to be used by real-world code, but can be supported if needed. + + .globl bad_immediate_constant + .type bad_immediate_constant,@function +bad_immediate_constant: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, #{{.*}} +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, #{{.*}} +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL + movz x0, #1234 + br x0 + .size bad_immediate_constant, .-bad_immediate_constant + +// Any ADR or ADRP instruction followed by any number of increments/decrements +// by constant is considered safe. 
+ + .globl good_adr_with_add + .type good_adr_with_add,@function +good_adr_with_add: +// CHECK-NOT: good_adr_with_add + adr x0, sym + add x0, x0, :lo12:sym + br x0 + .size good_adr_with_add, .-good_adr_with_add + + .globl good_adrp_with_add_non_consecutive + .type good_adrp_with_add_non_consecutive,@function +good_adrp_with_add_non_consecutive: +// CHECK-NOT: good_adrp_with_add_non_consecutive + adrp x0, sym + mul x1, x2, x3 + add x0, x0, :lo12:sym + br x0 + .size good_adrp_with_add_non_consecutive, .-good_adrp_with_add_non_consecutive + + .globl good_many_offsets + .type good_many_offsets,@function +good_many_offsets: +// CHECK-NOT: good_many_offsets + adrp x0, sym + add x1, x0, #8 + add x2, x1, :lo12:sym + br x2 + .size good_many_offsets, .-good_many_offsets + + .globl good_negative_offset + .type good_negative_offset,@function +good_negative_offset: +// CHECK-NOT: good_negative_offset + adr x0, sym + sub x1, x0, #8 + br x1 + .size good_negative_offset, .-good_negative_offset + +// MOV Xd, Xm (which is an alias of ORR Xd, XZR, Xm) is handled as part of +// support for address arithmetics, but ORR in general is not. +// This restriction may be relaxed in the future. + + .globl good_mov_reg + .type good_mov_reg,@function +good_mov_reg: +// CHECK-NOT: good_mov_reg + adrp x0, sym + mov x1, x0 + orr x2, xzr, x1 // the same as "mov x2, x1" + br x2 + .size good_mov_reg, .-good_mov_reg + + .globl bad_orr_not_xzr + .type bad_orr_not_xzr,@function +bad_orr_not_xzr: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_xzr, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, x1, x0 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: mov x1, #0 +// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, x1, x0 +// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL + adrp x0, sym + // The generic case of "orr Xd, Xn, Xm" is not allowed so far, + // even if Xn is known to be safe + movz x1, #0 + orr x2, x1, x0 + br x2 + .size bad_orr_not_xzr, .-bad_orr_not_xzr + + .globl bad_orr_not_lsl0 + .type bad_orr_not_lsl0,@function +bad_orr_not_lsl0: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_lsl0, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1 +// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL + adrp x0, sym + // Currently, the only allowed form of "orr" is that used by "mov Xd, Xn" alias. + // This can be relaxed in the future. + orr x2, xzr, x0, lsl #1 + br x2 + .size bad_orr_not_lsl0, .-bad_orr_not_lsl0 + +// Check that the input register operands of `add`/`mov` is correct. 
+ + .globl bad_add_input_reg + .type bad_add_input_reg,@function +bad_add_input_reg: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_add_input_reg, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}} +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL + adrp x0, sym + add x0, x1, :lo12:sym + br x0 + .size bad_add_input_reg, .-bad_add_input_reg + + .globl bad_mov_input_reg + .type bad_mov_input_reg,@function +bad_mov_input_reg: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_mov_input_reg, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL +// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, x1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}} +// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL + adrp x0, sym + mov x0, x1 + br x0 + .size bad_mov_input_reg, .-bad_mov_input_reg + + .globl main + .type main,@function +main: + mov x0, 0 + ret + .size main, .-main
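The "can be supported if needed" note above suggests one possible follow-up: a value built by a single MOVZ depends only on an immediate encoded in read-only code, so it could also be reported as safely materialized. Below is a hedged sketch of that extension to the AArch64 hook; it is not part of this patch, multi-instruction MOVZ/MOVK sequences are deliberately left out, and the bad_immediate_constant test above would have to become a "good" test if this were adopted:

// Hypothetical extension, not part of this change: also accept a lone MOVZ,
// whose immediate and shift are encoded in the (read-only) instruction.
MCPhysReg
getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const override {
  switch (Inst.getOpcode()) {
  case AArch64::ADR:
  case AArch64::ADRP:
  case AArch64::MOVZXi: // e.g. "movz x0, #1234" - all inputs are immediates
    return Inst.getOperand(0).getReg();
  default:
    return getNoRegister();
  }
}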