Skip to content

[BOLT] Gadget scanner: factor out utility code #131895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 84 additions & 52 deletions bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "bolt/Passes/NonPacProtectedRetAnalysis.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Passes/DataflowAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Format.h"
Expand Down Expand Up @@ -58,6 +59,71 @@ raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {

namespace NonPacProtectedRetAnalysis {

/// Debug helper: writes \p Label followed by the printed form of \p MI (as
/// produced by BinaryContext::printInstruction) to the debug stream.
///
/// In the visible code this is only called from inside LLVM_DEBUG blocks, so
/// it is marked [[maybe_unused]] to avoid unused-function warnings in builds
/// where those blocks are compiled out.
[[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
                                       const MCInst &MI) {
  dbgs() << " " << Label << ": ";
  BC.printInstruction(dbgs(), MI);
}

/// Debug helper: writes \p Label followed by a description of \p Reg and a
/// newline to the debug stream.
///
/// The description is "(error)" if \p Reg holds an error, "(none)" if it
/// equals BC.MIB->getNoRegister(), and the name returned by BC.MRI->getName()
/// otherwise.
///
/// In the visible code this is only called from inside LLVM_DEBUG blocks, so
/// it is marked [[maybe_unused]] to avoid unused-function warnings in builds
/// where those blocks are compiled out.
[[maybe_unused]] static void traceReg(const BinaryContext &BC, StringRef Label,
                                      ErrorOr<MCPhysReg> Reg) {
  dbgs() << " " << Label << ": ";
  if (Reg.getError())
    dbgs() << "(error)";
  else if (*Reg == BC.MIB->getNoRegister())
    dbgs() << "(none)";
  else
    dbgs() << BC.MRI->getName(*Reg);
  dbgs() << "\n";
}

static void traceRegMask(const BinaryContext &BC, StringRef Label,
BitVector Mask) {
dbgs() << " " << Label << ": ";
RegStatePrinter(BC).print(dbgs(), Mask);
dbgs() << "\n";
}

// This class represents mapping from a set of arbitrary physical registers to
// consecutive array indexes.
//
// Indexes are assigned in the order in which the registers appear in the
// vector passed to the constructor. If a register is listed more than once,
// the index of its last occurrence is the one returned by getIndex().
class TrackedRegisters {
  // Sentinel stored in RegToIndexMapping for registers that are not tracked.
  static constexpr uint16_t NoIndex = -1;
  // The tracked registers, in the order given to the constructor.
  const std::vector<MCPhysReg> Registers;
  // Indexed by register number: the index assigned to that register, or
  // NoIndex. Only large enough to cover the largest tracked register.
  std::vector<uint16_t> RegToIndexMapping;

  // Returns the number of entries RegToIndexMapping needs so that every
  // register in RegsToTrack can be used as a subscript (0 for an empty set).
  static size_t getMappingSize(const std::vector<MCPhysReg> &RegsToTrack) {
    if (RegsToTrack.empty())
      return 0;
    return 1 + *llvm::max_element(RegsToTrack);
  }

public:
  // Builds the mapping for the registers listed in RegsToTrack.
  TrackedRegisters(const std::vector<MCPhysReg> &RegsToTrack)
      : Registers(RegsToTrack),
        RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) {
    // Every assigned index has to fit in uint16_t and must stay distinct from
    // the NoIndex sentinel.
    assert(RegsToTrack.size() <= NoIndex && "Too many registers to track");
    for (unsigned I = 0; I < RegsToTrack.size(); ++I)
      RegToIndexMapping[RegsToTrack[I]] = static_cast<uint16_t>(I);
  }

  // Returns the tracked registers in the order given to the constructor.
  ArrayRef<MCPhysReg> getRegisters() const { return Registers; }

  // Returns the number of registers passed to the constructor.
  size_t getNumTrackedRegisters() const { return Registers.size(); }

  // Returns true if no registers are tracked.
  bool empty() const { return Registers.empty(); }

  // Returns true if Reg is one of the tracked registers.
  bool isTracked(MCPhysReg Reg) const {
    // Registers above the largest tracked one have no entry in the mapping.
    bool IsTracked = static_cast<size_t>(Reg) < RegToIndexMapping.size() &&
                     RegToIndexMapping[Reg] != NoIndex;
    // Cross-check the table lookup against a linear search (assert builds).
    assert(IsTracked == llvm::is_contained(Registers, Reg));
    return IsTracked;
  }

  // Returns the index assigned to Reg, which must be a tracked register.
  unsigned getIndex(MCPhysReg Reg) const {
    assert(isTracked(Reg) && "Register is not tracked");
    return RegToIndexMapping[Reg];
  }
};

// The security property that is checked is:
// When a register is used as the address to jump to in a return instruction,
// that register must either:
Expand Down Expand Up @@ -169,52 +235,34 @@ class PacRetAnalysis
PacRetAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId,
const std::vector<MCPhysReg> &RegsToTrackInstsFor)
: Parent(BF, AllocId), NumRegs(BF.getBinaryContext().MRI->getNumRegs()),
RegsToTrackInstsFor(RegsToTrackInstsFor),
TrackingLastInsts(!RegsToTrackInstsFor.empty()),
Reg2StateIdx(RegsToTrackInstsFor.empty()
? 0
: *llvm::max_element(RegsToTrackInstsFor) + 1,
-1) {
for (unsigned I = 0; I < RegsToTrackInstsFor.size(); ++I)
Reg2StateIdx[RegsToTrackInstsFor[I]] = I;
}
RegsToTrackInstsFor(RegsToTrackInstsFor) {}
virtual ~PacRetAnalysis() {}

protected:
const unsigned NumRegs;
/// RegsToTrackInstsFor is the set of registers for which the dataflow
/// analysis must compute the set of instructions that last wrote to them.
const std::vector<MCPhysReg> RegsToTrackInstsFor;
const bool TrackingLastInsts;
/// Reg2StateIdx maps Register to the index in the vector used in State to
/// track which instructions last wrote to this register.
std::vector<uint16_t> Reg2StateIdx;
const TrackedRegisters RegsToTrackInstsFor;

SmallPtrSet<const MCInst *, 4> &lastWritingInsts(State &S,
MCPhysReg Reg) const {
assert(Reg < Reg2StateIdx.size());
assert(isTrackingReg(Reg));
return S.LastInstWritingReg[Reg2StateIdx[Reg]];
unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
return S.LastInstWritingReg[Index];
}
const SmallPtrSet<const MCInst *, 4> &lastWritingInsts(const State &S,
MCPhysReg Reg) const {
assert(Reg < Reg2StateIdx.size());
assert(isTrackingReg(Reg));
return S.LastInstWritingReg[Reg2StateIdx[Reg]];
}

bool isTrackingReg(MCPhysReg Reg) const {
return llvm::is_contained(RegsToTrackInstsFor, Reg);
unsigned Index = RegsToTrackInstsFor.getIndex(Reg);
return S.LastInstWritingReg[Index];
}

void preflight() {}

State getStartingStateAtBB(const BinaryBasicBlock &BB) {
return State(NumRegs, RegsToTrackInstsFor.size());
return State(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
}

State getStartingStateAtPoint(const MCInst &Point) {
return State(NumRegs, RegsToTrackInstsFor.size());
return State(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
}

void doConfluence(State &StateOut, const State &StateIn) {
Expand Down Expand Up @@ -275,7 +323,7 @@ class PacRetAnalysis
Next.NonAutClobRegs |= Written;
// Keep track of this instruction if it writes to any of the registers we
// need to track that for:
for (MCPhysReg Reg : RegsToTrackInstsFor)
for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters())
if (Written[Reg])
lastWritingInsts(Next, Reg) = {&Point};

Expand All @@ -287,7 +335,7 @@ class PacRetAnalysis
// https://github.com/llvm/llvm-project/pull/122304#discussion_r1939515516
Next.NonAutClobRegs.reset(
BC.MIB->getAliases(*AutReg, /*OnlySmaller=*/true));
if (TrackingLastInsts && isTrackingReg(*AutReg))
if (RegsToTrackInstsFor.isTracked(*AutReg))
lastWritingInsts(Next, *AutReg).clear();
}

Expand All @@ -306,7 +354,7 @@ class PacRetAnalysis
std::vector<MCInstReference>
getLastClobberingInsts(const MCInst Ret, BinaryFunction &BF,
const BitVector &UsedDirtyRegs) const {
if (!TrackingLastInsts)
if (RegsToTrackInstsFor.empty())
return {};
auto MaybeState = getStateAt(Ret);
if (!MaybeState)
Expand Down Expand Up @@ -355,28 +403,18 @@ Analysis::computeDfState(PacRetAnalysis &PRA, BinaryFunction &BF,
}
MCPhysReg RetReg = *MaybeRetReg;
LLVM_DEBUG({
dbgs() << " Found RET inst: ";
BC.printInstruction(dbgs(), Inst);
dbgs() << " RetReg: " << BC.MRI->getName(RetReg)
<< "; authenticatesReg: "
<< BC.MIB->isAuthenticationOfReg(Inst, RetReg) << "\n";
traceInst(BC, "Found RET inst", Inst);
traceReg(BC, "RetReg", RetReg);
traceReg(BC, "Authenticated reg", BC.MIB->getAuthenticatedReg(Inst));
});
if (BC.MIB->isAuthenticationOfReg(Inst, RetReg))
break;
BitVector UsedDirtyRegs = PRA.getStateAt(Inst)->NonAutClobRegs;
LLVM_DEBUG({
dbgs() << " NonAutClobRegs at Ret: ";
RegStatePrinter RSP(BC);
RSP.print(dbgs(), UsedDirtyRegs);
dbgs() << "\n";
});
LLVM_DEBUG(
{ traceRegMask(BC, "NonAutClobRegs at Ret", UsedDirtyRegs); });
UsedDirtyRegs &= BC.MIB->getAliases(RetReg, /*OnlySmaller=*/true);
LLVM_DEBUG({
dbgs() << " Intersection with RetReg: ";
RegStatePrinter RSP(BC);
RSP.print(dbgs(), UsedDirtyRegs);
dbgs() << "\n";
});
LLVM_DEBUG(
{ traceRegMask(BC, "Intersection with RetReg", UsedDirtyRegs); });
if (UsedDirtyRegs.any()) {
// This return instruction needs to be reported
Result.Diagnostics.push_back(std::make_shared<Gadget>(
Expand Down Expand Up @@ -472,12 +510,6 @@ void Gadget::generateReport(raw_ostream &OS, const BinaryContext &BC) const {
OS << " " << (I + 1) << ". ";
BC.printInstruction(OS, InstRef, InstRef.getAddress(), BF);
};
LLVM_DEBUG({
dbgs() << " .. OverWritingRetRegInst:\n";
for (MCInstReference Ref : OverwritingRetRegInst) {
dbgs() << " " << Ref << "\n";
}
});
if (OverwritingRetRegInst.size() == 1) {
const MCInstReference OverwInst = OverwritingRetRegInst[0];
assert(OverwInst.ParentKind == MCInstReference::BasicBlockParent);
Expand Down
142 changes: 142 additions & 0 deletions bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// REQUIRES: asserts
//
// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
// RUN: llvm-bolt-binary-analysis --scanners=pacret -no-threads \
// RUN: -debug-only bolt-nonpacprotectedret %t.exe 2>&1 | FileCheck %s

// Check the debug output generated by PAuth gadget scanner to make sure
// that the output is kept meaningful and to provide an overview of what happens
// inside the scanner.

// Function made of two basic blocks: the return address is signed with
// paciasp in the entry block and authenticated with autiasp right before the
// ret. The expected debug output below therefore ends with an empty
// NonAutClobRegs set at the ret.
.globl simple
.type simple,@function
simple:
paciasp
b 1f
1:
autiasp
ret
.size simple, .-simple

// CHECK-LABEL:Analyzing in function simple, AllocatorId 1
// CHECK-NEXT: Binary Function "simple" {
// CHECK-NEXT: Number : 1
// CHECK-NEXT: State : CFG constructed
// ...
// CHECK: BB Layout : [[BB0:[0-9a-zA-Z.]+]], [[BB1:[0-9a-zA-Z.]+]]
// CHECK-NEXT: }
// CHECK-NEXT: [[BB0]] (2 instructions, align : 1)
// CHECK-NEXT: Entry Point
// CHECK-NEXT: 00000000: paciasp
// CHECK-NEXT: 00000004: b [[BB1]]
// CHECK-NEXT: Successors: [[BB1]]
// CHECK-EMPTY:
// CHECK-NEXT: [[BB1]] (2 instructions, align : 1)
// CHECK-NEXT: Predecessors: [[BB0]]
// CHECK-NEXT: 00000008: autiasp
// CHECK-NEXT: 0000000c: ret
// CHECK-EMPTY:
// CHECK-NEXT: DWARF CFI Instructions:
// CHECK-NEXT: <empty>
// CHECK-NEXT: End of Function "simple"
// CHECK-EMPTY:
// CHECK-NEXT: PacRetAnalysis::ComputeNext( hint #25, pacret-state<NonAutClobRegs: , Insts: >)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >)
// CHECK-NEXT: PacRetAnalysis::ComputeNext( b [[BB1]], pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >)
// CHECK-NEXT: PacRetAnalysis::Confluence(
// CHECK-NEXT: State 1: pacret-state<NonAutClobRegs: , Insts: >
// CHECK-NEXT: State 2: pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >)
// CHECK-NEXT: merged state: pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >
// CHECK-NEXT: PacRetAnalysis::ComputeNext( hint #29, pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: , Insts: >)
// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state<NonAutClobRegs: , Insts: >)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: , Insts: >)
// CHECK-NEXT: PacRetAnalysis::Confluence(
// CHECK-NEXT: State 1: pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >
// CHECK-NEXT: State 2: pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >)
// CHECK-NEXT: merged state: pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >
// CHECK-NEXT: PacRetAnalysis::ComputeNext( hint #29, pacret-state<NonAutClobRegs: LR W30 W30_HI , Insts: >)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: , Insts: >)
// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state<NonAutClobRegs: , Insts: >)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: , Insts: >)
// CHECK-NEXT: After PacRetAnalysis:
// CHECK-NEXT: Binary Function "simple" {
// CHECK-NEXT: Number : 1
// CHECK-NEXT: State : CFG constructed
// ...
// CHECK: BB Layout : [[BB0]], [[BB1]]
// CHECK-NEXT: }
// CHECK-NEXT: [[BB0]] (2 instructions, align : 1)
// CHECK-NEXT: Entry Point
// CHECK-NEXT: 00000000: paciasp # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: >
// CHECK-NEXT: 00000004: b [[BB1]] # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: >
// CHECK-NEXT: Successors: [[BB1]]
// CHECK-EMPTY:
// CHECK-NEXT: [[BB1]] (2 instructions, align : 1)
// CHECK-NEXT: Predecessors: [[BB0]]
// CHECK-NEXT: 00000008: autiasp # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: >
// CHECK-NEXT: 0000000c: ret # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: >
// CHECK-EMPTY:
// CHECK-NEXT: DWARF CFI Instructions:
// CHECK-NEXT: <empty>
// CHECK-NEXT: End of Function "simple"
// CHECK-EMPTY:
// CHECK-NEXT: Found RET inst: 00000000: ret # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: >
// CHECK-NEXT: RetReg: LR
// CHECK-NEXT: Authenticated reg: (none)
// CHECK-NEXT: NonAutClobRegs at Ret:{{[ \t]*$}}
// CHECK-NEXT: Intersection with RetReg:{{[ \t]*$}}

// Function that writes w30 and returns without any authentication. The
// expected debug output below shows W30 in NonAutClobRegs at the ret, followed
// by a second analysis run in which the writing instruction is tracked.
.globl clobber
.type clobber,@function
clobber:
mov w30, #0
ret
.size clobber, .-clobber

// CHECK-LABEL:Analyzing in function clobber, AllocatorId 1
// ...
// CHECK: PacRetAnalysis::ComputeNext( mov w30, #0x0, pacret-state<NonAutClobRegs: , Insts: >)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: W30 , Insts: >)
// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state<NonAutClobRegs: W30 , Insts: >)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: W30 , Insts: >)
// CHECK-NEXT: After PacRetAnalysis:
// CHECK-NEXT: Binary Function "clobber" {
// ...
// CHECK: End of Function "clobber"

// The above output was printed after first run of analysis

// CHECK-EMPTY:
// CHECK-NEXT: Found RET inst: 00000000: ret # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: >
// CHECK-NEXT: RetReg: LR
// CHECK-NEXT: Authenticated reg: (none)
// CHECK-NEXT: NonAutClobRegs at Ret: W30
// CHECK-NEXT: Intersection with RetReg: W30
// CHECK-NEXT: PacRetAnalysis::ComputeNext( mov w30, #0x0, pacret-state<NonAutClobRegs: , Insts: [0]()>)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
// CHECK-NEXT: After PacRetAnalysis:
// CHECK-NEXT: Binary Function "clobber" {
// ...
// CHECK: End of Function "clobber"

// The analysis was re-computed with register tracking, as an issue was found in this function.
// Re-checking the instructions:

// CHECK-EMPTY:
// CHECK-NEXT: Found RET inst: 00000000: ret # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
// CHECK-NEXT: RetReg: LR
// CHECK-NEXT: Authenticated reg: (none)
// CHECK-NEXT: NonAutClobRegs at Ret: W30
// CHECK-NEXT: Intersection with RetReg: W30


// CHECK-LABEL:Analyzing in function main, AllocatorId 1
// Minimal main consisting of a single ret.
// NOTE(review): presumably present only so that the test input links into an
// executable; only its "Analyzing in function" line is matched - confirm.
.globl main
.type main,@function
main:
ret
.size main, .-main