Skip to content

[BOLT] Factor out MCInstReference from gadget scanner (NFC) #138655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/atrosinenko/bolt-gs-trap-instruction
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions bolt/include/bolt/Core/MCInstUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_CORE_MCINSTUTILS_H
#define BOLT_CORE_MCINSTUTILS_H

#include "bolt/Core/BinaryBasicBlock.h"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: drop empty line.

#include <map>
#include <tuple>
#include <variant>

namespace llvm {
namespace bolt {

class BinaryFunction;

/// MCInstReference represents a reference to a constant MCInst as stored either
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
/// (after a CFG is created).
class MCInstReference {
using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;

// Two cases are possible:
// * functions with CFG reconstructed - a function stores a collection of
// basic blocks, each basic block stores a contiguous vector of MCInst
// * functions without CFG - there are no basic blocks created,
// the instructions are directly stored in std::map in BinaryFunction
//
// In both cases, the direct parent of MCInst is stored together with an
// iterator pointing to the instruction.

// Helper struct: CFG is available, the direct parent is a basic block,
// iterator's type is `MCInst *`.
struct RefInBB {
RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
: BB(BB), It(Inst) {}
RefInBB(const RefInBB &Other) = default;
RefInBB &operator=(const RefInBB &Other) = default;

const BinaryBasicBlock *BB;
BinaryBasicBlock::const_iterator It;

bool operator<(const RefInBB &Other) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are expected uses for this comparison? I'm concerned about non-deterministic order of BinaryBasicBlock *.

return std::tie(BB, It) < std::tie(Other.BB, Other.It);
}

bool operator==(const RefInBB &Other) const {
return BB == Other.BB && It == Other.It;
}
};

// Helper struct: CFG is *not* available, the direct parent is a function,
// iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
// is an instruction's offset).
struct RefInBF {
RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
: BF(BF), It(It) {}
RefInBF(const RefInBF &Other) = default;
RefInBF &operator=(const RefInBF &Other) = default;

const BinaryFunction *BF;
nocfg_const_iterator It;

bool operator<(const RefInBF &Other) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar concern regarding the BinaryFunction * order.

return std::tie(BF, It->first) < std::tie(Other.BF, Other.It->first);
}

bool operator==(const RefInBF &Other) const {
return BF == Other.BF && It->first == Other.It->first;
}
};

std::variant<RefInBB, RefInBF> Reference;

// Utility methods to be used like this:
//
// if (auto *Ref = tryGetRefInBB())
// return Ref->doSomething(...);
// return getRefInBF().doSomethingElse(...);
const RefInBB *tryGetRefInBB() const {
assert(std::get_if<RefInBB>(&Reference) ||
std::get_if<RefInBF>(&Reference));
return std::get_if<RefInBB>(&Reference);
}
const RefInBF &getRefInBF() const {
assert(std::get_if<RefInBF>(&Reference));
return *std::get_if<RefInBF>(&Reference);
}

public:
/// Constructs an empty reference.
MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
/// Constructs a reference to the instruction inside the basic block.
MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
: Reference(RefInBB(BB, Inst)) {
assert(BB && Inst && "Neither BB nor Inst should be nullptr");
}
/// Constructs a reference to the instruction inside the basic block.
MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
: Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
assert(BB && "Basic block should not be nullptr");
}
/// Constructs a reference to the instruction inside the function without
/// CFG information.
MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
: Reference(RefInBF(BF, It)) {
assert(BF && "Function should not be nullptr");
}

/// Locates an instruction inside a function and returns a reference.
static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);

bool operator<(const MCInstReference &Other) const {
return Reference < Other.Reference;
}

bool operator==(const MCInstReference &Other) const {
return Reference == Other.Reference;
}

const MCInst &getMCInst() const {
if (auto *Ref = tryGetRefInBB())
return *Ref->It;
return getRefInBF().It->second;
}

operator const MCInst &() const { return getMCInst(); }

operator bool() const {
if (auto *Ref = tryGetRefInBB())
return Ref->BB != nullptr;
return getRefInBF().BF != nullptr;
}

bool hasCFG() const {
return static_cast<bool>(*this) && tryGetRefInBB() != nullptr;
}

const BinaryFunction *getFunction() const {
if (auto *Ref = tryGetRefInBB())
return Ref->BB->getFunction();
return getRefInBF().BF;
}

const BinaryBasicBlock *getBasicBlock() const {
if (auto *Ref = tryGetRefInBB())
return Ref->BB;
return nullptr;
}

raw_ostream &print(raw_ostream &OS) const;
};

/// Streams \p Ref to \p OS by forwarding to MCInstReference::print().
///
/// Declared plain `inline` (not `static inline`): in a header, `static` would
/// give every translation unit its own internal-linkage copy of the function.
inline raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
  return Ref.print(OS);
}

} // namespace bolt
} // namespace llvm

#endif
178 changes: 1 addition & 177 deletions bolt/include/bolt/Passes/PAuthGadgetScanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,189 +11,13 @@

#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCInstUtils.h"
#include "bolt/Passes/BinaryPasses.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

namespace llvm {
namespace bolt {

/// @brief MCInstReference represents a reference to an MCInst as stored either
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
/// (after a CFG is created). It aims to store the necessary information to be
/// able to find the specific MCInst in either the BinaryFunction or
/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
/// the corresponding instruction can be computed.

struct MCInstInBBReference {
BinaryBasicBlock *BB;
int64_t BBIndex;
MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
: BB(BB), BBIndex(BBIndex) {}
MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
for (BinaryBasicBlock &BB : BF)
for (size_t I = 0; I < BB.size(); ++I)
if (Inst == &BB.getInstructionAtIndex(I))
return MCInstInBBReference(&BB, I);
return {};
}
bool operator==(const MCInstInBBReference &RHS) const {
return BB == RHS.BB && BBIndex == RHS.BBIndex;
}
bool operator<(const MCInstInBBReference &RHS) const {
return std::tie(BB, BBIndex) < std::tie(RHS.BB, RHS.BBIndex);
}
operator MCInst &() const {
assert(BB != nullptr);
return BB->getInstructionAtIndex(BBIndex);
}
uint64_t getAddress() const {
// 4 bytes per instruction on AArch64.
// FIXME: the assumption of 4 byte per instruction needs to be fixed before
// this method gets used on any non-AArch64 binaries (but should be fine for
// pac-ret analysis, as that is an AArch64-specific feature).
return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
}
};

raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);

struct MCInstInBFReference {
BinaryFunction *BF;
uint64_t Offset;
MCInstInBFReference(BinaryFunction *BF, uint64_t Offset)
: BF(BF), Offset(Offset) {}

static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) {
for (auto &I : BF.instrs())
if (Inst == &I.second)
return MCInstInBFReference(&BF, I.first);
return {};
}

MCInstInBFReference() : BF(nullptr), Offset(0) {}
bool operator==(const MCInstInBFReference &RHS) const {
return BF == RHS.BF && Offset == RHS.Offset;
}
bool operator<(const MCInstInBFReference &RHS) const {
if (BF != RHS.BF)
return BF < RHS.BF;
return Offset < RHS.Offset;
}
operator MCInst &() const {
assert(BF != nullptr);
return *BF->getInstructionAtOffset(Offset);
}

uint64_t getOffset() const { return Offset; }

uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
};

raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);

struct MCInstReference {
enum Kind { FunctionParent, BasicBlockParent };
Kind ParentKind;
union U {
MCInstInBBReference BBRef;
MCInstInBFReference BFRef;
U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
} U;
MCInstReference(MCInstInBBReference BBRef)
: ParentKind(BasicBlockParent), U(BBRef) {}
MCInstReference(MCInstInBFReference BFRef)
: ParentKind(FunctionParent), U(BFRef) {}
MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
: MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
MCInstReference(BinaryFunction *BF, uint32_t Offset)
: MCInstReference(MCInstInBFReference(BF, Offset)) {}

static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) {
if (BF.hasCFG())
return MCInstInBBReference::get(Inst, BF);
return MCInstInBFReference::get(Inst, BF);
}

bool operator<(const MCInstReference &RHS) const {
if (ParentKind != RHS.ParentKind)
return ParentKind < RHS.ParentKind;
switch (ParentKind) {
case BasicBlockParent:
return U.BBRef < RHS.U.BBRef;
case FunctionParent:
return U.BFRef < RHS.U.BFRef;
}
llvm_unreachable("");
}

bool operator==(const MCInstReference &RHS) const {
if (ParentKind != RHS.ParentKind)
return false;
switch (ParentKind) {
case BasicBlockParent:
return U.BBRef == RHS.U.BBRef;
case FunctionParent:
return U.BFRef == RHS.U.BFRef;
}
llvm_unreachable("");
}

operator MCInst &() const {
switch (ParentKind) {
case BasicBlockParent:
return U.BBRef;
case FunctionParent:
return U.BFRef;
}
llvm_unreachable("");
}

operator bool() const {
switch (ParentKind) {
case BasicBlockParent:
return U.BBRef.BB != nullptr;
case FunctionParent:
return U.BFRef.BF != nullptr;
}
llvm_unreachable("");
}

uint64_t getAddress() const {
switch (ParentKind) {
case BasicBlockParent:
return U.BBRef.getAddress();
case FunctionParent:
return U.BFRef.getAddress();
}
llvm_unreachable("");
}

BinaryFunction *getFunction() const {
switch (ParentKind) {
case FunctionParent:
return U.BFRef.BF;
case BasicBlockParent:
return U.BBRef.BB->getFunction();
}
llvm_unreachable("");
}

BinaryBasicBlock *getBasicBlock() const {
switch (ParentKind) {
case FunctionParent:
return nullptr;
case BasicBlockParent:
return U.BBRef.BB;
}
llvm_unreachable("");
}
};

raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);

namespace PAuthGadgetScanner {

// The report classes are designed to be used in an immutable manner.
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ add_llvm_library(LLVMBOLTCore
GDBIndex.cpp
HashUtilities.cpp
JumpTable.cpp
MCInstUtils.cpp
MCPlusBuilder.cpp
ParallelUtilities.cpp
Relocation.cpp
Expand Down
Loading
Loading