-
Notifications
You must be signed in to change notification settings - Fork 5
Brainstorming
The high-level functioning of Hijacker can be described as split into four different steps:
1. Hijacker loads an input OF (Object File), then provides an IBR (Intermediate Binary Representation) made of:
   - Sections: Coarse-grained containers for semantically-related pieces of information
   - Symbols: Represent data in a section that can be explicitly referred to in the OF
   - References: A reification of the concept of links between sections and symbols
2. The list of sections is scanned, and for each known section type the appropriate section driver is launched to take care of the following issues:
   - Text driver: Disassembly, binary translation, IRs, etc.
   - Data driver: Endianness, alignment, data formats and other conversions
   - Debug driver: Additional symbols and meta-data
3. Instrumentation occurs on the IBR directly, therefore the in-memory representation for the input OF is subject to the desired manipulations.
4. Hijacker writes back a modified OF on the basis of the existing sections, symbols and references in the IBR and by relying on the appropriate drivers for each section type.
A program's IBR can be viewed as a list of sections, symbols and relocations, but also as a list of executable versions, each with its own list of sections, symbols and relocations. In practice, each IBR version must also deal explicitly with code/text sections. Therefore, each executable version is extended with a list of functions, blocks and instructions that make up the current version's code, as well as classic program representations such as the Control Flow Graph (CFG) and the Function Call Graph (FCG).
The multi-versioning feature is encoded as follows:
/// Root of the IBR for one program: the object-file format, the ISA family,
/// the program-wide lists of sections, symbols and relocations, and the table
/// of instrumented versions.
///
/// NOTE(review): `list_t<T>` is not valid C; presumably schematic notation for
/// "list of T" -- confirm against the real list implementation.
/// NOTE(review): `version_t` is used here but is only defined after this
/// struct; real C code would need a forward declaration.
typedef struct executable {
obj_format_t format; /// The object file format type (e.g. ELF)
isa_family_t family; /// The ISA language of the machine code (e.g. x86-64)
list_t<sec_t> sections; /// All sections in the program
list_t<sym_t> symbols; /// All symbols in the program
list_t<rel_t> relocs; /// All symbol references in the program
version_t *versions[MAX_VERSIONS]; /// One entry for each instrumented version of the executable
size_t cversion; /// The current version number (NOTE(review): presumably an index into `versions` -- confirm)
size_t nversion; /// The total number of existing versions
} exec_t;
/// One instrumented version of the executable. Besides its own sections,
/// symbols and relocations, a version carries the code-level view of the
/// program: its functions, its basic blocks and its Function Call Graph.
///
/// NOTE(review): `list_t<T>` / `graph_t<T>` are not valid C; presumably
/// schematic notation for "list of T" / "graph over T" -- confirm.
typedef struct version {
size_t number; /// The version number
unsigned char *name; /// The name of this version
list_t<sec_t> sections; /// All sections specific for this version
list_t<sym_t> symbols; /// All symbols specific for this version
list_t<rel_t> relocs; /// All symbol references specific for this version
list_t<fun_t> funcs; /// The code that makes up this version, in terms of functions
list_t<blk_t> blocks; /// The code that makes up this version, in terms of basic blocks
graph_t<fun_t> fcg; /// The Function Call Graph of this version
struct version *next; /// NOTE(review): presumably the next version in a linked chain -- confirm
} version_t;

Sections, symbols and relocations are defined according to the following schema:
/// Scalar types used for locations inside a section.
/// `addr_t` is unsigned (it aliases size_t); `disp_t` is signed (it aliases
/// ptrdiff_t), so a displacement can point backwards from its base.
typedef size_t addr_t; /// Addresses and positive offsets
typedef ptrdiff_t disp_t; /// Displacements from a base address
/// A section of the object file: a typed, flagged container with a raw
/// payload, plus a link to the symbol that represents the section itself.
///
/// `name` and `size` duplicate information that the field comments say can be
/// recovered through `symbol`; they are marked below as possibly redundant.
typedef struct section {
sec_type_t type; /// DATA, CODE, RELOC, DEBUG, etc...
unsigned long flags; /// ALLOC, LOAD, READ, WRITE, etc...
unsigned char *name; /// The name of this section (possibly not needed as it can be recovered from $this->symbol->name)
void *payload; /// Section contents
size_t size; /// The size of this section (possibly not needed as it can be recovered from $this->symbol->size)
sym_t *symbol; /// The symbol representing this section
} sec_t;
/// A symbol: a named piece of data located at `offset` inside section `sec`.
/// The `is` union records what the symbol stands for (a function or a
/// section), and `rel` holds the relocations associated with the symbol.
typedef struct symbol {
sym_type_t type; /// FUNCTION, OBJECT, etc...
unsigned long flags; /// LOCAL, GLOBAL, WEAK, etc...
unsigned char *name; /// The name of this symbol
void *payload; /// Symbol contents
size_t size; /// The size of this symbol
union { /// What the symbol represents...
fun_t *function; /// ...a function
sec_t *section; /// ...a section
/// ...anything else?
} is;
/// NOTE(review): because this is a union, `source` and `dest` share storage and
/// only one of the two lists can be held at a time. If a symbol can both own
/// relocations and be the target of others, this presumably needs to be a
/// struct -- confirm the intent.
union { /// Relocations associated to this symbol...
list_t<rel_t> source; /// ...when the symbol owns the relocation
list_t<rel_t> dest; /// ...when the relocation refers to the symbol
} rel;
sec_t *sec; /// The section that contains this symbol
addr_t offset; /// The offset from the beginning of the section at which the symbol contents can be found
} sym_t;
/// A relocation: a typed link from a location (`in`: a section, an offset
/// within it and, optionally, the instruction found there) to a target
/// (`to`: a symbol, an addend relative to it and, optionally, the instruction
/// found there).
typedef struct relocation {
rel_type_t type; /// ABSOLUTE, RELATIVE, etc...
struct { /// Relocation found...
sec_t *section; /// ...in this section
addr_t offset; /// ...at this offset
ins_t *instr; /// ...(in this instruction)
} in;
struct { /// Relocation referring...
sym_t *symbol; /// ...to this symbol
disp_t addend; /// ...at this displacement
ins_t *instr; /// ...(to this instruction)
} to;
} rel_t;

As for functions, blocks and instructions we have:
/// A function in the code of a version: its Control Flow Graph, the basic
/// blocks it is made of, and a link to the symbol that represents it.
///
/// `name` is marked below as possibly redundant with `symbol->name`.
typedef struct function {
fun_type_t type; /// Anything useful here?
unsigned char *name; /// The name of this function (possibly not needed as it can be recovered from $this->symbol->name)
graph_t<blk_t> cfg; /// The CFG of this function
list_t<blk_t> blocks; /// The code that makes up this function, in terms of basic blocks
sym_t *symbol; /// The symbol representing this function
} fun_t;
/// A basic block: a typed list of instructions, measured both in bytes
/// (`size`) and in number of instructions (`length`).
typedef struct block {
blk_type_t type; /// LOOP HEADER, LOOP FOOTER, etc...
list_t<ins_t> instrs; /// The code that makes up this block, in terms of instructions
size_t size; /// The size of this block in bytes
size_t length; /// The number of instructions that make up this block
} blk_t;
/// A single instruction: its classification flags, its position and size
/// within the section, its outgoing (`to`) and incoming (`from`) jump links,
/// and the relocations associated with it.
typedef struct instruction {
unsigned long flags; /// MEMORY, ALGEBRIC, LOGIC, STACK, etc...
addr_t offset; /// The offset from the beginning of the section at which this instruction can be found
size_t size; /// The size of this instruction
// ... Other fields... see libasm!
struct { /// Jumptable for this instruction
size_t fanout; /// Number of detected targets
list_t<ins_t> instr; /// List of target instructions
} to;
struct { /// Inverse jumptable for this instruction
list_t<ins_t> instr; /// List of instructions that jump to this instruction
} from;
/// NOTE(review): because this is a union, `source` and `dest` share storage and
/// only one of the two lists can be held at a time. If an instruction can both
/// own a relocation and be the target of another, this presumably needs to be
/// a struct (as `to`/`from` above are) -- confirm the intent.
union { /// Relocations associated to this instruction...
list_t<rel_t> source; /// ...when the instruction owns the relocation
list_t<rel_t> dest; /// ...when the relocation refers to the instruction
} rel;
} ins_t;