@@ -34,6 +34,8 @@ impl AbstractField {
3434 let Some ( ident) = field. ident else {
3535 return Err ( field. span ( ) . error ( "field must be named" ) ) ;
3636 } ;
37+
38+ // If the field is a pointer, we want the type being pointed at, not the pointer itself.
3739 let kind = match field. ty {
3840 Type :: Ptr ( ty) => {
3941 let Some ( width) = pointer_width else {
@@ -47,6 +49,8 @@ impl AbstractField {
4749 }
4850 _ => FieldKind :: Ty ( field. ty ) ,
4951 } ;
52+
53+ // Fields may be decorated with either `#[binja(name = "...")]` or `#[binja(named)]`.
5054 let name = find_binja_attr ( & field. attrs ) ?
5155 . map ( |attr| match attr. kind {
5256 BinjaAttrKind :: PointerWidth ( _) => Err ( attr. span . error (
@@ -61,9 +65,11 @@ impl AbstractField {
6165 }
6266 } )
6367 . transpose ( ) ?;
68+
6469 Ok ( Self { kind, ident, name } )
6570 }
6671
72+ /// Transforms the `AbstractField` into a token stream that constructs a binja `Type` object
6773 fn resolved_ty ( & self ) -> TokenStream {
6874 let ty = self . kind . ty ( ) ;
6975 let mut resolved = quote ! { <#ty as :: binaryninja:: types:: AbstractType >:: resolve_type( ) } ;
@@ -93,9 +99,19 @@ enum BinjaAttrKind {
9399 Named ( Option < String > ) ,
94100}
95101
102+ /// Given a list of attributes, look for a `#[binja(...)]` attribute. At most one copy of the
103+ /// attribute is allowed to decorate an item (i.e. a type or field). If more than one copy is
104+ /// present, we throw an error.
105+ ///
106+ /// Three properties are supported, and for any given item they are mutually exclusive:
107+ /// - `pointer_width`: Expects an integer literal. Only allowed on types, not fields.
108+ /// - `name`: Expects a string literal. Only allowed on fields.
109+ /// - `named`: Must be a bare path. Only allowed on fields.
96110fn find_binja_attr ( attrs : & [ Attribute ] ) -> Result < Option < BinjaAttr > > {
111+ // Use a `OnceCell` to assert that we only allow a single `#[binja(...)]` attribute per-item.
97112 let binja_attr = OnceCell :: new ( ) ;
98113
114+ // Wrapper function for setting the value of the `OnceCell` above.
99115 let set_attr = |attr : BinjaAttr | {
100116 let span = attr. span ;
101117 binja_attr
@@ -111,6 +127,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
111127 let meta = attr. parse_args :: < Meta > ( ) ?;
112128 let meta_ident = meta. path ( ) . require_ident ( ) ?;
113129 if meta_ident == "pointer_width" {
130+ // #[binja(pointer_width = <int>)]
114131 let value = & meta. require_name_value ( ) ?. value ;
115132 if let Expr :: Lit ( expr) = & value {
116133 if let Lit :: Int ( val) = & expr. lit {
@@ -123,6 +140,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
123140 }
124141 return Err ( value. span ( ) . error ( "expected integer literal" ) ) ;
125142 } else if meta_ident == "name" {
143+ // #[binja(name = "...")]
126144 let value = & meta. require_name_value ( ) ?. value ;
127145 if let Expr :: Lit ( expr) = & value {
128146 if let Lit :: Str ( lit) = & expr. lit {
@@ -135,6 +153,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
135153 }
136154 return Err ( value. span ( ) . error ( r#"expected string literal"# ) ) ;
137155 } else if meta_ident == "named" {
156+ // #[binja(named)]
138157 meta. require_path_only ( ) ?;
139158 set_attr ( BinjaAttr {
140159 kind : BinjaAttrKind :: Named ( None ) ,
@@ -150,6 +169,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
150169 Ok ( binja_attr. into_inner ( ) )
151170}
152171
172+ /// Struct representing the contents of all `#[repr(...)]` attributes decorating a type.
153173struct Repr {
154174 c : bool ,
155175 packed : Option < Option < LitInt > > ,
@@ -158,6 +178,8 @@ struct Repr {
158178}
159179
160180impl Repr {
181+ /// Scans through a list of attributes, finds every instance of a `#[repr(...)]` attribute,
182+ /// then initializes `Self` based on the collective contents of those attributes.
161183 fn from_attrs ( attrs : & [ Attribute ] ) -> Result < Self > {
162184 let mut c = false ;
163185 let mut packed = None ;
@@ -213,23 +235,34 @@ fn ident_in_list<const N: usize>(ident: &Ident, list: [&'static str; N]) -> bool
213235 list. iter ( ) . any ( |id| ident == id)
214236}
215237
238+ /// Entry point to the proc-macro.
216239#[ proc_macro_derive( AbstractType , attributes( binja) ) ]
217240pub fn abstract_type_derive ( input : proc_macro:: TokenStream ) -> proc_macro:: TokenStream {
218241 let input = parse_macro_input ! ( input as DeriveInput ) ;
242+ // Transforming the error diagnostic into tokens for emission allows the business logic to
243+ // return `Result` and make use of the `?` operator like any normal Rust program
219244 match impl_abstract_type ( input) {
220245 Ok ( tokens) => tokens. into ( ) ,
221246 Err ( diag) => diag. emit_as_item_tokens ( ) . into ( ) ,
222247 }
223248}
224249
250+ /// Main business logic of the macro. Parses any relevant attributes decorating the type, then
251+ /// defers execution based on the kind of type: struct, enum, or union.
225252fn impl_abstract_type ( ast : DeriveInput ) -> Result < TokenStream > {
226253 let repr = Repr :: from_attrs ( & ast. attrs ) ?;
227254 let width = find_binja_attr ( & ast. attrs ) ?
228255 . map ( |attr| match attr. kind {
229- BinjaAttrKind :: PointerWidth ( width) => Ok ( width) ,
256+ BinjaAttrKind :: PointerWidth ( width) => {
257+ if let Data :: Enum ( _) = ast. data {
258+ Err ( attr. span . error ( "`#[binja(pointer_width)]` is only supported on structs and unions, not enums" ) )
259+ } else {
260+ Ok ( width)
261+ }
262+ }
230263 BinjaAttrKind :: Named ( Some ( _) ) => Err ( attr
231264 . span
232- . error ( r#"`#[binja(name = "..." )] is only supported on fields"# ) ) ,
265+ . error ( r#"`#[binja(name)] is only supported on fields"# ) ) ,
233266 BinjaAttrKind :: Named ( None ) => Err ( attr
234267 . span
235268 . error ( "`#[binja(named)]` is only supported on fields" ) ) ,
@@ -240,23 +273,23 @@ fn impl_abstract_type(ast: DeriveInput) -> Result<TokenStream> {
240273 return Err ( ast. generics . span ( ) . error ( "type must not be generic" ) ) ;
241274 }
242275
243- let ident = ast. ident ;
244276 match ast. data {
245277 Data :: Struct ( s) => match s. fields {
246278 Fields :: Named ( fields) => {
247- impl_abstract_structure_type ( ident, fields, repr, width, StructureKind :: Struct )
279+ impl_abstract_structure_type ( ast . ident , fields, repr, width, StructureKind :: Struct )
248280 }
249- Fields :: Unnamed ( _) => Err ( s
250- . fields
251- . span ( )
252- . error ( "tuple structs are unsupported; struct must have named fields" ) ) ,
253- Fields :: Unit => Err ( ident
254- . span ( )
255- . error ( "unit structs are unsupported; provide at least one named field" ) ) ,
281+ Fields :: Unnamed ( _) => Err ( s. fields . span ( ) . error (
282+ "tuple structs are unsupported; \
283+ struct must have named fields",
284+ ) ) ,
285+ Fields :: Unit => Err ( ast. ident . span ( ) . error (
286+ "unit structs are unsupported; \
287+ provide at least one named field",
288+ ) ) ,
256289 } ,
257- Data :: Enum ( e) => impl_abstract_enum_type ( ident, e. variants , repr) ,
290+ Data :: Enum ( e) => impl_abstract_enum_type ( ast . ident , e. variants , repr) ,
258291 Data :: Union ( u) => {
259- impl_abstract_structure_type ( ident, u. fields , repr, width, StructureKind :: Union )
292+ impl_abstract_structure_type ( ast . ident , u. fields , repr, width, StructureKind :: Union )
260293 }
261294 }
262295}
@@ -266,6 +299,70 @@ enum StructureKind {
266299 Union ,
267300}
268301
302+ /// Implements the `AbstractType` trait for either a struct or union, based on the value of `kind`.
303+ ///
304+ /// Unlike C-style enums, structs and unions can contain other types within them that affect their
305+ /// size and alignment. For example, the size of a struct is at least the sum of the sizes of its
306+ /// fields (plus any padding), and its alignment is equal to that of the most-aligned field.
307+ /// Likewise, a union's size is at least that of its largest field.
308+ ///
309+ /// Normally this would be fine, because the compiler can give you size and alignment information
310+ /// using `std::mem::{size_of, align_of}`. However, the `#[binja(pointer_width)]` attribute allows
311+ /// users to change the width of pointer fields to be different in Binja compared to the host CPU
312+ /// architecture, meaning the value calculated by the compiler will be wrong in that case. What's
313+ /// worse, a pointer field with custom width not only affects the size/alignment of its
314+ /// parent struct, but anything that contains *that* struct, and so on up the tree.
315+ ///
316+ /// So, we need a way to propagate the modified layout information at compile-time. To accomplish
317+ /// this, we use the `AbstractType::LAYOUT` associated constant, which by default matches the
318+ /// layout of the struct as calculated by the compiler, but which can be swapped out for any other
319+ /// valid `std::alloc::Layout` object when implementing the `AbstractType` trait. We then create a
320+ /// mock-type with the desired custom layout and use that for propagation.
321+ ///
322+ /// In order to mock a type, we make use of the following construction:
323+ ///
324+ /// ```ignore
325+ /// #[repr(C)]
326+ /// struct Mock<const SIZE: usize, const ALIGN: usize>
327+ /// where
328+ /// elain::Align<ALIGN>: elain::Alignment,
329+ /// {
330+ /// t: [u8; SIZE],
331+ /// _align: elain::Align<ALIGN>
332+ /// }
333+ /// ```
334+ ///
335+ /// The `elain::Align` type is a zero-size type with a const-generic parameter specifying its
336+ /// alignment. The trait bound serves to restrict the possible values of `ALIGN` to only those
337+ /// valid for specifying alignment (powers of two). Additionally, we know that `[u8; SIZE]` is
338+ /// always of size `SIZE`, and alignment 1. Therefore, the `Mock` type is guaranteed to have
339+ /// alignment `ALIGN` and size `SIZE` (rounded up to a multiple of `ALIGN`, if it isn't one).
340+ ///
341+ /// This constructed `Mock` type allows us to generate a struct with arbitrary layout, which we can
342+ /// use to mimic the layout of another struct:
343+ ///
344+ /// ```ignore
345+ /// #[derive(AbstractType)]
346+ /// #[repr(C)]
347+ /// struct S {
348+ /// first: u8,
349+ /// second: u16,
350+ /// third: u64,
351+ /// }
352+ ///
353+ /// // Identical layout to `S` above
354+ /// #[repr(C)]
355+ /// struct __S_layout {
356+ /// first: Mock<1, 1>,
357+ /// second: Mock<2, 2>,
358+ /// third: Mock<8, 8>,
359+ /// }
360+ /// ```
361+ ///
362+ /// Then, we can propagate any changes in the layout of `S` (due to custom pointer widths) by
363+ /// setting `S::LAYOUT` to `alloc::Layout::new::<__S_layout>()` rather than the default value of
364+ /// `alloc::Layout::new::<S>()`. When mocking fields of type `S`, we then use `S::LAYOUT.size()`
365+ /// and `S::LAYOUT.align()` for the const-generic parameters of `Mock`, instead of just integers.
269366fn impl_abstract_structure_type (
270367 name : Ident ,
271368 fields : FieldsNamed ,
@@ -286,17 +383,41 @@ fn impl_abstract_structure_type(
286383 . into_iter ( )
287384 . map ( |field| AbstractField :: from_field ( field, & name, pointer_width) )
288385 . collect :: < Result < Vec < _ > > > ( ) ?;
386+
387+ // Generate the arguments to `StructureBuilder::insert`. Luckily, `mem::offset_of!` was stabilized
388+ // in Rust 1.77; otherwise this would be a lot more complicated.
289389 let layout_name = format_ident ! ( "__{name}_layout" ) ;
390+ let args = abstract_fields
391+ . iter ( )
392+ . map ( |field| {
393+ let ident = & field. ident ;
394+ let resolved_ty = field. resolved_ty ( ) ;
395+ quote ! {
396+ & #resolved_ty,
397+ stringify!( #ident) ,
398+ :: std:: mem:: offset_of!( #layout_name, #ident) as u64 ,
399+ false ,
400+ :: binaryninja:: types:: MemberAccess :: NoAccess ,
401+ :: binaryninja:: types:: MemberScope :: NoScope ,
402+ }
403+ } )
404+ . collect :: < Vec < _ > > ( ) ;
405+
406+ // Calculate size and alignment for each field - these may differ from the compiler's
407+ // calculated values so we use the construction discussed above to mock/propagate them.
290408 let field_wrapper = format_ident ! ( "__{name}_field_wrapper" ) ;
291409 let layout_fields = abstract_fields
292410 . iter ( )
293411 . map ( |field| {
294412 let ident = & field. ident ;
295413 let ( size, align) = match & field. kind {
414+ // Since pointers can be of arbitrary size as specified by the user, we manually
415+ // calculate size/alignment for them.
296416 FieldKind :: Ptr ( _, width) => {
297417 let align = width. next_power_of_two ( ) ;
298418 ( quote ! { #width } , quote ! { #align } )
299419 }
420+ // All other types defer to the field type's own `AbstractType::LAYOUT`.
300421 FieldKind :: Ty ( ty) => (
301422 quote ! { { <#ty as :: binaryninja:: types:: AbstractType >:: LAYOUT . size( ) } } ,
302423 quote ! { { <#ty as :: binaryninja:: types:: AbstractType >:: LAYOUT . align( ) } } ,
@@ -305,21 +426,9 @@ fn impl_abstract_structure_type(
305426 quote ! { #ident: #field_wrapper<#size, #align> }
306427 } )
307428 . collect :: < Vec < _ > > ( ) ;
308- let args = abstract_fields
309- . iter ( )
310- . map ( |field| {
311- let ident = & field. ident ;
312- let resolved_ty = field. resolved_ty ( ) ;
313- quote ! {
314- & #resolved_ty,
315- stringify!( #ident) ,
316- :: std:: mem:: offset_of!( #layout_name, #ident) as u64 ,
317- false ,
318- :: binaryninja:: types:: MemberAccess :: NoAccess ,
319- :: binaryninja:: types:: MemberScope :: NoScope ,
320- }
321- } )
322- . collect :: < Vec < _ > > ( ) ;
429+
430+ // If the struct/union is marked `#[repr(packed)]` or `#[repr(align(...))]`, we decorate the
431+ // mocked layout type with those as well
323432 let is_packed = repr. packed . is_some ( ) ;
324433 let packed = repr. packed . map ( |size| match size {
325434 Some ( n) => quote ! { #[ repr( packed( #n) ) ] } ,
@@ -334,17 +443,18 @@ fn impl_abstract_structure_type(
334443 )
335444 } )
336445 . unzip ( ) ;
446+
447+ // Distinguish between structs and unions
337448 let ( kind, set_union) = match kind {
338449 StructureKind :: Struct => ( quote ! { struct } , None ) ,
339450 StructureKind :: Union => (
340451 quote ! { union } ,
341452 Some ( quote ! {
342- . set_structure_type(
343- :: binaryninja:: types:: StructureType :: UnionStructureType
344- )
453+ . set_structure_type( :: binaryninja:: types:: StructureType :: UnionStructureType )
345454 } ) ,
346455 ) ,
347456 } ;
457+
348458 Ok ( quote ! {
349459 #[ repr( C ) ]
350460 #[ derive( Copy , Clone ) ]
@@ -380,6 +490,7 @@ fn impl_abstract_structure_type(
380490 } )
381491}
382492
493+ /// Implements the `AbstractType` trait for an enum.
383494fn impl_abstract_enum_type (
384495 name : Ident ,
385496 variants : impl IntoIterator < Item = Variant > ,
@@ -400,6 +511,9 @@ fn impl_abstract_enum_type(
400511 . span ( )
401512 . error ( "must provide a primitive `repr` type, e.g. `u32`" ) ) ;
402513 } ;
514+
515+ // Extract the variant names and the value of their discriminants. Variants must not hold any
516+ // nested data (in other words, they must be simple C-style identifiers).
403517 let variants = variants
404518 . into_iter ( )
405519 . map ( |variant| {
@@ -415,6 +529,7 @@ fn impl_abstract_enum_type(
415529 Ok ( quote ! { stringify!( #ident) , #discriminant as u64 } )
416530 } )
417531 . collect :: < Result < Vec < _ > > > ( ) ?;
532+
418533 Ok ( quote ! {
419534 impl :: binaryninja:: types:: AbstractType for #name {
420535 fn resolve_type( ) -> :: binaryninja:: rc:: Ref <:: binaryninja:: types:: Type > {
0 commit comments