diff --git a/CodeHawk/CH/chlib/cHNumerical.mli b/CodeHawk/CH/chlib/cHNumerical.mli index 5079710e..4db6ca00 100644 --- a/CodeHawk/CH/chlib/cHNumerical.mli +++ b/CodeHawk/CH/chlib/cHNumerical.mli @@ -138,7 +138,7 @@ val mkNumericalFromInt64 : int64 -> numerical_t val mkNumericalFromString: string -> numerical_t -(** [mkNumericalPowerOf2 n] returns the numerical for 2^n] *) +(** [mkNumericalPowerOf2 n] returns the numerical for [2^n] *) val mkNumericalPowerOf2: int -> numerical_t diff --git a/CodeHawk/CH/chutil/cHPrettyUtil.mli b/CodeHawk/CH/chutil/cHPrettyUtil.mli index 47f90bd9..489f20d0 100644 --- a/CodeHawk/CH/chutil/cHPrettyUtil.mli +++ b/CodeHawk/CH/chutil/cHPrettyUtil.mli @@ -70,7 +70,7 @@ val fixed_length_int_string: string -> int -> string (** [string_suffix s n] returns the suffix of [s] that starts at the n'th character of [s] (zero-based). - @raise Invalid_argument if [n] is larger than the length of [s] + raise Invalid_argument if [n] is larger than the length of [s] *) val string_suffix: string -> int -> string diff --git a/CodeHawk/CH/chutil/cHTraceResult.mli b/CodeHawk/CH/chutil/cHTraceResult.mli index 49dd40a0..26d0b22d 100644 --- a/CodeHawk/CH/chutil/cHTraceResult.mli +++ b/CodeHawk/CH/chutil/cHTraceResult.mli @@ -34,12 +34,12 @@ type 'a traceresult = ('a, string list) result (** [tget_ok r] is [v] if [r] is [Ok v] and - @raise [Invalid_argument] otherwise.*) + raise [Invalid_argument] otherwise.*) val tget_ok: 'a traceresult -> 'a (** [tget_error r] is [e] if [r] is [Error e] and - @raise [Invalid_argument] otherwise.*) + raise [Invalid_argument] otherwise.*) val tget_error: 'a traceresult -> string list diff --git a/CodeHawk/CH/xprlib/xsimplify.ml b/CodeHawk/CH/xprlib/xsimplify.ml index 9b5d77cd..04f73bec 100644 --- a/CodeHawk/CH/xprlib/xsimplify.ml +++ b/CodeHawk/CH/xprlib/xsimplify.ml @@ -175,7 +175,8 @@ and reduce_neg (m: bool) (e1: xpr_t): (bool * xpr_t) = | _ -> default () -(* Note that for values other than zero the result depends on word size.*) +(* Note that, in general, for values other than zero the result depends on word + size. Here we assume that the word is at least 8 bits wide. *) and reduce_bitwise_not (m: bool) (e1: xpr_t): (bool * xpr_t) = let default () = (m, XOp (XBNot, [e1])) in match e1 with @@ -183,6 +184,8 @@ and reduce_bitwise_not (m: bool) (e1: xpr_t): (bool * xpr_t) = (true, XConst (IntConst numerical_one#neg)) | XConst (IntConst num) when num#equal numerical_one -> (true, XConst (IntConst (mkNumerical 2)#neg)) + | XConst (IntConst n) when n#lt numerical_e8 -> + (true, XConst (IntConst (n#neg#sub numerical_one))) | _ -> default () diff --git a/CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli b/CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli index 22ec5506..035ffb46 100644 --- a/CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli +++ b/CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli @@ -85,16 +85,16 @@ val t_charptr: btype_t val ptr_deref: btype_t -> btype_t -(** {2 Array types)*) +(** {2 Array types} *) val t_array: btype_t -> int -> btype_t -(** {2 Type definition}*) +(** {2 Type definition} *) val t_named: string -> btype_t -(** {2 Composite types}*) +(** {2 Composite types} *) val t_comp: ?name_space:string list -> string -> btype_t val t_enum: ?name_space:string list -> string -> btype_t diff --git a/CodeHawk/CHB/bchlib/bCHBCTypes.mli b/CodeHawk/CHB/bchlib/bCHBCTypes.mli index b143f4ca..f6361466 100644 --- a/CodeHawk/CHB/bchlib/bCHBCTypes.mli +++ b/CodeHawk/CHB/bchlib/bCHBCTypes.mli @@ -604,7 +604,7 @@ class type bcfiles_int = (** [get_varinfo name] returns the varinfo with name [name]. - @raise BCH_failure if no varinfo exists with name [name].*) + raise BCH_failure if no varinfo exists with name [name].*) method get_varinfo: ?prefix:bool -> string -> bvarinfo_t (** Returns all global varinfos (including functions) *) @@ -622,7 +622,7 @@ class type bcfiles_int = (** [get_typedef name] returns the (not necessarily fully expanded) type definition associated with [name]. - @raise BCH_failure if no typedef exists with name [name]. + raise BCH_failure if no typedef exists with name [name]. *) method get_typedef: string -> btype_t @@ -640,7 +640,7 @@ class type bcfiles_int = (** [get_compinfo key] returns the compinfo structure associated with (CIL-assigned) key [key]. - @raise BCH_failure if no compinfo definition or declaration exists + raise BCH_failure if no compinfo definition or declaration exists with key [key]. *) method get_compinfo: int -> bcompinfo_t @@ -655,7 +655,7 @@ class type bcfiles_int = (** [get_enuminfo name] returns the enuminfo structure with name [name]. - @raise BCH_failure if no enuminfo definition or declaration exists with + raise BCH_failure if no enuminfo definition or declaration exists with name [name] *) method get_enuminfo: string -> benuminfo_t diff --git a/CodeHawk/CHB/bchlib/bCHBasicTypes.mli b/CodeHawk/CHB/bchlib/bCHBasicTypes.mli index 4fc62a8e..1dade856 100644 --- a/CodeHawk/CHB/bchlib/bCHBasicTypes.mli +++ b/CodeHawk/CHB/bchlib/bCHBasicTypes.mli @@ -40,6 +40,7 @@ open BCHLibTypes exception BCH_failure of pretty_t +(** basic exception in all of CHB *) (* raised in cases an internal inconsistency is encountered *) exception Internal_error of string diff --git a/CodeHawk/CHB/bchlib/bCHByteUtilities.mli b/CodeHawk/CHB/bchlib/bCHByteUtilities.mli index 4f50e57a..97b1130c 100644 --- a/CodeHawk/CHB/bchlib/bCHByteUtilities.mli +++ b/CodeHawk/CHB/bchlib/bCHByteUtilities.mli @@ -51,8 +51,8 @@ val rawdata_to_string: (** [write_xml_raw_data elt s dw] writes byte string [s] to the xml element - [elt] in the form of sub elements with tag {aline} and attributes - {bytes} and {print} that contain 4 space-separated groups of 4 bytes in + [elt] in the form of sub elements with tag [aline] and attributes + [bytes] and [print] that contain 4 space-separated groups of 4 bytes in hexadecimal and a print represenation of those 16 bytes, respectively. This format is used in saving binary content of the sections to xml.*) diff --git a/CodeHawk/CHB/bchlib/bCHCPURegisters.mli b/CodeHawk/CHB/bchlib/bCHCPURegisters.mli index 097e5a67..a2aa07a0 100644 --- a/CodeHawk/CHB/bchlib/bCHCPURegisters.mli +++ b/CodeHawk/CHB/bchlib/bCHCPURegisters.mli @@ -127,13 +127,13 @@ val full_registers: cpureg_t list (** [mk_arm_sp_reg i] returns an ARM single-precision floating point register (S) - @raise BCH_failure if [i] is negative or i is greater than 31.*) + raise BCHBasicTypes.BCH_failure if [i] is negative or i is greater than 31.*) val mk_arm_sp_reg: int -> arm_extension_register_t (** [mk_arm_dp_reg i] returns an ARM double-precision floating point register (D) - @raise BCH_failure if [i] is negative or i is greater than 15.*) + raise BCH_failure if [i] is negative or i is greater than 15.*) val mk_arm_dp_reg: int -> arm_extension_register_t diff --git a/CodeHawk/CHB/bchlib/bCHCallSemanticsRecorder.ml b/CodeHawk/CHB/bchlib/bCHCallSemanticsRecorder.ml index 58795e47..fb674b29 100644 --- a/CodeHawk/CHB/bchlib/bCHCallSemanticsRecorder.ml +++ b/CodeHawk/CHB/bchlib/bCHCallSemanticsRecorder.ml @@ -99,6 +99,7 @@ object (self) | XPONotNull x -> (match x with | XConst (IntConst n) when n#gt numerical_zero -> + let n = n#modulo numerical_e32 in Discharged ("non-null constant address: " ^ TR.tget_ok (numerical_to_hex_string n)) @@ -109,7 +110,7 @@ object (self) | XPONullTerminated x -> (match x with | XConst (IntConst n) -> - let dw = TR.tget_ok (numerical_to_doubleword n) in + let dw = numerical_mod_to_doubleword n in if string_table#has_string dw then Discharged ("constant string: " ^ (string_table#get_string dw)) else @@ -119,7 +120,7 @@ object (self) (match (ptr, size) with | (XConst (IntConst n1), XOp ((Xf "ntpos"), [XConst (IntConst n2)])) when n1#equal n2 -> - let dw = TR.tget_ok (numerical_to_doubleword n1) in + let dw = numerical_mod_to_doubleword n1 in if string_table#has_string dw then Discharged ("constant string: " ^ (string_table#get_string dw)) else @@ -129,7 +130,7 @@ object (self) (match (ptr, size) with | (XConst (IntConst n1), XOp ((Xf "ntpos"), [XConst (IntConst n2)])) when n1#equal n2 -> - let dw = TR.tget_ok (numerical_to_doubleword n1) in + let dw = numerical_mod_to_doubleword n1 in if string_table#has_string dw then Discharged ("constant string: " ^ (string_table#get_string dw)) else @@ -138,7 +139,7 @@ object (self) | XPOOutputFormatString x -> (match x with | XConst (IntConst n) -> - let dw = TR.tget_ok (numerical_to_doubleword n) in + let dw = numerical_mod_to_doubleword n in if string_table#has_string dw then Discharged ("constant string: " ^ (string_table#get_string dw)) else @@ -170,7 +171,7 @@ object (self) let x = simplify_xpr x in (match self#xprxt#xpr_to_bterm t_voidptr x with | Some (NumConstant n) when n#gt numerical_zero -> - let dw = TR.tget_ok (numerical_to_doubleword n) in + let dw = numerical_mod_to_doubleword n in DelegatedGlobal (dw, XXNotNull (NumConstant n)) | Some t -> Delegated (XXNotNull t) | _ -> Open) @@ -178,14 +179,14 @@ object (self) let x = simplify_xpr x in (match self#xprxt#xpr_to_bterm t_voidptr x with | Some (NumConstant n) when n#gt numerical_zero -> - let dw = TR.tget_ok (numerical_to_doubleword n) in + let dw = numerical_mod_to_doubleword n in DelegatedGlobal (dw, XXNullTerminated (NumConstant n)) | Some t -> Delegated (XXNullTerminated t) | _ -> Open) | XPOBlockWrite (ty, ptr, size) -> (match self#xprxt#xpr_to_bterm t_voidptr ptr with | Some (NumConstant n) when n#gt numerical_zero -> - let dw = TR.tget_ok (numerical_to_doubleword n) in + let dw = numerical_mod_to_doubleword n in (match self#xprxt#xpr_to_bterm t_int size with | Some (NumConstant ns) -> DelegatedGlobal @@ -200,7 +201,7 @@ object (self) | XPOBuffer (ty, ptr, size) -> (match self#xprxt#xpr_to_bterm t_voidptr ptr with | Some (NumConstant n) when n#gt numerical_zero -> - let dw = TR.tget_ok (numerical_to_doubleword n) in + let dw = numerical_mod_to_doubleword n in (match self#xprxt#xpr_to_bterm t_int size with | Some (NumConstant ns) -> DelegatedGlobal @@ -215,7 +216,7 @@ object (self) | XPOInitializedRange (ty, ptr, size) -> (match self#xprxt#xpr_to_bterm t_voidptr ptr with | Some (NumConstant n) when n#gt numerical_zero -> - let dw = TR.tget_ok (numerical_to_doubleword n) in + let dw = numerical_mod_to_doubleword n in (match self#xprxt#xpr_to_bterm t_int size with | Some (NumConstant ns) -> DelegatedGlobal diff --git a/CodeHawk/CHB/bchlib/bCHConstantDefinitions.mli b/CodeHawk/CHB/bchlib/bCHConstantDefinitions.mli index edd790eb..2181c430 100644 --- a/CodeHawk/CHB/bchlib/bCHConstantDefinitions.mli +++ b/CodeHawk/CHB/bchlib/bCHConstantDefinitions.mli @@ -76,7 +76,7 @@ val has_symbolic_address: string -> bool (** [get_symbolic_address name] returns the address associated with name [name]. - @raise BCH_failure if no address exists with name [name]. + raise BCH_failure if no address exists with name [name]. *) val get_symbolic_address: string -> doubleword_int @@ -96,20 +96,20 @@ val get_untyped_symbolic_address_names: unit -> string list val update_symbolic_address_btype: string -> btype_t -> unit (** [has_symbolic_address_name dw] returns true if the address [dw] is - associated with a name.]*) + associated with a name.*) val has_symbolic_address_name: doubleword_int -> bool (** [get_symbolic_address_name dw] returns the name associated with address [dw]. - @raise BCH_failure if no name is associated with address [dw] + raise BCH_failure if no name is associated with address [dw] *) val get_symbolic_address_name: doubleword_int -> string (** [get_symbolic_address_type dw] returns the btype of the symbolic address associated with address value [dw]. - @raise BCH_failure if no symbolic address is associated with address [dw]. + raise BCH_failure if no symbolic address is associated with address [dw]. *) val get_symbolic_address_type: doubleword_int -> btype_t diff --git a/CodeHawk/CHB/bchlib/bCHDemangler.mli b/CodeHawk/CHB/bchlib/bCHDemangler.mli index 36ecb621..f49071eb 100644 --- a/CodeHawk/CHB/bchlib/bCHDemangler.mli +++ b/CodeHawk/CHB/bchlib/bCHDemangler.mli @@ -36,7 +36,7 @@ open BCHLibTypes (** - {[ +{v ============================================================================== Grammar ============================================================================== @@ -164,13 +164,13 @@ open BCHLibTypes U struct V class W enum - ]} +v} currently not supported yet: - fields - function pointers - *) +*) val demangle: string -> string val has_demangled_name: string -> bool diff --git a/CodeHawk/CHB/bchlib/bCHFloc.ml b/CodeHawk/CHB/bchlib/bCHFloc.ml index c759e605..8da29235 100644 --- a/CodeHawk/CHB/bchlib/bCHFloc.ml +++ b/CodeHawk/CHB/bchlib/bCHFloc.ml @@ -1284,61 +1284,77 @@ object (self) else if self#env#is_memory_variable v then let memref_r = self#env#get_memory_reference v in let memoff_r = self#env#get_memvar_offset v in - let basevar_r = - TR.tbind - ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) - (fun memref -> - match memref#get_base with - | BaseVar v -> Ok v - | b -> - Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " - ^ "memory-base: " ^ (p2s (memory_base_to_pretty b))]) - memref_r in - let basevar_type_r = - TR.tbind - ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) - self#get_variable_type - basevar_r in TR.tbind ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) - (fun basevartype -> - TR.tbind - (fun memoff -> - match memoff with - | NoOffset when is_pointer basevartype -> - Ok (ptr_deref basevartype) - | ConstantOffset (n, NoOffset) when is_pointer basevartype -> - let symmemoff_r = - address_memory_offset - (ptr_deref basevartype) (num_constant_expr n) in + (fun memref -> + match memref#get_base with + | BGlobal -> + (match memoff_r with + | Ok (ConstantOffset (num, NoOffset)) -> + let gvaddr = numerical_mod_to_doubleword num in + if memmap#has_location gvaddr then + let gloc = memmap#get_location gvaddr in + Ok (gloc#btype) + else + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "no global location found for address " + ^ gvaddr#to_hex_string] + | _ -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "not a constant offset"]) + | _ -> + let basevar_r = + match memref#get_base with + | BaseVar v -> Ok v + | b -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "memory-base: " ^ (p2s (memory_base_to_pretty b))] in + let basevar_type_r = + TR.tbind + ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) + self#get_variable_type + basevar_r in + TR.tbind + ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) + (fun basevartype -> TR.tbind - ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " - ^ "basevar type: " ^ (btype_to_string basevartype) - ^ "; offset: " ^ n#toString) - (fun off -> - match off with + (fun memoff -> + match memoff with + | NoOffset when is_pointer basevartype -> + Ok (ptr_deref basevartype) + | ConstantOffset (n, NoOffset) when is_pointer basevartype -> + let symmemoff_r = + address_memory_offset + (ptr_deref basevartype) (num_constant_expr n) in + TR.tbind + ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "basevar type: " ^ (btype_to_string basevartype) + ^ "; offset: " ^ n#toString) + (fun off -> + match off with + | FieldOffset ((fname, ckey), NoOffset) -> + let cinfo = get_compinfo_by_key ckey in + let finfo = get_compinfo_field cinfo fname in + Ok finfo.bftype + | _ -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "symbolic offset: " + ^ (memory_offset_to_string off) + ^ " with basevar type: " + ^ (btype_to_string basevartype) + ^ " not yet handled"]) + symmemoff_r | FieldOffset ((fname, ckey), NoOffset) -> let cinfo = get_compinfo_by_key ckey in let finfo = get_compinfo_field cinfo fname in Ok finfo.bftype | _ -> Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " - ^ "symbolic offset: " - ^ (memory_offset_to_string off) - ^ " with basevar type: " - ^ (btype_to_string basevartype) + ^ "memoff: " ^ (memory_offset_to_string memoff) ^ " not yet handled"]) - symmemoff_r - | FieldOffset ((fname, ckey), NoOffset) -> - let cinfo = get_compinfo_by_key ckey in - let finfo = get_compinfo_field cinfo fname in - Ok finfo.bftype - | _ -> - Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " - ^ "memoff: " ^ (memory_offset_to_string memoff) - ^ " not yet handled"]) - memoff_r) - basevar_type_r + memoff_r) + basevar_type_r) + memref_r else if self#f#env#is_return_value v then let callsite_r = self#f#env#get_call_site v in TR.tbind diff --git a/CodeHawk/CHB/bchlib/bCHFtsParameter.mli b/CodeHawk/CHB/bchlib/bCHFtsParameter.mli index 63376017..2c4a215c 100644 --- a/CodeHawk/CHB/bchlib/bCHFtsParameter.mli +++ b/CodeHawk/CHB/bchlib/bCHFtsParameter.mli @@ -237,7 +237,7 @@ val pld_position_to_string: pld_position_t -> string (** Returns a string representation of a parameter location detail, which, depending on what information is available, may include: - - location slice: {size[, ]} + - location slice: [size\[, \]] - type of the location, if known - position list, in case of field positions or array positions.*) val parameter_location_detail_to_string: parameter_location_detail_t -> string diff --git a/CodeHawk/CHB/bchlib/bCHFunctionInfo.ml b/CodeHawk/CHB/bchlib/bCHFunctionInfo.ml index 90848ebb..eb6ad7c7 100644 --- a/CodeHawk/CHB/bchlib/bCHFunctionInfo.ml +++ b/CodeHawk/CHB/bchlib/bCHFunctionInfo.ml @@ -2021,7 +2021,21 @@ object (self) method set_test_variables (test_iaddr: ctxt_iaddress_t) (vars: (variable_t * variable_t) list) = - H.replace test_variables test_iaddr vars + (* Multiple jump (or other use) sites may be using the same test, so we + cannot just replace the set of test_variables, when a new set comes in. + *) + let entry = + if H.mem test_variables test_iaddr then + H.find test_variables test_iaddr + else + [] in + let newentry = + List.fold_left (fun acc (v1, v2) -> + if (List.exists (fun (v1', v2') -> v1#equal v1' && v2#equal v2') acc) then + acc + else + (v1, v2) :: acc) entry vars in + H.replace test_variables test_iaddr newentry method get_test_variables (test_iaddr:ctxt_iaddress_t) = if H.mem test_variables test_iaddr then diff --git a/CodeHawk/CHB/bchlib/bCHFunctionInfo.mli b/CodeHawk/CHB/bchlib/bCHFunctionInfo.mli index d194ded5..772a4ac9 100644 --- a/CodeHawk/CHB/bchlib/bCHFunctionInfo.mli +++ b/CodeHawk/CHB/bchlib/bCHFunctionInfo.mli @@ -60,7 +60,7 @@ val po_anchor_to_pretty: po_anchor_t -> pretty_t otherwise - a new function-info is created - @raise [BCH_failure] if + raise BCH_failure if - an error occurs when loading a function-info from file, or - address [dw] is not known to be a function entry point *) diff --git a/CodeHawk/CHB/bchlib/bCHImmediate.mli b/CodeHawk/CHB/bchlib/bCHImmediate.mli index 32461810..a139353d 100644 --- a/CodeHawk/CHB/bchlib/bCHImmediate.mli +++ b/CodeHawk/CHB/bchlib/bCHImmediate.mli @@ -44,7 +44,7 @@ val make_immediate: bool -> int -> numerical_t -> immediate_result (** [signed_immedidate_from_int size v] returns an immediate value of [size] bytes and value [v]. If the value [v] is outside the range that can be - represented by a signed integer with width [size] bytes, {Error] is + represented by a signed integer with width [size] bytes, [Error] is returned.*) val signed_immediate_from_int: ?size:int -> int -> immediate_result diff --git a/CodeHawk/CHB/bchlib/bCHLibTypes.mli b/CodeHawk/CHB/bchlib/bCHLibTypes.mli index 8863bf68..83148d7a 100644 --- a/CodeHawk/CHB/bchlib/bCHLibTypes.mli +++ b/CodeHawk/CHB/bchlib/bCHLibTypes.mli @@ -111,7 +111,7 @@ object ('a) doubleword with value the nearest value less than or equal to the value of [dw] that is a multiple of [n], and [d] the difference [dw - dw']. - @raises Invalid_argument if [n <= 0]. *) + raise Invalid_argument if [n <= 0]. *) method to_aligned: ?up:bool -> int -> ('a * int) (* conversion *) @@ -131,7 +131,7 @@ object ('a) (** Returns a date-time string by converting [dw#value] to a Unix.tm structure. - @raise Invalid_argument if the intermediate Unix.tm structure is invalid.*) + raise Invalid_argument if the intermediate Unix.tm structure is invalid.*) method to_time_date_string: string (** Returns a string representing a list of four characters if all four @@ -168,7 +168,7 @@ object ('a) (** Returns a standard hexadecimal notation of [dw#value] (without leading zeroes). - Example: [dw(0)#to_hex_string] returns "0x0"; dw(255)#to_hex_string] + Example: [dw(0)#to_hex_string] returns "0x0"; [dw(255)#to_hex_string] returns "0xff".*) method to_hex_string: string @@ -190,7 +190,7 @@ object ('a) returns Error.*) method subtract: 'a -> 'a traceresult - (** [dw#subtract i] returns dw(dw#value - i)] if [dw#value] is greater + (** [dw#subtract i] returns [dw(dw#value - i)] if [dw#value] is greater than or equal to [i], otherwise returns Error.*) method subtract_int: int -> 'a traceresult @@ -1205,9 +1205,9 @@ class type struct_tables_int = and associated userdata (in json): {[ - "call-back-tables": { - "0x4a5c30": "cgi_setobject_table" - } + "call-back-tables": { + "0x4a5c30": "cgi_setobject_table" + } ]} The call-back table addresses provided by the user-data are read in by @@ -1229,12 +1229,12 @@ class type struct_tables_int = For example: {[ - "call-targets": [ - {"ia": "0x40d5dc", - "fa": "0x40d510", - "tgts": [{"cba": "0x4a5c30:8"}] - } - ] + "call-targets": [ + {"ia": "0x40d5dc", + "fa": "0x40d510", + "tgts": [{"cba": "0x4a5c30:8"}] + } + ] ]} identifies the location of the indirect call instruction by its instruction address (ia) in function (fa), and indicates that the @@ -1264,17 +1264,17 @@ class type call_back_table_record_int = (** [cbtr#stringvalue n] returns the string value ([CBTag]) at offset [n]. - @raise BCH_failure if the field at offset [n] is not a [CBTag] value.*) + raise BCH_failure if the field at offset [n] is not a [CBTag] value.*) method stringvalue: int -> string (** [cbtr#intvalue n] returns the numerical value ([CBValue]) at offset [n]. - @raise BCH_failure if the field at offset [n] is not a [CBValue] value.*) + raise BCH_failure if the field at offset [n] is not a [CBValue] value .*) method intvalue: int -> numerical_t (** [cbtr#addrvalue n] returns the address value ([CBAddress]) at offset [n]. - @raise BCH_failure if the field at offset [n] is not a [CBAddress] value.*) + raise BCH_failure if the field at offset [n] is not a [CBAddress] value.*) method addrvalue: int -> string (* address at offset *) (** [cbtr#write_xml xnode] writes the field values and offset to [xnode].*) @@ -1308,12 +1308,12 @@ class type call_back_table_int = (** [cbt#type_at_offset n] returns the type of the field at offset [n]. - @raise BCH_failure if there is no field at offset [n].*) + raise BCH_failure if there is no field at offset [n].*) method type_at_offset: int -> btype_t (** [cbt#fieldname_at_offset n] returns the name of the field at offset [n]. - @raise BCH_failure if there is no field at offset [n].*) + raise BCH_failure if there is no field at offset [n].*) method fieldname_at_offset: int -> string (** Returns a list of field-offset, field-type pairs containing all fields.*) @@ -1357,7 +1357,7 @@ class type call_back_tables_int = (** [cbts#get_table addr] returns the call-back table with base address [addr]. - @raise BCH_failure if no call-back table is present at [addr].*) + raise BCH_failure if no call-back table is present at [addr].*) method get_table: string -> call_back_table_int (** [cbts#has_table addr] returns true if there is a call-back table with @@ -2201,14 +2201,20 @@ type function_stub_t = (** Identification of a call target in a call instruction.*) type call_target_t = - | StubTarget of function_stub_t (** call to dynamically linked function - external to the executable *) - | StaticStubTarget of doubleword_int * function_stub_t (** call to a statically - linked library function, with its address in the executable *) - | AppTarget of doubleword_int (** call to application function with the given address - in the executable *) - | InlinedAppTarget of doubleword_int * string (** [InlinedAppTarget (dw, name) is - call to an inlined application function with address [dw] and name [name] *) + | StubTarget of function_stub_t + (** call to dynamically linked function external to the executable *) + + | StaticStubTarget of doubleword_int * function_stub_t + (** call to a statically linked library function, with its address in + the executable *) + + | AppTarget of doubleword_int + (** call to application function with the given address in the executable *) + + | InlinedAppTarget of doubleword_int * string + (** [InlinedAppTarget (dw, name)] is + call to an inlined application function with address [dw] and name [name] *) + | WrappedTarget of doubleword_int * function_interface_t @@ -2218,18 +2224,22 @@ type call_target_t = is a thin wrapper for a call to another function with address [a] and api [fapi], (inner) call target [tgt], with [map] a map of the parameters of the inner function mapped to the values given to the outer call.*) - | VirtualTarget of function_interface_t (** call to a virtual function with a - known type signature *) + + | VirtualTarget of function_interface_t + (** call to a virtual function with a known type signature *) + | IndirectTarget of bterm_t option * call_target_t list - (** [IndirectTarget (t, tgts) is a call to a target expressed by + (** [IndirectTarget (t, tgts)] is a call to a target expressed by term [t] (if known, must be expressible by values external to the function, e.g., a global variable or a function argument, or a return value) and a list of concreate call targets.*) + | CallbackTableTarget of doubleword_int * int (** [CallbackTableTarget (dw, index)] is an indirect call where the target is in a table of pointers *) + | UnknownTarget (** indirect call to an unknown target *) @@ -2889,26 +2899,26 @@ class type function_summary_library_int = summaries before they are used in analysis.*) method read_summary_files: unit - (** {Query and access function summaries} *) + (** {1 Query and access function summaries} *) (** [get_function_dll fname] returns the name of the dll that contains a function by this name - @raise BCH_failure if [fname] is not an imported function + raise BCH_failure if [fname] is not an imported function *) method get_function_dll: string -> string (** [get_dll_function dll fname] returns the function summary for the dll function [fname] imported from dynamically loaded library [dll] - @raise BCH_failure if no summary is available for [fname] from [dll]. + raise BCH_failure if no summary is available for [fname] from [dll]. *) method get_dll_function: string -> string -> function_summary_int (** [get_so_function fname] returns the function summary for the shared object function [fname]. - @raise BCH_failure if no summary is available for [fname]. + raise BCH_failure if no summary is available for [fname]. *) method get_so_function: string -> function_summary_int @@ -2923,7 +2933,7 @@ class type function_summary_library_int = (** [get_syscall_function index] returns the function summary for the system call with index [index]. - @raise BCH_failure if no summary is available for the system call with + raise BCH_failure if no summary is available for the system call with index [index]. *) method get_syscall_function: int -> function_summary_int @@ -2931,7 +2941,7 @@ class type function_summary_library_int = (** [get_jni_function index] returns the function summary for the Java Native Method with index [index]. - @raise BCH_failure if no summary is available for the JNI function with + raise BCH_failure if no summary is available for the JNI function with index [index]. *) method get_jni_function: int -> function_summary_int @@ -3524,7 +3534,7 @@ and constant_value_variable_t = ctxt_iaddress_t (* callsite *) * string (* argument description *) * bool (* is-global address *) - (** [SideEffectValue (iaddr, name, is_global) represents the value + (** [SideEffectValue (iaddr, name, is_global)] represents the value assigned by the callee at call site [iaddr] to the argument with name [name].*) @@ -3839,7 +3849,7 @@ object method make_initial_register_value: register_t -> int -> assembly_variable_int (** [make_initial_memory_value var] returns the variable representing the - initial value of memory variable [var] at function entry.]*) + initial value of memory variable [var] at function entry.*) method make_initial_memory_value : variable_t -> assembly_variable_int (** [make_return_value addr] returns the variable representing the return @@ -3997,7 +4007,7 @@ object a constant numerical value or an index offset with fixed value variables. *) method has_fixed_value_offset: variable_t -> bool - (** Returns [true if [var] is a memory variable and its offset is a constant + (** Returns [true] if [var] is a memory variable and its offset is a constant numerical value. *) method has_constant_offset: variable_t -> bool @@ -4468,7 +4478,7 @@ class type global_memory_map_int = (* =========================================================== Function info === *) -(** @Deprecated Currenly used only for x86 *) +(** @deprecated Currently used only for x86 *) class type argument_values_int = object method add_argument_values : variable_t -> xpr_t list -> unit @@ -4653,7 +4663,7 @@ class type function_environment_int = (** [mk_offset_memory_variable memref memoff] returns a memory variable with [memref] as basis and a generic memory offset. - @raise [BCH_failure] if [memref] is an unknown memory reference. + raise BCH_failure if [memref] is an unknown memory reference. Note: eventually unknown memory references should be eliminated. *) method mk_offset_memory_variable: @@ -4751,7 +4761,7 @@ class type function_environment_int = (** {2 Memory offsets} *) - (** Returns [true if [var] is a memory variable and its offset is a constant + (** Returns [true] if [var] is a memory variable and its offset is a constant numerical value. *) method has_constant_offset: variable_t -> bool @@ -4877,7 +4887,7 @@ class type function_environment_int = (** {2 Other symbolic values} *) - (** {3 Function-call related) *) + (** {3 Function-call related} *) (** Returns [true] if [var] is the return value of a function call. *) method is_return_value: variable_t -> bool @@ -5372,7 +5382,7 @@ object representation of the instruction at address [iaddr] with that address.*) method set_instruction_bytes: ctxt_iaddress_t -> string -> unit - (** finfo#get_instruction_bytes iaddr] returns the hexadecimal + (** [finfo#get_instruction_bytes iaddr] returns the hexadecimal representation of the instruction bytes for the instruction at address [iaddr].*) method get_instruction_bytes: ctxt_iaddress_t -> string @@ -5392,7 +5402,7 @@ object (** [finfo#get_constant var] returns the constant value registered earlier for auxiliary variable [var]. - @raise [Invocation_error] if no constant is associated with [var]. + raise !Stdlib.Invocation_error if no constant is associated with [var]. *) method get_constant: variable_t -> numerical_t @@ -5536,7 +5546,7 @@ object instruction that sets the condition code used by the instruction at [iaddr]. - @raise [BCH_failure] if the instruction at [iaddr] does not have an + raise BCH_failure if the instruction at [iaddr] does not have an associated setter.*) method get_associated_cc_setter: ctxt_iaddress_t -> ctxt_iaddress_t @@ -5548,7 +5558,7 @@ object (** [finfo#get_associated_cc_user iaddr] returns the address of the instruction that uses the condition set by the instruction at [iaddr]. - @raise [BCH_failure] if the instruction at [iaddr] does not have an + raise BCH_failure if the instruction at [iaddr] does not have an associated user.*) method get_associated_cc_user: ctxt_iaddress_t -> ctxt_iaddress_t diff --git a/CodeHawk/CHB/bchlib/bCHLocation.mli b/CodeHawk/CHB/bchlib/bCHLocation.mli index db395b61..9686df65 100644 --- a/CodeHawk/CHB/bchlib/bCHLocation.mli +++ b/CodeHawk/CHB/bchlib/bCHLocation.mli @@ -50,11 +50,12 @@ makes the call to the inlined function ([ctxt_callsite]), and the address of the instruction where the inlined function should return to (usually the instruction following the call) ([ctxt_returnsite]): -[type fcontext_t = { +{[type fcontext_t = { ctxt_faddr: doubleword_int; ctxt_callsite: doubleword_int; ctxt_returnsite: doubleword_int - }] + } +]} A base location can also be wrapped into a block context to facilitate creating a delayed join by duplicating blocks. In this case the context @@ -67,7 +68,7 @@ holds the address of the block that is duplicated. A [ctxt_iaddress_t] is a string representation of an instruction address with context, with spec -[ i ( [], { faddr,iaddr } ) = iaddr +{v i ( [], { faddr,iaddr } ) = iaddr i ( [ F{ fa,cs,rs } ], { faddr,iaddr }) = iaddr i ( [ B{ js } ], { faddr,iaddr }) = iaddr @@ -81,7 +82,7 @@ context, with spec ci ( [ F{ fa1,cs1,rs1 },F{ fa2,cs2,rs2 } ], { faddr,iaddr } ) = F:cs1_F:cs2_iaddr ci ( [ B{ js } ], { faddr,iaddr }) = B:js_iaddr ci ( [ B{ js1 }, B{ js2 } ], { faddr,iaddr }) = B:js1_B:js2_iaddr -] +v} where [fa], [faddr] stand for function address, [cs] stands for call-site address, [rs] stands for return-site address, [js] stands for jump-site address (the @@ -99,7 +100,7 @@ val mk_function_context: -> context_t -(** [mk_base_location [faddr] [iaddr] creates a base_location from function +(** [mk_base_location faddr iaddr] creates a base_location from function address [faddr] and instruction address [iaddr] *) val mk_base_location: doubleword_int -> doubleword_int -> base_location_t diff --git a/CodeHawk/CHB/bchlib/bCHTypeConstraintUtil.mli b/CodeHawk/CHB/bchlib/bCHTypeConstraintUtil.mli index f68f062f..50e4dbf1 100644 --- a/CodeHawk/CHB/bchlib/bCHTypeConstraintUtil.mli +++ b/CodeHawk/CHB/bchlib/bCHTypeConstraintUtil.mli @@ -85,7 +85,7 @@ val join_integer_btypes: btype_t list -> btype_t option val mk_function_typevar: string -> type_variable_t -(** mk_data_address_typevar gaddr] returns a type variable for the global +(** [mk_data_address_typevar gaddr] returns a type variable for the global address [gaddr] (in hex). Note that this address may be either the address of a global variable, diff --git a/CodeHawk/CHB/bchlib/bCHUtilities.mli b/CodeHawk/CHB/bchlib/bCHUtilities.mli index abcd7322..b91bbc71 100644 --- a/CodeHawk/CHB/bchlib/bCHUtilities.mli +++ b/CodeHawk/CHB/bchlib/bCHUtilities.mli @@ -49,14 +49,14 @@ val track_function: (** Return the current time represented as [day year-month-mday at hr:min]. - @raise Invalid_argument if the time cannot be obtained or is invalid. + raise Invalid_argument if the time cannot be obtained or is invalid. *) val get_date_and_time: unit -> string (** Convert a Unix [tm] type into a string [weekday month mday hr:min:sec year]. - @raise Invalid_argument if the [tm] structure is invalid.*) + raise Invalid_argument if the [tm] structure is invalid.*) val make_date_and_time_string: Unix.tm -> string (* raises Invalid_argument *) diff --git a/CodeHawk/CHB/bchlib/bCHVersion.ml b/CodeHawk/CHB/bchlib/bCHVersion.ml index 4403b540..427a45c8 100644 --- a/CodeHawk/CHB/bchlib/bCHVersion.ml +++ b/CodeHawk/CHB/bchlib/bCHVersion.ml @@ -95,8 +95,8 @@ end let version = new version_info_t - ~version:"0.6.0_20250608" - ~date:"2025-06-08" + ~version:"0.6.0_20250702" + ~date:"2025-07-02" ~licensee: None ~maxfilesize: None () diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMAnalysisResults.ml b/CodeHawk/CHB/bchlibarm32/bCHARMAnalysisResults.ml index 97c8e8e5..287bc44a 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMAnalysisResults.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMAnalysisResults.ml @@ -72,12 +72,32 @@ object (self) let loc = ctxt_string_to_location faddr ctxtiaddr in let floc = get_floc loc in let espoffset = floc#get_stackpointer_offset "arm" in + let has_control_flow = + instr#has_opcode_condition + && (match instr#get_opcode with + | Branch _ | BranchExchange _ -> false + | _ -> true) + && (Option.is_none instr#is_in_aggregate) in begin arm_dictionary#write_xml_arm_opcode node instr#get_opcode; id#write_xml_instr node instr floc; id#write_xml_sp_offset node espoffset; arm_dictionary#write_xml_arm_bytestring - node (byte_string_to_printed_string instr#get_instruction_bytes) + node (byte_string_to_printed_string instr#get_instruction_bytes); + (if has_control_flow then + let optcc = instr#get_opcode_condition in + match optcc with + | Some cc -> + let pcc = BCHARMOpcodeRecords.get_cond_mnemonic_extension cc in + let _ = node#setAttribute "brcc" pcc in + if floc#has_test_expr then + let csetter = floc#f#get_associated_cc_setter floc#cia in + node#setAttribute "brsetter" csetter + else + () + | _ -> () + else + ()) end method private write_xml_instructions (node:xml_element_int) = diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstruction.ml b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstruction.ml index c16982d8..1ca00f2b 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstruction.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstruction.ml @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2021-2024 Aarno Labs, LLC + Copyright (c) 2021-2025 Aarno Labs, LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -87,8 +87,16 @@ object (self) method is_in_aggregate = in_aggregate + method has_opcode_condition = + BCHARMOpcodeRecords.is_opcode_conditional opcode + + method get_opcode_condition = + BCHARMOpcodeRecords.get_arm_opcode_condition opcode + + (* applies only to Thumb IT instruction *) method set_block_condition = blockcondition <- true + (* applies only to Thumb IT instruction *) method is_block_condition = blockcondition method set_condition_covered_by (iaddr: doubleword_int) = diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.ml b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.ml index c19c1bb3..d2fe2a89 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.ml @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2021-2024 Aarno Labs, LLC + Copyright (c) 2021-2025 Aarno Labs, LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -1138,3 +1138,24 @@ let get_aggregate (iaddr: doubleword_int): arm_instruction_aggregate_int = let get_arm_jumptables (): (doubleword_int * arm_jumptable_int) list = !arm_assembly_instructions#get_jumptables + + +let get_associated_test_instr + (finfo: function_info_int) + (ctxtiaddr: ctxt_iaddress_t) + : (location_int * arm_assembly_instruction_int) option = + if finfo#has_associated_cc_setter ctxtiaddr then + let faddr = finfo#get_address in + let testiaddr = finfo#get_associated_cc_setter ctxtiaddr in + let testloc = BCHLocation.ctxt_string_to_location faddr testiaddr in + let testaddr = testloc#i in + TR.tfold + ~ok:(fun testinstr -> Some (testloc, testinstr)) + ~error:(fun e -> + begin + log_error_result __FILE__ __LINE__ e; + None + end) + (get_arm_assembly_instruction testaddr) + else + None diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.mli b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.mli index 99652843..7f1307b9 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.mli @@ -102,9 +102,20 @@ val has_aggregate: doubleword_int -> bool (** [get_aggregate iaddr] returns the aggregate registered at virtual address [iaddr]. - @raises [BCH_failure] if no aggregate is registered at [iaddr].*) + raise BCH_failure if no aggregate is registered at [iaddr].*) val get_aggregate: doubleword_int -> arm_instruction_aggregate_int val get_arm_jumptables: unit -> (doubleword_int * arm_jumptable_int) list + + +(** [get_associated_test_instr finfo iaddr] returns the location and instruction + that provides the test that is associated with the condition code of the + instruction at address [iaddr] + + If no test instruction is associated with the instruction at [iaddr] [None] + is returned. + *) +val get_associated_test_instr: + function_info_int -> ctxt_iaddress_t -> (location_int * arm_assembly_instruction_int) option diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.ml b/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.ml index d804228f..e34627eb 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.ml @@ -94,6 +94,20 @@ module TR = CHTraceResult CMP x, 0 GE (x >= 0 & x < e31) (N = 0, V = 0) HI (x > y) (C = 1 and Z = 0) + + + CMN x, y: + --------- + (result, carry, overflow) = AddWithCarry(x, y, 0) + N = result<31> signed(x + y) < 0 + Z = result == 0 x + y = 0 , mod(x + y) = e32 + C = carry unsigned(x + y) >= e32 + V = overflow signed(x + y) >= e31 \/ signed(x + y) <= -e31 + + setter condition predicate (on unsigned) + CMN x, 1 LE x < 0 + CMN x, 1 GT x >= 0 + *) let x2p = xpr_formatter#pr_expr @@ -178,6 +192,9 @@ let cc_expr (testopc: arm_opcode_t) (cc: arm_opcode_cc_t): (bool * xpr_t option * arm_operand_int list) = let found = ref true in + let is_one (x: xpr_t) = + match x with + | XConst (IntConst n) -> n#equal CHNumerical.numerical_one | _ -> false in let (expr, opsused) = match (testopc, cc) with @@ -189,6 +206,9 @@ let cc_expr | (BitwiseAnd (true, ACCAlways, _, x, y, _), ACCEqual) -> (XOp (XEq, [XOp (XBAnd, [vu x; vu y]); zero_constant_expr]), [x; y]) + | (BitwiseAnd (true, ACCAlways, _, x, y, _), ACCNotEqual) -> + (XOp (XNe, [XOp (XBAnd, [vu x; vu y]); zero_constant_expr]), [x; y]) + (* ---------------------------------------------------------- Compare --- *) | (Compare (ACCAlways, x, y, _), ACCEqual) -> @@ -304,6 +324,12 @@ let cc_expr | (CompareNegative (ACCAlways, x, y), ACCUnsignedHigher) -> (XOp (XGt, [XOp (XPlus, [vu x; vu y]); max32_constant_expr]), [x; y]) + | (CompareNegative (ACCAlways, x, y), ACCSignedGT) when (is_one (v y)) -> + (XOp (XGe, [v x; zero_constant_expr]), [x; y]) + + | (CompareNegative (ACCAlways, x, y), ACCSignedLE) when (is_one (v y)) -> + (XOp (XLt, [v x; zero_constant_expr]), [x; y]) + | (CompareNegative (ACCAlways, x, y), ACCCarryClear) -> (XOp (XLOr, [XOp (XLt, [XOp (XPlus, [v x; v y]); zero_constant_expr]); XOp (XGe, [v x; zero_constant_expr])]), [x; y]) @@ -407,7 +433,11 @@ let arm_conditional_expr else begin (if collect_diagnostics () then - ch_diagnostics_log#add "condition" (x2p expr)); + ch_diagnostics_log#add + "condition" + (LBLOCK [condloc#toPretty; STR ": "; (x2p expr); STR ". "; + pretty_print_list frozenVars#listOfValues + (fun v -> v#toPretty) "[" ", " "]" ])); condfloc#set_test_expr expr; testfloc#set_test_variables frozenVars#listOfPairs; (frozenVars#listOfValues, (Some expr), opsused) @@ -478,7 +508,8 @@ let arm_conditional_conditional_expr STR "; cond3: "; x2p cond3]) in - let xpr = XOp (XLOr, [XOp (XLAnd, [XOp (XLNot, [cond1]); cond3]); cond2]) in + let xpr = XOp (XLOr, [XOp (XLAnd, [XOp (XLNot, [cond1]); cond3]); + XOp (XLAnd, [cond1; cond2])]) in begin (if collect_diagnostics () then ch_diagnostics_log#add "condition" (x2p xpr)); diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.mli b/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.mli index 965b679f..96f0fa8b 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.mli @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -37,6 +37,21 @@ open BCHLibTypes (* bchlibarm32 *) open BCHARMTypes + +(** Returns the predicate condition and associated info for a condidional, + in particular it returns a tuple consisting of: + - a list of temporary variables created to preserve the (frozen) values at + the test location (testloc) for the location of the conditional (condloc) + - the predicate that expresses the joint condition of test and condition + code (cc, e.g., EQ), and + - a list of the operands used in the creation of the predicate. + + The arguments to the function are: + - [condopc]: the opcode of the conditional instruction (e.g., Branch) + - [testopc]: the opcode of the test instruction (e.g., Compare) + - [condloc]: the location of conditional instruction + - [testloc]: the location of the test instruction. + *) val arm_conditional_expr: condopc: arm_opcode_t -> testopc:arm_opcode_t @@ -45,6 +60,34 @@ val arm_conditional_expr: -> (variable_t list * xpr_t option * arm_operand_int list) +(** Returns the predicate condition and associated info as above for a composite + condtitional of the form +{v + I1: CMP x, #0 + I2: CMPEQ y, z + I3: BGT +v} + where the ultimate predicate condition depends is a composite of potentially + three test - cc combinations: + - if test1-cc1 is true, I2 is executed and the branch predicate is test2-cc2 + - if test2-cc1 is false I2 is not executed and branch predicate is test2-cc1 + leading to the composite expression +{v + (test1-cc1 /\ test2-cc2) \/ ((not test1-cc1) /\ test1-cc2) +v} + or, in the case of the example above: +{v + (x = 0 /\ y > z) \/ (x != 0 /\ x > 0) +or + (x = 0 /\ y > z) \/ (x > 0) +v} + - [condopc]: the opcode of the conditional instruction (above: BGT) + - [testopc] the opcode of the second test instruction (above: CMPEQ) + - [testtestopc]: the opcode of the first test instruction (above: CMP) + - [condloc]: the location of [condopc] + - [testloc]: the location of [testopc] + - [testtestloc]: the location of [testtestopc] + *) val arm_conditional_conditional_expr: condopc: arm_opcode_t -> testopc: arm_opcode_t diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.ml b/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.ml index 66b41a8f..9c2bbadb 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.ml @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2022-2024 Aarno Labs, LLC + Copyright (c) 2022-2025 Aarno Labs, LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -66,6 +66,13 @@ let arm_aggregate_kind_to_string (k: arm_aggregate_kind_t) = | ARMPredicateAssignment (inverse, op) -> let inv = if inverse then " (inverse)" else "" in "predicate assignment to " ^ op#toString ^ inv + | ARMTernaryAssignment (dst, op1, op2) -> + "ternary assignment: " + ^ dst#toString + ^ " = cond ? " + ^ op1#toString + ^ " : " + ^ op2#toString | BXCall (_, i2) -> "BXCall at " ^ i2#get_address#to_hex_string @@ -140,6 +147,11 @@ object (self) | ARMPredicateAssignment _ -> true | _ -> false + method is_ternary_assign = + match self#kind with + | ARMTernaryAssignment _ -> true + | _ -> false + method write_xml (_node: xml_element_int) = () method toCHIF (_faddr: doubleword_int) = [] @@ -149,6 +161,8 @@ object (self) STR "Aggregate of "; INT (List.length self#instrs); STR " with anchor "; + self#anchor#get_address#toPretty; + STR " "; self#anchor#toPretty; STR ": "; STR (arm_aggregate_kind_to_string self#kind)] @@ -256,6 +270,21 @@ let make_predassign_aggregate ~anchor:mov2 +let make_ternassign_aggregate + (mov1: arm_assembly_instruction_int) + (mov2: arm_assembly_instruction_int) + (dstop: arm_operand_int) + (n1: numerical_t) + (n2: numerical_t): arm_instruction_aggregate_int = + let kind = ARMTernaryAssignment(dstop, n1, n2) in + make_arm_instruction_aggregate + ~kind + ~instrs:[mov1; mov2] + ~entry:mov1 + ~exitinstr:mov2 + ~anchor:mov2 + + let disassemble_arm_instructions (ch: pushback_stream_int) (iaddr: doubleword_int) (n: int) = for _i = 1 to n do @@ -291,6 +320,8 @@ let identify_jumptable | Add (_, ACCNotUnsignedHigher, rd, rn, _, false) when rd#is_pc_register && rn#is_pc_register -> create_addls_pc_jumptable ch instr + | Add (_, ACCAlways, rd, _, _, false) when rd#is_pc_register -> + create_arm_add_pc_adr_jumptable ch instr | BranchExchange (ACCAlways, regop) when regop#is_register -> create_arm_bx_jumptable ch instr | _ -> None @@ -492,6 +523,67 @@ let identify_predicate_assignment | _ -> None +(* format of ternary assignment (in ARM): assigns one value or another depending + on the result of a test: + + MOVNE Rx, imm1 + MOVEQ Rx, imm2 + +or + + MVNEQ Rx, imm1 + MOVNE Rx, imm2 + *) +let identify_ternary_assignment + (_ch: pushback_stream_int) + (instr: arm_assembly_instruction_int): + (arm_assembly_instruction_int + * arm_assembly_instruction_int + * arm_operand_int + * numerical_t + * numerical_t) option = + let negval (n: numerical_t) = + match Xsimplify.simplify_xpr (XOp (XBNot, [Xprt.num_constant_expr n])) with + | XConst (IntConst nn) -> Some nn + | _ -> None in + match instr#get_opcode with + | Move (false, c2, rd, imm2, _, _) + | BitwiseNot (false, c2, rd, imm2, _) + when imm2#is_immediate && (has_inverse_cc c2) -> + let optn2 = + let n2 = imm2#to_numerical in + match instr#get_opcode with + | Move _ -> Some n2 + | BitwiseNot _ -> negval n2 + | _ -> None in + (match optn2 with + | Some n2 -> + let rdreg = rd#get_register in + let addr = instr#get_address in + let movinstr_r = get_arm_assembly_instruction (addr#add_int (-4)) in + (match TR.to_option movinstr_r with + | Some movinstr -> + (match movinstr#get_opcode with + | Move (false, c1, rd, imm1, _, _) + when imm1#is_immediate + && (rd#get_register = rdreg) + && (has_inverse_cc c1) + && ((Option.get (get_inverse_cc c1)) = c2) -> + Some (movinstr, instr, rd, imm1#to_numerical, n2) + | BitwiseNot (false, c1, rd, imm1, _) + when imm1#is_immediate + && (rd#get_register = rdreg) + && (has_inverse_cc c1) + && ((Option.get (get_inverse_cc c1)) = c2) -> + (match (negval imm1#to_numerical) with + | Some n1 -> Some (movinstr, instr, rd, n1, n2) + | _ -> None) + | _ -> None) + | _ -> None) + | _ -> None) + | _ -> None + + let identify_arm_aggregate (ch: pushback_stream_int) (instr: arm_assembly_instruction_int): @@ -556,4 +648,12 @@ let identify_arm_aggregate | Some (inverse, mov1, mov2, dstop) -> Some (make_predassign_aggregate inverse mov1 mov2 dstop) | _ -> None in + let result = + match result with + | Some _ -> result + | _ -> + match identify_ternary_assignment ch instr with + | Some (mov1, mov2, dstop, n1, n2) -> + Some (make_ternassign_aggregate mov1 mov2 dstop n1 n2) + | _ -> None in result diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.mli b/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.mli index d4a1ad78..9022b112 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.mli @@ -1,10 +1,10 @@ (* ============================================================================= - CodeHawk Binary Analyzer + CodeHawk Binary Analyzer Author: Henny Sipma ------------------------------------------------------------------------------ The MIT License (MIT) - - Copyright (c) 2022-2024 Aarno Labs, LLC + + Copyright (c) 2022-2025 Aarno Labs, LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -12,10 +12,10 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -25,18 +25,169 @@ SOFTWARE. ============================================================================= *) -(** Sequence of consecutive assembly instructions that represents a semantic unit. - - Examples: - - switch statement constructed by - - table branch instructions (TBB/TBH), or - - load from table into pc - - indirect jump from table - - - question expression constructed by Thumb-2 if-then instruction - - In translation to CHIF, semantics are obtained from the anchor, all other - instructions belonging to the aggregate are ignored. + +(** {1 Instruction aggregates: Overview} *) + +(** {2 Description} + + An instruction aggregate is a (usually, but not always, contiguous) + sequence of two or more instructions that is treated as a single + semantic unit. The semantics for the entire sequence is assigned to + one of the instructions (often the last one in the sequence) and all + other instructions are considere no-ops by all subsequent analyses. + + Instruction aggregates cover a variety of constructs ranging from + predicate and ternary assignments to jump tables. + + {2 Motivation} + + The rationale behind combining a sequence of instructions into an + aggregate is that the collective action of the individual instructions + combined is not easily apparent from considering their semantics in + isolation. The instructions usually collaborate in a very specific + way to achieve a particular result, with operands from different instructions + playing playing a specific role. This is especially true of the jump + table aggregates. For some of the other aggregate kinds the semantics + of the individual instructions combined would be similar, but analysis + of the aggregate is more efficient and precise. For example, this is + the case with the predicate assignment: + {[ + + MOVcc Rx, #1 + MOVcc' Rx, #0 where cc' is (not cc) + ]} + The two MOV instructions can be combined into one assignment of the + boolean : + {[ + Rx := + ]} + Translating and analyzing the two instructions in isolation produces + two joins, and potentially three reaching definitions for a subsequent + use of Rx. In contrast, the two instructions combined into one + predicate assignment produces zero joins, and only a single reaching + definition for a subsequent use of Rx, a considerable improvement. + + {2 Identification} + + Instruction aggregates are identified during disassembly by pattern + matching. Identification is entirely syntactic, so only the information + present in the instructions themselves, such as opcode and operands, + are used; not the possible values that those operands may have, unless + they are immediates. + + {2 Representation} + + The aggregate is represented by objects of the class type + [arm_instruction_aggregate_int]. These objects contain the kind of + aggregate, a list of the contributing instructions, and information + about its entry, exit, and anchor address. The anchor is the + instruction to which the full semantics of the aggregate is assigned. + + Cross references to instances are recorded in the [arm_assembly_instruction_int] + instances of the instructions that make up the aggregate to enable + triggering proper translation and analysis. The collection of all + aggregates themselves is maintained in the singleton object of type + [arm_assembly_instructions_int]. + + Most aggregates are contained within a single basic block and do not + affect control flow. The exceptions are jump tables, whose components + are directly incorporated into the CFG during disassembly and require + no further semantic handling. + + {2 Translation into CHIF} + + The semantics of the aggregate is explicated in the translation into + CHIF. In general, instructions are individually translated into CHIF + (in the function [translate_arm_instruction]). In the case of aggregates + CHIF is generated for the full semantics at the anchor instruction, + while all other other instructions in the aggregate are treated as + no-ops. In the case of jump tables the full semantics is already + incorporated in the CFG during disassembly and all instructions in + the aggregate are treated as no-ops. + + {2 Transfer to front end} + + The kind and structure of aggregates and analysis results involving + their components are communicated to the (python) front end in the + instruction results data contained in the function opcode dictionary + (see bCHFnARMDictionary). The format varies with the kind of aggregate, + as each kind has different types of values that characterize its + operation. In the (python) front end these values are made accessible + to the various instructions in the [InstrXData] objects created for + each instruction. The documentation of each aggregate kind below and + elsewhere presents more details on the actual format for each of these. + + {2 Tests} + + Currenly only two of the kinds of aggregates have associated unit tests: + - bCHARMJumpTableTest + - bCHThumbITSequenceTest + These tests contain instances of code fragments encountered in actual + binaries and give some insight in their use. + + + {1 Predicate Assignments} + + {2 Description} + + A predicate assignment aggregate consists of two assignment instructions + that combine into a single assignment of a boolean predicate. The pattern + for the aggregate is two adjacent MOV instructions of the form: + {[ + + MOVcc Rx, #1 + MOVcc' Rx, #0 where cc' is (not cc) + ]} + or + {[ + + MOVcc' Rx, #0 + MOVcc Rx, #1 where cc is not (cc') + ]} + In all executions exactly one of these two MOV instructions is executed. + If the joint condition is true Rx is assigned 1, if it is false + Rx is assigned 0, and thus the postcondition of these two instructions + is essentially the assignment of the boolean . + + {3 Example} + + {[ + + MOVNE R1, #0 + MOVEQ R1, #1 + ]} + is translated into + {[ R1 := (R0 == 5) ]} + + {2 Representation} + + The [arm_instruction_aggregate] instance of the predicate assignment + records the address of the second instruction as the anchor, the + destination operand, and whether the predicate is to be + inverted, where cc is the condition code for the anchor instruction. + The predicate is to be inverted if the anchor instruction assigns 0. + + {2 Translation to CHIF} + + The first MOV instruction is translated into a no-op. The second + MOV instruction is translated into the assignment + {[ Rx := [not] predicate ]} + with the single defined variable Rx, and with variables used all + variables referenced in the predicate. + + If a predicate cannot be constructed or derived, the destination + variable Rx is abstracted. + + {2 Transfer to front end} + + The first instruction is tagged as 'subsumed' by the second + instruction, and thus to be treated as a no-op. The second + instruction is tagged with 'agg:predassign' with a list of + dependents (the first instruction in this case). It lists as + a variable the destination operand, along with its (high) + uses. If a predicate was construction, it lists + (the possibly inverse of) the predicate in its original and + rewritten form, along with its reaching definitions. *) (* bchlib *) diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMJumptable.ml b/CodeHawk/CHB/bchlibarm32/bCHARMJumptable.ml index 1b315804..a02b6461 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMJumptable.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMJumptable.ml @@ -240,6 +240,40 @@ let ldrtest | _ -> false +let ldrs2test + (tmpreg: arm_reg_t) + (basereg: arm_reg_t) + (indexreg: arm_reg_t) + (instr: arm_assembly_instruction_int) = + match instr#get_opcode with + | LoadRegister (ACCAlways, rt, rn, rm, mem, false) -> + let _ = + chlog#add "ldrs2-test" + (LBLOCK [instr#get_address#toPretty; + STR ": "; + STR (BCHARMOpcodeRecords.arm_opcode_to_string instr#get_opcode); + STR " with "; + STR (BCHCPURegisters.armreg_to_string tmpreg); + STR ", "; + STR (BCHCPURegisters.armreg_to_string basereg); + STR ", "; + STR (BCHCPURegisters.armreg_to_string indexreg) + ]) in + rt#is_register + && rt#get_register = tmpreg + && rn#is_register + && rn#get_register = basereg + && rm#is_register + && rm#get_register = indexreg + && (match mem#get_kind with + | ARMOffsetAddress (_, _, offset, _, _, _, _) -> + (match offset with + | ARMShiftedIndexOffset (_, ARMImmSRT (SRType_LSL, 2), _) -> true + | _ -> false) + | _ -> false) + | _ -> false + + let ldrbtest (basereg: arm_reg_t) (indexreg: arm_reg_t) @@ -899,6 +933,140 @@ let create_arm_add_pc_jumptable create_arm_add_pc_h_jumptable ch addpcinstr +(* ADD-PC-LDR patterns (in ARM) + + size is obtained from CMP instruction's immediate value + table start address is obtained from the ADR instruction + + [4 bytes] CMP indexreg, maxcase + [4 bytes] BHI default address + [4 bytes] ADR basereg, start_address + [4 bytes] LDR scindexreg, [basereg, indexreg, LSL#2] + [4 bytes] ADD PC, basereg, scindexreg + + Notes: These instructions always appear in the same order, but they may + be arbitrarily interleaved with other instructions, hence the large number + of offsets in finding the various instructions below. + + Notes: encountered in a binary compiled with clang 18.1.3 on an arm64 host, + cross-compiled to arm32. + *) +let is_arm_add_pc_adr_jumptable + (addpcinstr: arm_assembly_instruction_int): + (arm_assembly_instruction_int (* CMP instr *) + * arm_assembly_instruction_int (* BHI instr *) + * arm_assembly_instruction_int (* ADR instr *) + * arm_assembly_instruction_int) option = (* LDR instr *) + match addpcinstr#get_opcode with + | Add (_, ACCAlways, rd, baseregop, scindexregop, false) + when rd#is_pc_register + && baseregop#is_register + && scindexregop#is_register -> + let addr = addpcinstr#get_address in + let basereg = baseregop#get_register in + let scindexreg = scindexregop#get_register in (* scaled index register *) + let cmptestf (instr: arm_assembly_instruction_int) = + match instr#get_opcode with + | Compare (_, rn, imm, _) -> rn#is_register && imm#is_immediate + | _ -> false in + let cmpinstr_o = + find_instr cmptestf [(-16); (-20); (-24); (-28); (-32); (-36)] addr in + (match cmpinstr_o with + | Some cmpinstr -> + (match cmpinstr#get_opcode with + | Compare (_, indexregop, _imm, _) -> + let indexreg = indexregop#get_register in + let adrtestf = adr_reg_test basereg in + let ldrs2testf = ldrs2test scindexreg basereg indexreg in + let adrinstr_o = find_instr adrtestf [(-8); (-12)] addr in + let bhiinstr_o = find_instr bhi_test [(-12); (-16)] addr in + let ldrs2instr_o = find_instr ldrs2testf [(-4); (-8)] addr in + let _ = + if Option.is_none adrinstr_o then + chlog#add "adr-jump table failure" + (LBLOCK [addpcinstr#get_address#toPretty; STR ": adrinstr"]) in + let _ = + if Option.is_none bhiinstr_o then + chlog#add "adr-jump table failure" + (LBLOCK [addpcinstr#get_address#toPretty; STR ": bhiinstr"]) in + let _ = + if Option.is_none ldrs2instr_o then + chlog#add "adr-jump table failure" + (LBLOCK [addpcinstr#get_address#toPretty; STR ": ldrs2instr"]) in + (match (bhiinstr_o, adrinstr_o, ldrs2instr_o) with + | (Some bhiinstr, Some adrinstr, Some ldrs2instr) -> + Some (cmpinstr, bhiinstr, adrinstr, ldrs2instr) + | _ -> None) + | _ -> + let _ = + chlog#add "arm-jumptable cmpinstr failure" + (LBLOCK [addpcinstr#get_address#toPretty]) in + None) + | _ -> + let _ = + chlog#add "arm-jumptable cmpinstr failure (not found)" + (LBLOCK [addpcinstr#get_address#toPretty]) in + None) + | _ -> None + + +let create_arm_add_pc_adr_jumptable + (ch: pushback_stream_int) + (addpcinstr: arm_assembly_instruction_int): + (arm_assembly_instruction_int list * arm_jumptable_int) option = + match is_arm_add_pc_adr_jumptable addpcinstr with + | None -> + begin + chlog#add "arm-jumptable: add-pc-addr (None)" + (LBLOCK [addpcinstr#get_address#toPretty]); + None + end + | Some (cmpinstr, bhiinstr, adrinstr, ldrinstr) -> + match (cmpinstr#get_opcode, + bhiinstr#get_opcode, + adrinstr#get_opcode, + ldrinstr#get_opcode) with + | (Compare (_, _, imm, _), + Branch (_, tgtop, _), + Adr (_, _, adrop), + LoadRegister (_, dstregop, _, _, _, _)) + when tgtop#is_absolute_address && adrop#is_absolute_address -> + let iaddr = addpcinstr#get_address in + let defaulttgt = tgtop#get_absolute_address in + let size = imm#to_numerical#toInt in + let jtaddr = adrop#get_absolute_address in + let targets = ref [] in + let _ = + for i = 0 to size do + let offset = ch#read_num_signed_doubleword in + targets := (iaddr#add_int (4 + offset#toInt), i) :: !targets + done in + let endaddr = jtaddr#add_int (4 + (4 * size)) in + let jt:arm_jumptable_int = + make_arm_jumptable + ~end_address:endaddr + ~start_address:jtaddr + ~default_target:defaulttgt + ~targets:(List.rev !targets) + ~index_operand:dstregop + () in + let instrs = + [cmpinstr; bhiinstr; adrinstr; ldrinstr; addpcinstr] in + begin + chlog#add "arm-jumptable: add_pc_addr" + (LBLOCK [addpcinstr#get_address#toPretty; + STR ": of size: "; + INT size]); + Some (instrs, jt) + end + | _ -> + begin + chlog#add "arm-jumptable: add_pc_addr" + (LBLOCK [addpcinstr#get_address#toPretty; + STR ": unsuccesful"]); + None + end + (* format of BX-based jumptable (in Thumb-22): Type 1 diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMJumptable.mli b/CodeHawk/CHB/bchlibarm32/bCHARMJumptable.mli index e060b0fa..64ff5c89 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMJumptable.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHARMJumptable.mli @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2022-2024 Aarno Labs LLC + Copyright (c) 2022-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -80,6 +80,12 @@ val create_arm_add_pc_jumptable: -> (arm_assembly_instruction_int list * arm_jumptable_int) option +val create_arm_add_pc_adr_jumptable: + pushback_stream_int + -> arm_assembly_instruction_int + -> (arm_assembly_instruction_int list * arm_jumptable_int) option + + (** [create_bx_jumptable ch instr] creates a jump table targeted by a BX instruction [instr] by processing the associated list of addresses from stream [ch].*) diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMOpcodeRecords.ml b/CodeHawk/CHB/bchlibarm32/bCHARMOpcodeRecords.ml index e6d0d1d2..eea9de06 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMOpcodeRecords.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMOpcodeRecords.ml @@ -198,7 +198,7 @@ let get_record (opc:arm_opcode_t): 'a opcode_record_t = operands = [ rd; rn; imm ]; flags_set = if s then [APSR_N; APSR_Z; APSR_C] else []; ccode = Some c; - ida_asm = (fun f -> f#opscc ~thumbw:tw "AND" c [rd; rn; imm]) + ida_asm = (fun f -> f#opscc ~thumbw:tw "AND" ~writeback:s c [rd; rn; imm]) } | BitwiseBitClear (s, c, rd, rn, rm, tw) -> { mnemonic = "BIC"; diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli b/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli index dae4ee32..6a0d4330 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2021-2024 Aarno Labs, LLC + Copyright (c) 2021-2025 Aarno Labs, LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -1423,6 +1423,7 @@ class type arm_assembly_instruction_int = method get_instruction_bytes: string method get_bytes_ashexstring: string method get_non_code_block: not_code_t + method get_opcode_condition: arm_opcode_cc_t option method condition_covered_by: doubleword_int (* predicates *) @@ -1438,6 +1439,7 @@ class type arm_assembly_instruction_int = method is_aggregate_entry: bool method is_aggregate_exit: bool method is_aggregate_anchor: bool + method has_opcode_condition: bool (* i/o *) method write_xml: xml_element_int -> unit @@ -1447,6 +1449,20 @@ class type arm_assembly_instruction_int = end +(** {1 Instruction aggregates} *) + +(** An instruction aggregate is a (usually, but not always, contiguous) + sequence of two or more instructions that is treate as a single + semantic unit. The semantics for the entire sequence is assigned to + one of the instructions (often the last one in the sequence) and all + other instructions are considere no-ops by all subsequent analyses. + + Instruction aggregates cover a variety of constructs ranging from + predicate and ternary assignments to jump tables. + + A more detailed description is provided in [bCHARMInstructionAggregate]. + *) + type arm_assembly_instruction_result = arm_assembly_instruction_int traceresult @@ -1532,6 +1548,7 @@ type arm_aggregate_kind_t = * arm_assembly_instruction_int * arm_assembly_instruction_int | ARMPredicateAssignment of bool * arm_operand_int + | ARMTernaryAssignment of arm_operand_int * numerical_t * numerical_t | BXCall of arm_assembly_instruction_int * arm_assembly_instruction_int @@ -1558,6 +1575,7 @@ class type arm_instruction_aggregate_int = method is_pseudo_ldrsh: bool method is_pseudo_ldrsb: bool method is_predicate_assign: bool + method is_ternary_assign: bool (* i/o *) method write_xml: xml_element_int -> unit diff --git a/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml b/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml index 6c6ab00d..43fc688b 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml @@ -163,7 +163,7 @@ let get_successors | Branch (_, op, _) | BranchExchange (_, op) when op#is_register && op#get_register = ARLR -> - (next ()) + (next ()) @ [wordmax] (* return via Move *) | Move (_, ACCAlways, dst, src, _, _) diff --git a/CodeHawk/CHB/bchlibarm32/bCHDisassembleARMInstruction.ml b/CodeHawk/CHB/bchlibarm32/bCHDisassembleARMInstruction.ml index 64e1fb57..7482df6f 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHDisassembleARMInstruction.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHDisassembleARMInstruction.ml @@ -1703,6 +1703,18 @@ let parse_misc_6_type (instr: doubleword_int) (cond: int) = (* VMOV , , *) VectorMoveDDS (c, VfpNone, rt WR, rt2 WR, rtd WR, dm RD) + (* FLDMIAX{}{}>Rn>{!}, *) + (* <6>01001DW11011<-imm7>1 *) + | (1, 1, 11) when (bv 22) = 0 && (bv 0) = 1 -> + let d = prefix_bit (bv 22) (b 15 12) in + let rnreg = get_arm_reg (b 19 16) in + let rn = arm_register_op rnreg in + let regs = (b 7 1) in + let rl = arm_extension_register_list_op XDouble d regs in + let mem = mk_arm_mem_multiple_op ~size:8 rnreg regs in + (* FLXMIAX, *) + FLoadMultipleIncrementAfter (false, c, rn RD, rl WR, mem RD) + (* <6>01D11<13><10><-imm8-> *) (* VPOP - A2 *) | (1, 3, 10) when (b 19 16) = 13 -> let d = postfix_bit (bv 22) (b 15 12) in @@ -1824,6 +1836,23 @@ let parse_misc_6_type (instr: doubleword_int) (cond: int) = (* FSTMIAX , *) FStoreMultipleIncrementAfter (false, c, rn RD, rl RD, mem WR) + (* <6>01D10<-imm8-> *) (* STCL - A1 *) + | (1, 2, 1) -> + let isindex = (bv 24) = 1 in + let isadd = (bv 23) = 1 in + let iswback = (bv 21) = 1 in + let islong = (bv 22) = 1 in + let crd = b 15 12 in + let coproc = b 11 8 in + let imm32 = 4 * (b 7 0) in + let offset = ARMImmOffset imm32 in + let rnreg = get_arm_reg (b 19 16) in + let mem = + mk_arm_offset_address_op + ~align:4 rnreg offset ~isadd ~isindex ~iswback in + (* STC{L} , , [, #+/-]{!} *) + StoreCoprocessor (islong, false, c, coproc, crd, mem WR, None) + (* <6>01D00<11><-imm8-> *) (* VSTM - A1-wb *) | (1, 2, 11) when (bv 0) = 0 -> let d = prefix_bit (bv 22) (b 15 12) in @@ -4738,6 +4767,27 @@ let parse_cond15 (instr: doubleword_int) (iaddr: doubleword_int) = (* BLX