diff --git a/CodeHawk/CH/xprlib/xsimplify.ml b/CodeHawk/CH/xprlib/xsimplify.ml index 8a88ecd9..611d1049 100644 --- a/CodeHawk/CH/xprlib/xsimplify.ml +++ b/CodeHawk/CH/xprlib/xsimplify.ml @@ -45,7 +45,7 @@ exception XSimplificationProblem of CHPretty.pretty_t let xpr_to_pretty e = xpr_printer#pr_expr e -let x2p = xpr_to_pretty +(* let x2p = xpr_to_pretty *) type e_struct_t = @@ -378,8 +378,6 @@ and reduce_minus (m: bool) (e1: xpr_t) (e2: xpr_t) = (* ((&x + y) - z) ==> (&x + (y - z)) *) | (XOp (XPlus, [XOp ((Xf "addressofvar"), [x]); y]), _) -> - let _ = pr_debug [STR " DEBUG: reduce_minus: "; - STR "e1: "; x2p e1; STR "; e2: "; x2p e2; NL] in rs XPlus [XOp ((Xf "addressofvar"), [x]); XOp (XMinus, [y; e2])] (* (x << 3) - x) --> (7 * x) *) diff --git a/CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml b/CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml index f46f564c..ff8e47a5 100644 --- a/CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml +++ b/CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml @@ -6,7 +6,7 @@ Copyright (c) 2005-2020 Kestrel Technology LLC Copyright (c) 2020 Henny Sipma - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -604,6 +604,8 @@ let main () = pr_timing [STR "dictionary saved"]; save_global_memory_map (); pr_timing [STR "global-locations saved"]; + arm_analysis_results#save; + pr_timing [STR "analysis results saved"]; save_interface_dictionary (); pr_timing [STR "interface dictionary saved"]; save_bcdictionary (); diff --git a/CodeHawk/CHB/bchlib/bCHFloc.ml b/CodeHawk/CHB/bchlib/bCHFloc.ml index 3f94ab67..364c67d5 100644 --- a/CodeHawk/CHB/bchlib/bCHFloc.ml +++ b/CodeHawk/CHB/bchlib/bCHFloc.ml @@ -676,7 +676,7 @@ object (self) let memoff_r = TR.tbind (fun memvar -> - let memtype = self#get_variable_type memvar in + let memtype = self#env#get_variable_type memvar in let memtype = match memtype with | Some t -> t @@ -703,7 +703,8 @@ object (self) match optbasetype with | Some t when is_pointer t -> ptr_deref t | _ -> t_unknown in - address_memory_offset basetype (num_constant_expr memoffset)) + address_memory_offset basetype + ~tgtsize:(Some size) (num_constant_expr memoffset)) (self#env#get_variable base#getSeqNumber) in mk_memvar memref_r memoff_r @@ -717,6 +718,9 @@ object (self) let addr = XOp (XPlus, [varx; num_constant_expr numoffset]) in let address = simplify_xpr (inv#rewrite_expr addr) in match address with + | XConst (IntConst n) when n#equal CHNumerical.numerical_zero -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Address is zero"] | XConst (IntConst n) -> let dw = numerical_mod_to_doubleword n in if system_info#get_image_base#le dw then @@ -1319,23 +1323,139 @@ object (self) method private get_variable_type (v: variable_t): btype_t option = if self#f#env#is_initial_register_value v then let reg_r = self#f#env#get_initial_register_value_register v in - let param_r = - TR.tbind - (fun reg -> - if self#f#get_summary#has_parameter_for_register reg then - Ok (self#f#get_summary#get_parameter_for_register reg) - else - Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " - ^ (p2s v#toPretty) - ^ " does not have an associated parameter"]) - reg_r in TR.tfold_default - (fun param -> Some param.apar_type) - (self#env#get_variable_type v) - param_r + (fun reg -> + if self#f#get_summary#has_parameter_for_register reg then + let param = self#f#get_summary#get_parameter_for_register reg in + Some param.apar_type + else + self#env#get_variable_type v) + None + reg_r + else if self#env#is_initial_memory_value v then + let memvar_r = self#env#get_init_value_variable v in + TR.tfold + ~ok:self#get_variable_type + ~error:(fun e -> + begin log_error_result __FILE__ __LINE__ e; None end) + memvar_r else self#env#get_variable_type v + method convert_variable_offsets + ?(size=None) (v: variable_t): variable_t traceresult = + if self#env#is_basevar_memory_variable v then + let basevar_r = self#env#get_memvar_basevar v in + let offset_r = self#env#get_memvar_offset v in + let cbasevar_r = TR.tbind self#convert_value_offsets basevar_r in + let basetype_r = TR.tmap self#get_variable_type cbasevar_r in + let tgttype_r = + TR.tbind + (fun basetype -> + match basetype with + | Some (TPtr (t, _)) -> Ok t + | Some t -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Type " ^ (btype_to_string t) + ^ " is not a pointer"] + | _ -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "No type for variable " + ^ (p2s v#toPretty) + ^ "with basevar " + ^ (p2s (TR.tget_ok cbasevar_r)#toPretty)]) basetype_r in + let coffset_r = + TR.tbind + (fun offset -> + match offset with + | ConstantOffset (n, NoOffset) -> + TR.tbind + (fun tgttype -> + address_memory_offset + ~tgtsize:size tgttype (num_constant_expr n)) tgttype_r + | _ -> Ok offset) offset_r in + TR.tbind + ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " ^ (p2s v#toPretty)) + (fun cbasevar -> + TR.tbind + (fun coffset -> + self#env#mk_basevar_memory_variable cbasevar coffset + ) coffset_r) + cbasevar_r + else + Ok v + + method convert_value_offsets + ?(size=None) (v: variable_t): variable_t traceresult = + if self#env#is_basevar_memory_value v then + let basevar_r = self#env#get_memval_basevar v in + let offset_r = self#env#get_memval_offset v in + let cbasevar_r = TR.tbind self#convert_value_offsets basevar_r in + let basetype_r = TR.tmap self#get_variable_type cbasevar_r in + let tgttype_r = + TR.tbind + (fun basetype -> + match basetype with + | Some (TPtr (t, _)) -> Ok t + | Some t -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Type " ^ (btype_to_string t) + ^ " is not a pointer"] + | _ -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "No type for variable " + ^ (p2s v#toPretty) + ^ "with basevar " + ^ (p2s (TR.tget_ok cbasevar_r)#toPretty)]) basetype_r in + let coffset_r = + TR.tbind + (fun offset -> + match offset with + | NoOffset -> + TR.tbind + (fun tgttype -> + address_memory_offset + ~tgtsize:size tgttype (int_constant_expr 0)) tgttype_r + | ConstantOffset (n, NoOffset) -> + TR.tbind + (fun tgttype -> + address_memory_offset + ~tgtsize:size tgttype (num_constant_expr n)) tgttype_r + | _ -> Ok offset) offset_r in + TR.tbind + ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " ^ (p2s v#toPretty)) + (fun cbasevar -> + TR.tbind + (fun coffset -> + let memvar_r = + self#env#mk_basevar_memory_variable cbasevar coffset in + TR.tbind self#env#mk_initial_memory_value memvar_r + ) coffset_r) + cbasevar_r + else + Ok v + + method convert_xpr_offsets ?(size=None) (x: xpr_t): xpr_t traceresult = + let rec aux exp = + match exp with + | XVar v when self#env#is_basevar_memory_value v -> + TR.tmap + ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) + (fun v -> XVar v) (self#convert_value_offsets ~size v) + | XVar v when self#env#is_basevar_memory_variable v -> + TR.tmap + ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) + (fun v -> XVar v) (self#convert_variable_offsets ~size v) + | XOp ((Xf "addressofvar"), [XVar v]) -> + TR.tmap + ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) + (fun v -> XVar v) (self#convert_variable_offsets ~size v) + | XOp (op, [xx]) -> TR.tmap (fun x -> XOp (op, [x])) (aux xx) + | XOp (op, [x1; x2]) -> + TR.tmap2 (fun x1 x2 -> XOp (op, [x1; x2])) (aux x1) (aux x2) + | _ -> Ok exp in + aux x + method get_xpr_type (x: xpr_t): btype_t option = match x with | XVar v -> self#get_variable_type v @@ -1351,9 +1471,12 @@ object (self) | [base] -> let offset = simplify_xpr (XOp (XMinus, [x; XVar base])) in let memref_r = self#env#mk_base_variable_reference base in - let vartype = self#get_variable_type base in + let vartype = self#env#get_variable_type base in let vartype = match vartype with None -> t_unknown | Some t -> t in - let memoff_r = address_memory_offset vartype offset in + let rvartype = TR.tvalue (resolve_type vartype) ~default:t_unknown in + let basetype = + if is_pointer rvartype then ptr_deref rvartype else t_unknown in + let memoff_r = address_memory_offset basetype offset in (* (match offset with | XConst (IntConst n) -> Ok (ConstantOffset (n, NoOffset)) @@ -1784,7 +1907,7 @@ object (self) (* if rhs is the address of a global variable create an address-of expression for that global variable. *) match rhs with - | XConst (IntConst n) -> + | XConst (IntConst n) when n#gt CHNumerical.numerical_zero -> let dw = numerical_mod_to_doubleword n in if memmap#has_location dw then TR.tfold @@ -1971,7 +2094,7 @@ object (self) | [RegisterParameter (r, _)] -> let argvar = self#env#mk_register_variable r in self#rewrite_variable_to_external argvar - | [GlobalParameter (a, _)] -> + | [GlobalParameter (a, _)] when not (a#equal wordzero) -> let argvar = self#env#mk_global_variable a#to_numerical in (match argvar with | Error e -> @@ -2013,7 +2136,7 @@ object (self) method evaluate_summary_address_term (t:bterm_t) = match t with | ArgValue p -> self#evaluate_fts_address_argument p - | NumConstant num -> + | NumConstant num when num#gt CHNumerical.numerical_zero -> log_tfold_default (mk_tracelog_spec ~tag:"evaluate_summary_address_term" @@ -2031,7 +2154,7 @@ object (self) None) None (numerical_to_doubleword num) - | ArgAddressedValue (subT,NumConstant offset) -> + | ArgAddressedValue (subT, NumConstant offset) -> let optBase = self#evaluate_summary_address_term subT in begin match optBase with diff --git a/CodeHawk/CHB/bchlib/bCHFunctionData.ml b/CodeHawk/CHB/bchlib/bCHFunctionData.ml index 8185afb0..44b0cf74 100644 --- a/CodeHawk/CHB/bchlib/bCHFunctionData.ml +++ b/CodeHawk/CHB/bchlib/bCHFunctionData.ml @@ -78,6 +78,7 @@ let stackvar_intro_to_string (svi: stackvar_intro_t) = let function_annotation_to_string (a: function_annotation_t) = (String.concat "\n" (List.map regvar_intro_to_string a.regvarintros)) + ^ "\n" ^ (String.concat "\n" (List.map stackvar_intro_to_string a.stackvarintros)) @@ -174,6 +175,16 @@ object (self) None a.regvarintros | _ -> None + method get_stackvar_intro (offset: int): stackvar_intro_t option = + match self#get_function_annotation with + | Some a -> + List.fold_left (fun acc svi -> + match acc with + | Some _ -> acc + | _ -> if svi.svi_offset = offset then Some svi else None) + None a.stackvarintros + | _ -> None + method has_regvar_type_annotation (iaddr: doubleword_int): bool = match self#get_function_annotation with | Some a -> @@ -182,6 +193,14 @@ object (self) a.regvarintros | _ -> false + method has_stackvar_type_annotation (offset: int): bool = + match self#get_function_annotation with + | Some a -> + List.exists + (fun svi -> svi.svi_offset = offset && Option.is_some svi.svi_vartype) + a.stackvarintros + | _ -> false + method has_regvar_type_cast (iaddr: doubleword_int): bool = match self#get_function_annotation with | Some a -> @@ -189,6 +208,13 @@ object (self) (fun rvi -> rvi.rvi_iaddr#equal iaddr && rvi.rvi_cast) a.regvarintros | _ -> false + method has_stackvar_type_cast (offset: int): bool = + match self#get_function_annotation with + | Some a -> + List.exists + (fun svi -> svi.svi_offset = offset && svi.svi_cast) a.stackvarintros + | _ -> false + method get_regvar_type_annotation (iaddr: doubleword_int): btype_t traceresult = let opttype = match self#get_function_annotation with @@ -221,6 +247,39 @@ object (self) __FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " ^ "No register var annotation found at " ^ iaddr#to_hex_string] + method get_stackvar_type_annotation (offset: int): btype_t traceresult = + let opttype = + match self#get_function_annotation with + | None -> + Some + (Error [ + __FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Function " ^ faddr#to_hex_string ^ " does not have annotations"]) + | Some a -> + List.fold_left + (fun acc svi -> + match acc with + | Some _ -> acc + | _ -> + if svi.svi_offset = offset then + match svi.svi_vartype with + | Some t -> Some (Ok t) + | _ -> + Some + (Error [ + __FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Stack var annotation at offset " + ^ (string_of_int offset) + ^ " does not have a type"]) + else + acc) None a.stackvarintros in + match opttype with + | Some r -> r + | None -> + Error [ + __FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "No stackvar annotation found at offset " ^ (string_of_int offset)] + method add_inlined_block (baddr:doubleword_int) = inlined_blocks <- baddr :: inlined_blocks @@ -504,31 +563,33 @@ let read_xml_stackvar_intro (node: xml_element_int): stackvar_intro_t traceresul else if not (has "name") then Error ["stackvar intro without name"] else - let svi_offset = geti "offset" in + let svi_offset = (-(geti "offset")) in let svi_name = get "name" in - let svi_vartype = + let (svi_vartype, svi_cast) = if has "typename" then let typename = get "typename" in + let iscast = (has "cast") && ((get "cast") = "yes") in TR.tfold ~ok:(fun btype -> if has "ptrto" && (get "ptrto") = "yes" then - Some (t_ptrto btype) + (Some (t_ptrto btype), iscast) else if has "arraysize" then let arraysize = geti "arraysize" in - Some (t_array btype arraysize) + (Some (t_array btype arraysize), iscast) else - Some btype) + (Some btype, iscast)) ~error:(fun e -> begin log_error_result __FILE__ __LINE__ e; - None + (None, false) end) (convert_string_to_type typename) else - None in + (None, false) in Ok {svi_offset = svi_offset; svi_name = svi_name; - svi_vartype = svi_vartype} + svi_vartype = svi_vartype; + svi_cast = svi_cast} let read_xml_function_annotation (node: xml_element_int) = diff --git a/CodeHawk/CHB/bchlib/bCHFunctionInfo.ml b/CodeHawk/CHB/bchlib/bCHFunctionInfo.ml index aafe5c33..90848ebb 100644 --- a/CodeHawk/CHB/bchlib/bCHFunctionInfo.ml +++ b/CodeHawk/CHB/bchlib/bCHFunctionInfo.ml @@ -800,6 +800,14 @@ object (self) let avar = varmgr#make_memory_variable memref ~size offset in self#mk_variable avar + method mk_basevar_memory_variable + ?(size=4) + (basevar: variable_t) + (offset: memory_offset_t): variable_t traceresult = + let memref_r = self#mk_base_variable_reference basevar in + TR.tmap + (fun memref -> self#mk_offset_memory_variable ~size memref offset) + memref_r (* method mk_index_offset_global_memory_variable diff --git a/CodeHawk/CHB/bchlib/bCHGlobalMemoryMap.ml b/CodeHawk/CHB/bchlib/bCHGlobalMemoryMap.ml index b6e9470d..40943541 100644 --- a/CodeHawk/CHB/bchlib/bCHGlobalMemoryMap.ml +++ b/CodeHawk/CHB/bchlib/bCHGlobalMemoryMap.ml @@ -483,7 +483,10 @@ object (self) ?(initialvalue = None) ?(size = None) (address: doubleword_int): global_location_int traceresult = - if H.mem locations address#index then + if address#lt (TR.tget_ok (BCHDoubleword.int_to_doubleword 20)) then + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Illegal global address: " ^ address#to_hex_string] + else if H.mem locations address#index then begin ch_error_log#add "duplicate global location" diff --git a/CodeHawk/CHB/bchlib/bCHLibTypes.mli b/CodeHawk/CHB/bchlib/bCHLibTypes.mli index d5eb51f9..7f51ba65 100644 --- a/CodeHawk/CHB/bchlib/bCHLibTypes.mli +++ b/CodeHawk/CHB/bchlib/bCHLibTypes.mli @@ -1507,7 +1507,8 @@ type regvar_intro_t = { type stackvar_intro_t = { svi_offset: int; svi_name: string; - svi_vartype: btype_t option + svi_vartype: btype_t option; + svi_cast: bool } @@ -1551,7 +1552,9 @@ class type function_data_int = method get_function_name: string (* demangled or combination of all names *) method get_function_annotation: function_annotation_t option method get_regvar_type_annotation: doubleword_int -> btype_t traceresult + method get_stackvar_type_annotation: int -> btype_t traceresult method get_regvar_intro: doubleword_int -> regvar_intro_t option + method get_stackvar_intro: int -> stackvar_intro_t option method get_inlined_blocks: doubleword_int list method get_function_type: btype_t method get_path_contexts: (string * string list) list @@ -1562,6 +1565,8 @@ class type function_data_int = method has_function_annotation: bool method has_regvar_type_annotation: doubleword_int -> bool method has_regvar_type_cast: doubleword_int -> bool + method has_stackvar_type_annotation: int -> bool + method has_stackvar_type_cast: int -> bool method has_class_info: bool method has_callsites: bool method has_path_contexts: bool @@ -4629,6 +4634,13 @@ class type function_environment_int = -> numerical_t -> variable_t + (** [mk_basevar_memory_variable offset] returns a memory variable with + [basevar] as base variable and offset [offset]. + + If [basevar] is not a valid base variable an error is returned.*) + method mk_basevar_memory_variable: + ?size:int -> variable_t -> memory_offset_t -> variable_t traceresult + (** [mk_offset_memory_variable memref memoff] returns a memory variable with [memref] as basis and a generic memory offset. @@ -5919,6 +5931,14 @@ class type floc_int = *) method decompose_address: xpr_t -> (memory_reference_int * memory_offset_t) + method convert_value_offsets: + ?size:int option -> variable_t -> variable_t traceresult + + method convert_variable_offsets: + ?size:int option -> variable_t -> variable_t traceresult + + method convert_xpr_offsets: ?size:int option -> xpr_t -> xpr_t traceresult + (* returns the variable associated with the address expression *) method get_lhs_from_address: xpr_t -> variable_t diff --git a/CodeHawk/CHB/bchlib/bCHMemoryReference.ml b/CodeHawk/CHB/bchlib/bCHMemoryReference.ml index 0d09a20e..12242c3b 100644 --- a/CodeHawk/CHB/bchlib/bCHMemoryReference.ml +++ b/CodeHawk/CHB/bchlib/bCHMemoryReference.ml @@ -186,6 +186,15 @@ let rec address_memory_offset Ok NoOffset | XConst (IntConst n) when is_unknown_type rbasetype -> Ok (ConstantOffset (n, NoOffset)) + | XConst (IntConst n) -> + let tgtbtype = + if is_unknown_type tgtbtype then None else Some tgtbtype in + if is_struct_type rbasetype then + structvar_memory_offset ~tgtsize ~tgtbtype rbasetype xoffset + else if is_array_type rbasetype then + arrayvar_memory_offset ~tgtsize ~tgtbtype rbasetype xoffset + else + Ok (ConstantOffset (n, NoOffset)) | _ -> let tgtbtype = if is_unknown_type tgtbtype then None else Some tgtbtype in @@ -195,9 +204,7 @@ let rec address_memory_offset arrayvar_memory_offset ~tgtsize ~tgtbtype rbasetype xoffset else Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " - ^ (btype_to_string basetype) - ^ " (" ^ (btype_to_string rbasetype) ^ ")" - ^ " is not known to be a struct or array"] + ^ "Offset " ^ (x2s xoffset) ^ " not yet supported"] and structvar_memory_offset ~(tgtsize: int option) diff --git a/CodeHawk/CHB/bchlib/bCHTypeConstraintStore.ml b/CodeHawk/CHB/bchlib/bCHTypeConstraintStore.ml index d8fad46b..57d1ed5d 100644 --- a/CodeHawk/CHB/bchlib/bCHTypeConstraintStore.ml +++ b/CodeHawk/CHB/bchlib/bCHTypeConstraintStore.ml @@ -556,60 +556,64 @@ object (self) | None -> None | Some (TArray (TComp _ as ty, _, _) as tstructarray) -> let cinfo = get_struct_type_compinfo ty in - let finfo0 = List.hd cinfo.bcfields in - let ftype = resolve_type finfo0.bftype in - (match ftype with - | Error _ -> None - | Ok ftype -> - let _ixftype = bcd#index_typ ftype in - let _ixctype = bcd#index_typ ty in - let _ = - chlog#add - "first field struct check" - (LBLOCK [ - INT offset; - STR ": "; - pretty_print_list - s#toList - (fun i -> STR (btype_to_string (bcd#get_typ i))) - "{" "; " "}"; - STR ": compinfo: "; - STR cinfo.bcname; - STR ": first field type: "; - STR (btype_to_string ftype)]) in - (* TBD: restore this check in a better way - if s#fold (fun acc i -> acc && (i = ixftype || i = ixctype)) true then - Some tstructarray + (match cinfo.bcfields with + | [] -> None + | finfo0::_ -> + let ftype = resolve_type finfo0.bftype in + (match ftype with + | Error _ -> None + | Ok ftype -> + let _ixftype = bcd#index_typ ftype in + let _ixctype = bcd#index_typ ty in + let _ = + chlog#add + "first field struct check" + (LBLOCK [ + INT offset; + STR ": "; + pretty_print_list + s#toList + (fun i -> STR (btype_to_string (bcd#get_typ i))) + "{" "; " "}"; + STR ": compinfo: "; + STR cinfo.bcname; + STR ": first field type: "; + STR (btype_to_string ftype)]) in + (* TBD: restore this check in a better way + if s#fold (fun acc i -> acc && (i = ixftype || i = ixctype)) true then + Some tstructarray else - None)*) - Some tstructarray) + None)*) + Some tstructarray)) | Some (TComp _ as ty) -> let cinfo = get_struct_type_compinfo ty in - let finfo0 = List.hd cinfo.bcfields in - let ftype = resolve_type finfo0.bftype in - (match ftype with - | Error _ -> None - | Ok ftype -> - let ixftype = bcd#index_typ ftype in - let ixctype = bcd#index_typ ty in - let _ = - chlog#add - "first field struct check (TComp case)" - (LBLOCK [ - INT offset; - STR ": "; - pretty_print_list - s#toList - (fun i -> STR (btype_to_string (bcd#get_typ i))) - "{" "; " "}"; - STR ": compinfo: "; - STR cinfo.bcname; - STR ": first field type: "; - STR (btype_to_string ftype)]) in - if s#fold (fun acc i -> acc && (i = ixftype || i = ixctype)) true then - Some ftype - else - None) + (match cinfo.bcfields with + | [] -> None + | finfo0::_ -> + let ftype = resolve_type finfo0.bftype in + (match ftype with + | Error _ -> None + | Ok ftype -> + let ixftype = bcd#index_typ ftype in + let ixctype = bcd#index_typ ty in + let _ = + chlog#add + "first field struct check (TComp case)" + (LBLOCK [ + INT offset; + STR ": "; + pretty_print_list + s#toList + (fun i -> STR (btype_to_string (bcd#get_typ i))) + "{" "; " "}"; + STR ": compinfo: "; + STR cinfo.bcname; + STR ": first field type: "; + STR (btype_to_string ftype)]) in + if s#fold (fun acc i -> acc && (i = ixftype || i = ixctype)) true then + Some ftype + else + None)) | _ -> None in let result = new IntCollections.set_t in begin diff --git a/CodeHawk/CHB/bchlib/bCHTypeConstraintUtil.ml b/CodeHawk/CHB/bchlib/bCHTypeConstraintUtil.ml index 6578bbc5..29bedea6 100644 --- a/CodeHawk/CHB/bchlib/bCHTypeConstraintUtil.ml +++ b/CodeHawk/CHB/bchlib/bCHTypeConstraintUtil.ml @@ -437,6 +437,10 @@ let rec mk_btype_constraint (tv: type_variable_t) (ty: btype_t) | TPtr (pty, _) -> let ptv = add_deref_capability tv in mk_btype_constraint ptv pty + | TArray (elty, Some (Const (CInt (i64, _, _))), _) -> + let size = Int64.to_int i64 in + let atv = add_array_access_capability size tv in + mk_btype_constraint atv elty | TArray (elty, _, _) -> let size_r = size_of_btype elty in (match size_r with diff --git a/CodeHawk/CHB/bchlib/bCHVariable.ml b/CodeHawk/CHB/bchlib/bCHVariable.ml index e4db3e9a..0a18cc42 100644 --- a/CodeHawk/CHB/bchlib/bCHVariable.ml +++ b/CodeHawk/CHB/bchlib/bCHVariable.ml @@ -97,7 +97,8 @@ object (self:'a) | _ -> (match offset with | NoOffset -> "__pderef_" ^ basename ^ "_" - | _ -> basename ^ (memory_offset_to_string offset))) + | ConstantOffset _ -> basename ^ (memory_offset_to_string offset) + | _ -> "__pderef_" ^ basename ^ (memory_offset_to_string offset))) | RegisterVariable reg -> register_to_string reg | CPUFlagVariable flag -> flag_to_string flag | AuxiliaryVariable a -> @@ -1004,7 +1005,10 @@ object (self) method has_constant_offset (v: variable_t) = (self#is_memory_variable v) - && (tfold_default is_constant_offset false (self#get_memvar_offset v)) + && (tfold_default + (fun off -> is_constant_offset off || is_field_offset off) + false + (self#get_memvar_offset v)) method is_unknown_base_memory_variable (v: variable_t) = (self#is_memory_variable v) diff --git a/CodeHawk/CHB/bchlib/bCHVersion.ml b/CodeHawk/CHB/bchlib/bCHVersion.ml index d015baaa..0d616543 100644 --- a/CodeHawk/CHB/bchlib/bCHVersion.ml +++ b/CodeHawk/CHB/bchlib/bCHVersion.ml @@ -6,7 +6,7 @@ Copyright (c) 2005-2019 Kestrel Technology LLC Copyright (c) 2020 Henny Sipma - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -95,8 +95,8 @@ end let version = new version_info_t - ~version:"0.6.0_20250210" - ~date:"2025-02-10" + ~version:"0.6.0_20250308" + ~date:"2025-03-08" ~licensee: None ~maxfilesize: None () diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.ml b/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.ml index 0972b737..c7720b36 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMConditionalExpr.ml @@ -313,6 +313,11 @@ let cc_expr | (Move (true, ACCAlways, _, y, _, _), ACCNotEqual) -> (XOp (XNe, [v y; zero_constant_expr]), [y]) + (* ------------------------------------------------- Reverse Subtract --- *) + + | (ReverseSubtract (true, ACCAlways, _, x, y, _), ACCNonNegative) -> + (XOp (XGe, [XOp (XMinus, [v y; v x]); zero_constant_expr]), [x; y]) + (* --------------------------------------------------------- Subtract --- *) | (Subtract (true, ACCAlways, _, x, y, _, _), ACCEqual) -> diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMOperand.ml b/CodeHawk/CHB/bchlibarm32/bCHARMOperand.ml index 9b45cf1d..411ebd54 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMOperand.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMOperand.ml @@ -458,18 +458,21 @@ object (self:'a) TR.tprop (floc#env#mk_global_variable dw#to_numerical) (__FILE__ ^ ":" ^ (string_of_int __LINE__)) - | ARMOffsetAddress (r, align, offset, isadd, _iswback, _isindex, size) -> + | ARMOffsetAddress (r, align, offset, isadd, _iswback, isindex, size) -> (match offset with | ARMImmOffset _ -> let rvar = env#mk_arm_register_variable r in let numoffset_r = - match (offset, isadd) with - | (ARMImmOffset i, true) -> Ok (mkNumerical i) - | (ARMImmOffset i, false) -> Ok (mkNumerical i)#neg - | _ -> - Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " - ^ "Immediate offset not yet implemented for offset " - ^ (arm_memory_offset_to_string offset)] in + if isindex then + match (offset, isadd) with + | (ARMImmOffset i, true) -> Ok (mkNumerical i) + | (ARMImmOffset i, false) -> Ok (mkNumerical i)#neg + | _ -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Immediate offset not yet implemented for offset " + ^ (arm_memory_offset_to_string offset)] + else + Ok numerical_zero in TR.tbind ~msg:(__FILE__ ^ ":" ^ (string_of_int __LINE__)) (fun memoff -> @@ -484,6 +487,9 @@ object (self:'a) let ivax = floc#inv#rewrite_expr (XVar ivar) in let xoffset = simplify_xpr (XOp (XPlus, [rx; ivax])) in (match (xoffset, i) with + | (XConst (IntConst n), 0) when n#equal CHNumerical.numerical_zero -> + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Illegal address (zero) for ARMOffsetAddress"] | (XConst (IntConst n), 0) -> floc#env#mk_global_variable ~size n | _ -> @@ -1131,22 +1137,22 @@ let arm_sp_deref ?(with_offset=0) (mode:arm_operand_mode_t) = if with_offset >= 0 then let offset = ARMImmOffset with_offset in mk_arm_offset_address_op - ARSP offset ~isadd:true ~iswback:false ~isindex:false mode + ARSP offset ~isadd:true ~iswback:false ~isindex:true mode else let offset = ARMImmOffset (-with_offset) in mk_arm_offset_address_op - ARSP offset ~isadd:false ~iswback:false ~isindex:false mode + ARSP offset ~isadd:false ~iswback:false ~isindex:true mode let arm_reg_deref ?(with_offset=0) (reg: arm_reg_t) (mode:arm_operand_mode_t) = if with_offset >= 0 then let offset = ARMImmOffset with_offset in mk_arm_offset_address_op - reg offset ~isadd:true ~iswback:false ~isindex:false mode + reg offset ~isadd:true ~iswback:false ~isindex:true mode else let offset = ARMImmOffset (-with_offset) in mk_arm_offset_address_op - reg offset ~isadd:false ~iswback:false ~isindex:false mode + reg offset ~isadd:false ~iswback:false ~isindex:true mode let equal_register_lists (op1: arm_operand_int) (op2: arm_operand_int): bool = diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMOperand.mli b/CodeHawk/CHB/bchlibarm32/bCHARMOperand.mli index 625783ce..2b963cb6 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMOperand.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHARMOperand.mli @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2021-2024 Aarno Labs, LLC + Copyright (c) 2021-2025 Aarno Labs, LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml b/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml index b815f2ec..0b08abae 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml @@ -163,7 +163,7 @@ let disassemble_arm_section INT sectionsize]) in try begin - while ch#pos + 2 < sectionsize do (* <= causes problems at section end *) + while ch#pos < sectionsize do (* <= causes problems at section end *) let prevPos = ch#pos in let iaddr = sectionbase#add_int ch#pos in let _ = diff --git a/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml b/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml index 99a3887d..115b3a1d 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml @@ -95,6 +95,7 @@ object (self) val sp_offset_table = mk_index_table "sp-offset-table" val instrx_table = mk_index_table "instrx-table" val xd = vard#xd + val fndata = BCHFunctionData.functions_data#get_function faddr val mutable tables = [] @@ -127,6 +128,20 @@ object (self) let varinv = floc#varinv in let e16_c = int_constant_expr e16 in let e32_c = int_constant_expr e32 in + + let get_regvar_type_annotation (): btype_t option = + if fndata#has_regvar_type_annotation floc#l#i then + TR.tfold + ~ok:(fun t -> Some t) + ~error:(fun e -> + begin + log_error_result __FILE__ __LINE__ e; + None + end) + (fndata#get_regvar_type_annotation floc#l#i) + else + None in + let log_dc_error_result (file: string) (line: int) (e: string list) = if BCHSystemSettings.system_settings#collect_data then log_error_result ~msg:(p2s floc#l#toPretty) file line e @@ -621,6 +636,7 @@ object (self) (floc#get_var_at_address ~btype:(ptr_deref ptype) xx) else xx in + let xx = TR.tvalue (floc#convert_xpr_offsets xx) ~default:xx in let rdef = get_rdef_r xvar_r in (xx :: xprs, xvar_r :: xvars, rdef :: rdefs, index + 1)) ([], [], [], 1) callargs in @@ -707,6 +723,10 @@ object (self) [get_rdef_r xrn_r; get_rdef_r xrm_r] @ (get_all_rdefs_r rresult_r) in let uses = get_def_use_r vrd_r in let useshigh = get_def_use_high_r vrd_r in + let vrtype = + match get_regvar_type_annotation () with + | Some t -> t + | _ -> t_unknown in (* let rresult_r = TR.tmap @@ -720,6 +740,7 @@ object (self) let (tagstring, args) = mk_instrx_data_r ~vars_r:[vrd_r] + ~types:[vrtype] ~xprs_r:[xrn_r; xrm_r; result_r; rresult_r; xxrn_r; xxrm_r] ~rdefs:rdefs ~uses:[uses] @@ -1042,6 +1063,8 @@ object (self) let csetter = floc#f#get_associated_cc_setter floc#cia in let tcond = rewrite_test_expr csetter txpr in let fcond = rewrite_test_expr csetter fxpr in + let tcond_r = floc#convert_xpr_offsets ~size:(Some 4) tcond in + let fcond_r = floc#convert_xpr_offsets ~size:(Some 4) fcond in let bytestr = try let instr = @@ -1059,7 +1082,7 @@ object (self) let rdefs = (get_all_rdefs txpr) @ (get_all_rdefs tcond) in let (tagstring, args) = mk_instrx_data_r - ~xprs_r:[Ok txpr; Ok fxpr; Ok tcond; Ok fcond; xtgt_r] + ~xprs_r:[Ok txpr; Ok fxpr; tcond_r; fcond_r; xtgt_r] ~rdefs () in let (tags, args) = (tagstring :: ["TF"; csetter; bytestr], args) in @@ -1156,6 +1179,8 @@ object (self) let xresult_r = TR.tmap2 (fun xrn xrm -> XOp (XMinus, [xrn; xrm])) xrn_r xrm_r in let result_r = TR.tmap rewrite_expr xresult_r in + let result_r = + TR.tbind (floc#convert_xpr_offsets ~size:(Some 4)) result_r in let rdefs = [get_rdef_r xrn_r; get_rdef_r xrm_r] @ (get_all_rdefs_r result_r) in let (tagstring, args) = @@ -1493,6 +1518,8 @@ object (self) let useshigh = [get_def_use_high_r vrt_r] in let xxaddr_r = TR.tmap rewrite_expr xaddr_r in let xrmem_r = TR.tmap rewrite_expr xmem_r in + let xrmem_r = + TR.tbind (floc#convert_xpr_offsets ~size:(Some 4)) xrmem_r in let _ = TR.tfold_default (fun xrmem -> ignore (get_string_reference floc xrmem)) () xrmem_r in @@ -1697,6 +1724,7 @@ object (self) let vmem_r = mem#to_variable floc in let xmem_r = mem#to_expr floc in let xrmem_r = TR.tmap rewrite_expr xmem_r in + let xrmem_r = TR.tbind floc#convert_xpr_offsets xrmem_r in let xxaddr_r = TR.tmap rewrite_expr xaddr_r in let rdefs = [get_rdef_r xrn_r; get_rdef_r xrm_r; get_rdef_memvar_r vmem_r] @@ -2629,11 +2657,14 @@ object (self) | StoreRegister (c, rt, rn, rm, mem, _) -> let vmem_r = mem#to_variable floc in + let vmem_r = + TR.tbind (floc#convert_variable_offsets ~size:(Some 4)) vmem_r in let xaddr_r = mem#to_address floc in let xrt_r = rt#to_expr floc in let xrn_r = rn#to_expr floc in let xrm_r = rm#to_expr floc in let xxrt_r = TR.tmap rewrite_expr xrt_r in + let xxrtc_r = TR.tbind floc#convert_xpr_offsets xxrt_r in let xxaddr_r = TR.tmap rewrite_expr xaddr_r in let lhsvar_r = TR.tbind floc#get_var_at_address xxaddr_r in let rdefs = @@ -2643,7 +2674,7 @@ object (self) get_rdef_r xxrt_r] in let uses = [get_def_use_r vmem_r] in let useshigh = [get_def_use_high_r vmem_r] in - let xprs_r = [xrn_r; xrm_r; xrt_r; xxrt_r; xaddr_r] in + let xprs_r = [xrn_r; xrm_r; xrt_r; xxrt_r; xxrtc_r; xaddr_r] in let vars_r = [vmem_r; lhsvar_r] in let _ = floc#memrecorder#record_store_r @@ -2807,11 +2838,13 @@ object (self) | StoreRegisterHalfword (c, rt, rn, rm, mem, _) -> let vmem_r = mem#to_variable floc in + let vmem_r = TR.tbind floc#convert_variable_offsets vmem_r in let xaddr_r = mem#to_address floc in let xrt_r = rt#to_expr floc in let xrn_r = rn#to_expr floc in let xrm_r = rm#to_expr floc in let xxrt_r = TR.tmap rewrite_expr xrt_r in + let xxrt_r = TR.tbind floc#convert_xpr_offsets xxrt_r in let xxaddr_r = TR.tmap rewrite_expr xaddr_r in let rdefs = [get_rdef_r xrn_r; diff --git a/CodeHawk/CHB/bchlibarm32/bCHFnARMTypeConstraints.ml b/CodeHawk/CHB/bchlibarm32/bCHFnARMTypeConstraints.ml index f1fe2528..8d859b8f 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHFnARMTypeConstraints.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHFnARMTypeConstraints.ml @@ -110,6 +110,19 @@ object (self) else None in + let get_stackvar_type_annotation (offset: int): btype_t option = + if fndata#has_stackvar_type_annotation offset then + TR.tfold + ~ok:(fun t -> Some t) + ~error:(fun e -> + begin + log_error_result __FILE__ __LINE__ e; + None + end) + (fndata#get_stackvar_type_annotation offset) + else + None in + let rdef_pairs_to_pretty (pairs: (symbol_t * symbol_t) list) = pretty_print_list pairs @@ -242,6 +255,20 @@ object (self) let xrn_r = rn#to_expr floc in begin + (match get_regvar_type_annotation () with + | Some t -> + let rdreg = rd#to_register in + let lhstypevar = mk_reglhs_typevar rdreg faddr iaddr in + let opttc = mk_btype_constraint lhstypevar t in + (match opttc with + | Some tc -> + begin + log_type_constraint "ADD-rvintro" tc; + store#add_constraint tc + end + | _ -> ()) + | _ -> ()); + (if rm#is_immediate && (rm#to_numerical#toInt < 256) then let rdreg = rd#to_register in let lhstypevar = mk_reglhs_typevar rdreg faddr iaddr in @@ -483,17 +510,28 @@ object (self) | Some offset -> let lhstypevar = mk_localstack_lhs_typevar offset faddr iaddr in - if is_pointer ptype then - let eltype = ptr_deref ptype in - let atype = t_array eltype 1 in - let opttc = mk_btype_constraint lhstypevar atype in - match opttc with - | Some tc -> - begin - log_type_constraint "BL-reg-arg" tc; - store#add_constraint tc - end - | _ -> ()) + match get_stackvar_type_annotation offset with + | Some t -> + let opttc = mk_btype_constraint lhstypevar t in + (match opttc with + | Some tc -> + begin + log_type_constraint "BL-stack-vintro" tc; + store#add_constraint tc + end + | _ -> ()) + | _ -> + if is_pointer ptype then + let eltype = ptr_deref ptype in + let atype = t_array eltype 1 in + let opttc = mk_btype_constraint lhstypevar atype in + match opttc with + | Some tc -> + begin + log_type_constraint "BL-reg-arg" tc; + store#add_constraint tc + end + | _ -> ()) end ) callargs @@ -787,11 +825,29 @@ object (self) end - | LoadRegisterHalfword (_, rt, rn, rm, _, _) when rm#is_immediate -> + | LoadRegisterHalfword (_, rt, rn, rm, memop, _) when rm#is_immediate -> let rtreg = rt#to_register in let rttypevar = mk_reglhs_typevar rtreg faddr iaddr in begin + (* loaded type may be known *) + (let xmem_r = memop#to_expr floc in + let xrmem_r = + TR.tmap (fun x -> simplify_xpr (floc#inv#rewrite_expr x)) xmem_r in + let xtype_r = TR.tmap floc#get_xpr_type xrmem_r in + let xtype_opt = TR.tvalue xtype_r ~default:None in + match xtype_opt with + | Some t -> + let opttc = mk_btype_constraint rttypevar t in + (match opttc with + | Some tc -> + begin + log_type_constraint "LDRH-var" tc; + store#add_constraint tc + end + | _ -> ()) + | _ -> ()); + (* LDRH rt, [rn, rm] : X_rndef.load <: X_rt *) (let xrdef = get_variable_rdefs_r (rn#to_variable floc) in let rnreg = rn#to_register in diff --git a/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.ml b/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.ml index 965e6322..cf87b0f6 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.ml @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -1751,7 +1751,7 @@ let translate_arm_instruction ~use:usevars ~usehigh:usehigh ctxtiaddr in - let cmds = defcmds @ cmds @ updatecmds in + let cmds = defcmds @ cmds in (match c with | ACCAlways -> default cmds | _ -> make_conditional_commands c cmds) diff --git a/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.mli b/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.mli index f5e94708..990dd518 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.mli @@ -4,7 +4,7 @@ ------------------------------------------------------------------------------ The MIT License (MIT) - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/CodeHawk/CHB/bchlibelf/bCHELFSectionHeaderCreator.ml b/CodeHawk/CHB/bchlibelf/bCHELFSectionHeaderCreator.ml index f768a2db..3e01d2a6 100644 --- a/CodeHawk/CHB/bchlibelf/bCHELFSectionHeaderCreator.ml +++ b/CodeHawk/CHB/bchlibelf/bCHELFSectionHeaderCreator.ml @@ -105,12 +105,17 @@ let ud_get_offset (sectionname: string) = (LBLOCK [STR "No offset found for "; STR sectionname])) -let assumption_violation (p:pretty_t) = - let msg = LBLOCK [STR "Section header creation assumption violation: "; p] in - begin - ch_error_log#add "section header creation" msg; - raise (BCH_failure msg) - end +let assumption_violation + (line: int) (s: elf_dynamic_segment_int) (p:pretty_t) = + let msg = + LBLOCK [STR "bCHELFSectionHeaderCreator:"; INT line; STR ": "; + STR "Assumption violation: "; p] in + begin + ch_error_log#add + "section header creation" + (LBLOCK [msg; NL; STR "Dynamic table: "; NL; s#toPretty;NL]); + raise (BCH_failure msg) + end class section_header_creator_t @@ -162,23 +167,31 @@ object (self) method private get_offset_2 (vaddr:doubleword_int): doubleword_int = match loadsegments with - | [] -> raise (BCH_failure (LBLOCK [ STR "No load segments found" ])) - | [ (_,_,_) ] -> - assumption_violation (STR "Only one load segment found" ) + | [] -> + assumption_violation + __LINE__ dynamicsegment (LBLOCK [ STR "No load segments found" ]) + | [ (_, _, _) ] -> + assumption_violation + __LINE__ + dynamicsegment + (STR "Only one load segment found") | (_,_,_)::(_,ph,_)::_ -> let base2 = ph#get_vaddr in let offset2 = ph#get_offset in - let basediff = - fail_tvalue - (trerror_record - (LBLOCK [ - STR "BCHELFSectionHeaderCreator#get_offset_2: "; - STR "vaddr: "; - vaddr#toPretty; - STR "; base2: "; - base2#toPretty])) - (vaddr#subtract base2) in - basediff#add offset2 + TR.tfold + ~ok:(fun basediff -> basediff#add offset2) + ~error:(fun e -> + assumption_violation + __LINE__ + dynamicsegment + (LBLOCK [ + STR "Base2 address: "; + base2#toPretty; + STR " cannot be subtracted from vaddr: "; + vaddr#toPretty; + STR ": "; + STR (String.concat "; " e)])) + (vaddr#subtract base2) method private has_interp_program_header = List.exists @@ -196,8 +209,10 @@ object (self) | _ -> false) phdrs in ph with | Not_found -> - raise - (BCH_failure (LBLOCK [ STR "PT_INTERP program header not found" ])) + assumption_violation + __LINE__ + dynamicsegment + (LBLOCK [STR "PT_INTERP program header not found"]) method private get_dynamic_program_header = try @@ -208,7 +223,10 @@ object (self) | _ -> false) phdrs in ph with | Not_found -> - assumption_violation (STR "PT_DYNAMIC program header not found") + assumption_violation + __LINE__ + dynamicsegment + (STR "PT_DYNAMIC program header not found") method private has_reginfo_program_header = List.exists (fun (_,ph,_) -> @@ -225,8 +243,10 @@ object (self) | _ -> false) phdrs in ph with | Not_found -> - raise - (BCH_failure (LBLOCK [ STR "PT_REGINFO program header not found" ])) + assumption_violation + __LINE__ + dynamicsegment + (LBLOCK [STR "PT_REGINFO program header not found"]) method get_section_headers = List.mapi @@ -383,7 +403,7 @@ object (self) let offset = self#get_offset_1 vaddr in let trsize = symtabaddr#subtract vaddr in if Result.is_error trsize then - assumption_violation (STR "DT_SYMTAB < DT_HASH") + assumption_violation __LINE__ dynamicsegment (STR "DT_SYMTAB < DT_HASH") else let size = TR.tget_ok trsize in let entsize = s2d "0x4" in @@ -395,6 +415,8 @@ object (self) end else assumption_violation + __LINE__ + dynamicsegment (STR "DT_HASH or DT_SYMTAB not present, or DT_HASH is zero") (* inputs: from dynamic table, program header, type PT_Load (1) @@ -422,14 +444,10 @@ object (self) else if ud_has_size sectionname then ud_get_size sectionname else - begin - chlog#add "dynamic table" (dynamicsegment#toPretty); - assumption_violation - (LBLOCK [ - STR "Unable to determine size of dynamic symbol table"; - NL; - dynamicsegment#toPretty]) - end in + assumption_violation + __LINE__ + dynamicsegment + (LBLOCK [STR "Unable to determine size of dynamic symbol table"]) in let sh = mk_elf_section_header () in let stype = s2d "0xb" in let flags = s2d "0x2" in @@ -491,7 +509,15 @@ object (self) let addr = vaddr in let offset = self#get_offset_1 vaddr in let size = - TR.tget_ok (numerical_to_doubleword dynamicsegment#get_string_table_size) in + TR.tfold + ~ok:Fun.id + ~error:(fun e -> + assumption_violation + __LINE__ + dynamicsegment + (LBLOCK [STR "Illegal size of string table: "; + STR (String.concat "; " e)])) + (numerical_to_doubleword dynamicsegment#get_string_table_size) in let addralign = s2d "0x1" in begin sh#set_fields @@ -693,7 +719,7 @@ object (self) section_headers <- sh :: section_headers end else - assumption_violation (STR "DT_INIT not present") + assumption_violation __LINE__ dynamicsegment (STR "DT_INIT not present") (* inputs: from elf file header, program header, type PT_Load (1) * - addr: fh#get_program_entry_point ? @@ -726,18 +752,22 @@ object (self) let finiaddr = dynamicsegment#get_fini_address in let finidiff = finiaddr#subtract vaddr in if Result.is_error finidiff then - assumption_violation (STR "DT_FINI < program entry point") + assumption_violation + __LINE__ dynamicsegment (STR "DT_FINI < program entry point") else TR.tget_ok finidiff else if dynamicsegment#has_init_address then let initaddress = dynamicsegment#get_init_address in let initdiff = initaddress#subtract vaddr in if Result.is_error initdiff then - assumption_violation (STR "DT_INIT < program entry point") + assumption_violation + __LINE__ dynamicsegment (STR "DT_INIT < program entry point") else TR.tget_ok initdiff else assumption_violation + __LINE__ + dynamicsegment (LBLOCK [ STR "DT_INIT and DT_FINI not present; "; STR "please provide size of .text section in fixup data"]) in @@ -806,17 +836,21 @@ object (self) let vaddr = finiaddr#add finisize in let phenddiff = phend#subtract finiaddr in if Result.is_error phenddiff then - assumption_violation (STR "PT_Load(end) < finiaddr") + assumption_violation + __LINE__ dynamicsegment (STR "PT_Load(end) < finiaddr") else let trsize = (TR.tget_ok phenddiff)#subtract finisize in if Result.is_error trsize then - assumption_violation (STR "PT_Load(end) < finiaddr") + assumption_violation + __LINE__ dynamicsegment (STR "PT_Load(end) < finiaddr") else let size = TR.tget_ok trsize in (vaddr, size) else begin assumption_violation + __LINE__ + dynamicsegment (LBLOCK [ STR "No addr/size information for .rodata; "; STR "please supply in fixup data"]) @@ -976,7 +1010,8 @@ object (self) let offset = self#get_offset_2 vaddr in let trsize = rldmapaddr#subtract vaddr in if Result.is_error trsize then - assumption_violation (STR "DT_MIPS_RLD_MAP < data header address") + assumption_violation + __LINE__ dynamicsegment (STR "DT_MIPS_RLD_MAP < data header address") else let size = TR.tget_ok trsize in let addralign = s2d "0x4" in @@ -1050,7 +1085,7 @@ object (self) let offset = self#get_offset_2 vaddr in let trsize = (ph#get_vaddr#add ph#get_file_size)#subtract vaddr in if Result.is_error trsize then - assumption_violation (STR "filesize < vaddr" ) + assumption_violation __LINE__ dynamicsegment (STR "filesize < vaddr") else let size = TR.tget_ok trsize in let addralign = s2d "0x4" in diff --git a/CodeHawk/CHB/bchlibelf/bCHELFSectionHeaderCreator.mli b/CodeHawk/CHB/bchlibelf/bCHELFSectionHeaderCreator.mli index ce717b2d..a814f519 100644 --- a/CodeHawk/CHB/bchlibelf/bCHELFSectionHeaderCreator.mli +++ b/CodeHawk/CHB/bchlibelf/bCHELFSectionHeaderCreator.mli @@ -6,7 +6,7 @@ Copyright (c) 2005-2020 Kestrel Technology LLC Copyright (c) 2020 Henny Sipma - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/CodeHawk/CHB/bchlibmips32/bCHDisassembleMIPS.ml b/CodeHawk/CHB/bchlibmips32/bCHDisassembleMIPS.ml index 5fff5eed..fac28489 100644 --- a/CodeHawk/CHB/bchlibmips32/bCHDisassembleMIPS.ml +++ b/CodeHawk/CHB/bchlibmips32/bCHDisassembleMIPS.ml @@ -243,6 +243,14 @@ let disassemble_mips_sections () = STR "disassemble section:"; NL; h#toPretty; NL; NL] in let displacement = TR.tget_ok (h#get_addr#subtract_to_int startOfCode) in + let _ = + chlog#add + "disassembly" + (LBLOCK [ + STR "disassemble section: "; + h#toPretty; + STR " with displacement: "; + INT displacement]) in let _ = pverbose [ STR "disassemble section at displacement: "; @@ -582,7 +590,10 @@ let get_successors (faddr:doubleword_int) (iaddr:doubleword_int) = (get_mips_assembly_instruction iaddr) -let trace_block (faddr:doubleword_int) (baddr:doubleword_int) = +let trace_block + (faddr:doubleword_int) + (baddr:doubleword_int): + (mips_assembly_block_int list * mips_assembly_block_int) TR.traceresult = let set_block_entry (va: doubleword_int) = TR.titer @@ -590,7 +601,8 @@ let trace_block (faddr:doubleword_int) (baddr:doubleword_int) = ~error:(fun e -> log_error_result __FILE__ __LINE__ e) (get_mips_assembly_instruction va) in - let get_instr iaddr = get_mips_assembly_instruction iaddr in + let get_instr (iaddr: doubleword_int): mips_assembly_instruction_result = + get_mips_assembly_instruction iaddr in let get_next_instr_addr a = a#add_int 4 in @@ -598,7 +610,12 @@ let trace_block (faddr:doubleword_int) (baddr:doubleword_int) = List.map (fun va -> (make_location {loc_faddr = faddr ; loc_iaddr = va})#ci) l in - let rec find_last_instr (va: doubleword_int) (prev: doubleword_int) = + let rec find_last_instr + (va: doubleword_int) + (prev: doubleword_int): + (ctxt_iaddress_t list option + * doubleword_int + * mips_assembly_block_int list) TR.traceresult = let instr = fail_tvalue (trerror_record @@ -610,24 +627,24 @@ let trace_block (faddr:doubleword_int) (baddr:doubleword_int) = if va#equal wordzero || not (!mips_assembly_instructions#is_code_address va) then - (Some [],prev,[]) + Ok (Some [], prev, []) else if is_return_instruction instr#get_opcode then - (Some [],va#add_int 4,[]) + Ok (Some [], va#add_int 4, []) else if instr#is_block_entry then - (None,prev,[]) + Ok (None, prev, []) else if is_nr_call_instruction instr then - (Some [],va#add_int 4,[]) + Ok (Some [], va#add_int 4, []) else if is_conditional_jump_instruction instr#get_opcode || is_fp_conditional_jump_instruction instr#get_opcode then let nextblock = va#add_int 8 in let tgtblock = get_direct_jump_target_address instr#get_opcode in - (Some (mk_ci_succ [ nextblock ; tgtblock ]),va#add_int 4,[]) + Ok (Some (mk_ci_succ [nextblock; tgtblock]), va#add_int 4, []) else if is_direct_jump_instruction instr#get_opcode then let tgtblock = get_direct_jump_target_address instr#get_opcode in if functions_data#is_function_entry_point tgtblock then - (Some [], va#add_int 4, []) (* function chaining *) + Ok (Some [], va#add_int 4, []) (* function chaining *) else - (Some (mk_ci_succ [tgtblock]), va#add_int 4, []) + Ok (Some (mk_ci_succ [tgtblock]), va#add_int 4, []) else if is_indirect_jump_instruction instr#get_opcode then if system_info#has_jump_table_target faddr va then let loc = make_location { loc_faddr = faddr ; loc_iaddr = va } in @@ -638,16 +655,16 @@ let trace_block (faddr:doubleword_int) (baddr:doubleword_int) = let reg = MIPSRegister (get_indirect_jump_instruction_register instr#get_opcode) in let _ = finfo#set_jumptable_target ctxtiaddr jt#get_start_address jt reg in - (Some (mk_ci_succ targets), va#add_int 4, []) + Ok (Some (mk_ci_succ targets), va#add_int 4, []) else if system_info#has_indirect_jump_targets faddr va then let targets = system_info#get_indirect_jump_targets faddr va in - (Some (mk_ci_succ targets), va#add_int 4, []) + Ok (Some (mk_ci_succ targets), va#add_int 4, []) else - (Some [], va#add_int 4, []) + Ok (Some [], va#add_int 4, []) else if instr#is_delay_slot then - (None, va, []) + Ok (None, va, []) else if is_halt_instruction instr#get_opcode then - (Some [], va, []) + Ok (Some [], va, []) else if instr#is_inlined_call then let a = match instr#get_opcode with | BranchLTZeroLink (_,tgt) @@ -677,11 +694,15 @@ let trace_block (faddr:doubleword_int) (baddr:doubleword_int) = [(make_location {loc_faddr = faddr; loc_iaddr = returnsite})#ci] | l -> List.map (fun s -> add_ctxt_to_ctxt_string faddr s ctxt) l in make_ctxt_mips_assembly_block ctxt b succ) fn#get_blocks in - (Some [ callsucc ],va,inlinedblocks) + Ok (Some [callsucc], va, inlinedblocks) else find_last_instr (nextva ()) va in - let (succ, lastaddr, inlinedblocks) = + let result_r: + (ctxt_iaddress_t list option + * doubleword_int + * mips_assembly_block_int list) TR.traceresult = + (* let (succ, lastaddr, inlinedblocks) = *) let instr = fail_tvalue (trerror_record @@ -689,32 +710,37 @@ let trace_block (faddr:doubleword_int) (baddr:doubleword_int) = (get_instr baddr) in let opcode = instr#get_opcode in if is_return_instruction opcode then - (Some [],baddr#add_int 4,[]) + Ok (Some [], baddr#add_int 4, []) else if system_info#is_nonreturning_call faddr baddr then - (Some [], baddr#add_int 4, []) + Ok (Some [], baddr#add_int 4, []) else if is_indirect_jump_instruction opcode then if system_info#has_jump_table_target faddr baddr then let (jt,_,lb,ub) = system_info#get_jump_table_target faddr baddr in let targets = jt#get_targets jt#get_start_address lb ub in - (Some (mk_ci_succ targets), baddr#add_int 4, []) + Ok (Some (mk_ci_succ targets), baddr#add_int 4, []) else - (Some [], baddr#add_int 4, []) + Ok (Some [], baddr#add_int 4, []) else if is_conditional_jump_instruction opcode then let nextblock = baddr#add_int 8 in let tgtblock = get_direct_jump_target_address opcode in - (Some (mk_ci_succ [ nextblock ; tgtblock ]),baddr#add_int 4,[]) + Ok (Some (mk_ci_succ [nextblock; tgtblock]), baddr#add_int 4, []) else if is_direct_jump_instruction opcode then let tgtblock = get_direct_jump_target_address opcode in - (Some (mk_ci_succ [ tgtblock ]),baddr#add_int 4,[]) + Ok (Some (mk_ci_succ [tgtblock ]), baddr#add_int 4, []) else find_last_instr (get_next_instr_addr baddr) baddr in - let successors = - match succ with Some s -> s | _ -> get_successors faddr lastaddr in - (inlinedblocks, make_mips_assembly_block faddr baddr lastaddr successors) + TR.tmap + (fun (succ, lastaddr, inlinedblocks) -> + let successors = + match succ with + | Some s -> s + | _ -> get_successors faddr lastaddr in + (inlinedblocks, make_mips_assembly_block faddr baddr lastaddr successors)) + result_r -let trace_function (faddr:doubleword_int) = +let trace_function (faddr:doubleword_int): mips_assembly_function_int = let workSet = new DoublewordCollections.set_t in let doneSet = new DoublewordCollections.set_t in let set_block_entry (baddr: doubleword_int) = @@ -726,17 +752,26 @@ let trace_function (faddr:doubleword_int) = let add_to_workset l = List.iter (fun a -> if doneSet#has a then () else workSet#add a) l in let blocks = ref [] in - let rec add_block (entry:doubleword_int) = - let (inlinedblocks,block) = trace_block faddr entry in - let blocksuccessors = block#get_successors in - begin - set_block_entry entry ; - workSet#remove entry ; - doneSet#add entry ; - blocks := (block :: inlinedblocks) @ !blocks ; - add_to_workset (List.map get_iaddr blocksuccessors) ; - match workSet#choose with Some a -> add_block a | _ -> () - end in + let rec add_block (entry: doubleword_int) = + let result_r: (mips_assembly_block_int list + * mips_assembly_block_int) TR.traceresult = + trace_block faddr entry in + TR.titer + ~ok:(fun (inlinedblocks, block) -> + let blocksuccessors = block#get_successors in + begin + set_block_entry entry; + workSet#remove entry; + doneSet#add entry; + blocks := (block :: inlinedblocks) @ !blocks; + add_to_workset (List.map get_iaddr blocksuccessors); + match workSet#choose with Some a -> add_block a | _ -> () + end) + ~error:(fun e -> + log_error_result + ~tag:"trace_function:add block" __FILE__ __LINE__ + (("faddr: " ^ faddr#to_hex_string) :: e)) + result_r in let _ = add_block faddr in let blocklist = List.sort (fun b1 b2 -> @@ -750,9 +785,7 @@ let trace_function (faddr:doubleword_int) = let construct_mips_assembly_function (_count: int) (faddr: doubleword_int) = try - let _ = pverbose [STR " trace function "; faddr#toPretty; NL] in let fn = trace_function faddr in - let _ = pverbose [STR " add function "; faddr#toPretty; NL] in mips_assembly_functions#add_function fn with | BCH_failure p -> @@ -875,7 +908,6 @@ let record_call_targets () = (LBLOCK [STR "function "; faddr#toPretty; STR ": "; p])) - let decorate_functions () = begin record_call_targets () diff --git a/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyFunction.ml b/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyFunction.ml index 554fe18b..14f8dd49 100644 --- a/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyFunction.ml +++ b/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyFunction.ml @@ -6,7 +6,7 @@ Copyright (c) 2005-2020 Kestrel Technology LLC Copyright (c) 2020 Henny Sipma - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -78,6 +78,7 @@ object (self) raise (BCH_failure (LBLOCK [ + STR __FILE__; STR ":"; INT __LINE__; STR ": "; STR "No assembly block found at "; STR bctxt; STR " in function "; @@ -92,7 +93,12 @@ object (self) with | Not_found -> let msg = - LBLOCK [STR "assembly_function#get_instruction: "; iaddr#toPretty] in + LBLOCK [ + STR __FILE__; STR ":"; INT __LINE__; STR ": "; + STR "assembly_function#get_instruction: "; + iaddr#toPretty; + STR " in function "; + faddr#toPretty] in begin ch_error_log#add "invocation error" msg; raise (BCH_failure msg) diff --git a/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyFunctions.ml b/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyFunctions.ml index cb806863..edaabeee 100644 --- a/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyFunctions.ml +++ b/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyFunctions.ml @@ -6,7 +6,7 @@ Copyright (c) 2005-2020 Kestrel Technology LLC Copyright (c) 2020 Henny Sipma - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -105,32 +105,52 @@ let create_ordering Not_found -> begin ch_error_log#add "pivot node not found" - (LBLOCK [ pivotNode#toPretty ]) ; + (LBLOCK [pivotNode#toPretty]); raise Not_found end in let newCalls = List.filter (fun (e1,e2) -> (not (e1#equal (fst edge))) || (not (e2#equal (snd edge)))) cs in - let _ = chlog#add "break cycle" - (LBLOCK [ STR "remove " ; STR "(" ; - (fst edge)#toPretty ; STR "," ; (snd edge)#toPretty ; - STR ") with " ; INT incoming ; STR " edges (size of cycle: " ; - INT (List.length fns) ; STR ")" ]) in + let _ = + chlog#add "break cycle" + (LBLOCK [ + STR "remove "; + STR "(" ; + (fst edge)#toPretty; + STR ","; + (snd edge)#toPretty; + STR ") with "; + INT incoming; + STR " edges (size of cycle: "; + INT (List.length fns); + STR ")"]) in aux nonleaves newCalls result ((-1)::stats) true | _ -> let newCalls = - List.filter (fun (_,callee) -> + List.filter (fun (_, callee) -> List.for_all (fun f -> not (callee#equal f)) leaves) cs in - aux nonleaves newCalls (result@leaves) ((List.length leaves)::stats) cycle + aux + nonleaves + newCalls + (result @ leaves) + ((List.length leaves) :: stats) + cycle with Not_found -> begin ch_error_log#add "error in find cycle" - (LBLOCK [ STR "calls: " ; pretty_print_list cs - (fun (a1,a2) -> - LBLOCK [ STR "(" ; a1#toPretty ; STR "," ; a2#toPretty ; STR ")" ]) - " [" ", " "]" ]) ; - (result,stats,cycle) + (LBLOCK [ + STR "calls: "; + pretty_print_list cs + (fun (a1, a2) -> + LBLOCK [ + STR "("; + a1#toPretty; + STR ","; + a2#toPretty; + STR ")"]) + " [" ", " "]" ]); + (result, stats, cycle) end in aux functions calls [] [] false @@ -157,7 +177,7 @@ object (self) functions f#get_address#index (inline_blocks_mips_assembly_function - fdata#get_inlined_blocks f) ; + fdata#get_inlined_blocks f); chlog#add "mips assembly function:inline blocks" f#get_address#toPretty end) @@ -169,10 +189,11 @@ object (self) H.find functions index with | Not_found -> - let msg = [ STR "Unable to find function with index: " ; - dw_index_to_pretty index ] in + let msg = [ + STR "Unable to find function with index: "; + dw_index_to_pretty index] in begin - pr_debug (msg @ [ NL ]) ; + pr_debug (msg @ [NL]); raise (BCH_failure (LBLOCK msg)) end @@ -181,10 +202,11 @@ object (self) self#get_function faddr#index with | BCH_failure _ -> - let msg = [ STR "Unable to find function with address: " ; - faddr#toPretty ] in + let msg = [ + STR "Unable to find function with address: "; + faddr#toPretty] in begin - pr_debug (msg @ [ NL ]) ; + pr_debug (msg @ [NL]); raise (BCH_failure (LBLOCK msg)) end @@ -242,12 +264,21 @@ object (self) H.replace table a#index ((H.find table a#index) + 1) else H.add table a#index 1 in - let _ = List.iter (fun f -> f#iteri (fun faddr a _ -> add faddr a)) self#get_functions in + let _ = + List.iter + (fun f -> f#iteri (fun faddr a _ -> add faddr a)) self#get_functions in let overlap = ref 0 in let multiple = ref 0 in let _ = - H.iter (fun _ v -> if v = 1 then () else - begin overlap := !overlap + 1 ; multiple := !multiple + (v-1) end) table in + H.iter + (fun _ v -> + if v = 1 then + () + else + begin + overlap := !overlap + 1; + multiple := !multiple + (v-1) + end) table in (H.length table, !overlap, !multiple) method add_functions_by_preamble = @@ -256,25 +287,32 @@ object (self) let preamble_instrs = H.create 3 in let _ = (* collect preambles of regular functions *) self#itera (fun faddr f -> - let instr = f#get_instruction faddr in - let instrs = instr#get_instruction_bytes in - let entry = - if H.mem preambles instrs then - H.find preambles instrs - else - begin - H.add preambles instrs 0; - H.add preamble_instrs instrs instr; - 0 - end in - H.replace preambles instrs (entry + 1)) in + try + let instr = f#get_instruction faddr in + let instrs = instr#get_instruction_bytes in + let entry = + if H.mem preambles instrs then + H.find preambles instrs + else + begin + H.add preambles instrs 0; + H.add preamble_instrs instrs instr; + 0 + end in + H.replace preambles instrs (entry + 1) + with + | _ -> () + ) in let _ = (* log the results *) H.iter (fun k v -> chlog#add "function preambles" - (LBLOCK [ (H.find preamble_instrs k)#toPretty ; STR " (" ; - STR (byte_string_to_printed_string k) ; STR "): "; - INT v ])) preambles in + (LBLOCK [ + (H.find preamble_instrs k)#toPretty; + STR " ("; + STR (byte_string_to_printed_string k); + STR "):"; + INT v])) preambles in let maxentry = ref 0 in let maxpreamble = ref "" in let _ = (* find the most common preamble *) @@ -308,10 +346,12 @@ object (self) let _ = chlog#add "initialization" - (LBLOCK [ STR "Add " ; INT (List.length !fnsAdded) ; - STR " functions by preamble (from " ; - INT (List.length commonpreambles) ; - STR " common preambles)" ]) in + (LBLOCK [ + STR "Add "; + INT (List.length !fnsAdded); + STR " functions by preamble (from "; + INT (List.length commonpreambles); + STR " common preambles)"]) in !fnsAdded method private get_live_instructions = @@ -322,10 +362,9 @@ object (self) H.replace table a#index ((H.find table a#index) + 1) else H.add table a#index 1 in - let _ = List.iter (fun f -> - f#iteri - (fun faddr a _ -> add faddr a)) - self#get_functions in + let _ = + List.iter (fun f -> f#iteri (fun faddr a _ -> add faddr a)) + self#get_functions in table method dark_matter_to_string = @@ -367,7 +406,8 @@ let get_export_metrics () = exports_metrics_handler#init_value let get_mips_disassembly_metrics () = - let (coverage,overlap,alloverlap) = mips_assembly_functions#get_function_coverage in + let (coverage,overlap,alloverlap) = + mips_assembly_functions#get_function_coverage in let instrs = !mips_assembly_instructions#get_num_instructions in let imported_imports = [] in let loaded_imports = [] in diff --git a/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyInstruction.ml b/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyInstruction.ml index 5df08c7f..6b547266 100644 --- a/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyInstruction.ml +++ b/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyInstruction.ml @@ -1,12 +1,12 @@ (* ============================================================================= - CodeHawk Binary Analyzer + CodeHawk Binary Analyzer Author: Henny Sipma ------------------------------------------------------------------------------ The MIT License (MIT) - + Copyright (c) 2005-2019 Kestrel Technology LLC Copyright (c) 2020 Henny Sipma - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -14,10 +14,10 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -46,43 +46,46 @@ open BCHMIPSTypes class mips_assembly_instruction_t (virtual_address:doubleword_int) - (opcode:mips_opcode_t) + (opcode:mips_opcode_t) (instruction_bytes:string):mips_assembly_instruction_int = object (self) - + val mutable block_entry = false val mutable delay_slot = false val mutable inlined_call = false - + method set_block_entry = block_entry <- true method set_inlined_call = inlined_call <- true method set_delay_slot = delay_slot <- true - + method is_block_entry = block_entry method is_delay_slot = delay_slot - + method get_address = virtual_address - + method get_opcode = opcode - + + method is_invalid = + match opcode with | OpInvalid -> true | _ -> false + method get_instruction_bytes = instruction_bytes method get_bytes_ashexstring = byte_string_to_printed_string instruction_bytes - + method private is_locked = system_info#is_locked_instruction virtual_address method is_inlined_call = inlined_call method private is_function_entry_point = functions_data#is_function_entry_point self#get_address - + method toString = (if self#is_locked then "lock " else "") ^ (mips_opcode_to_string opcode) - - method toPretty = LBLOCK [ STR self#toString ] + + method toPretty = LBLOCK [STR self#toString] method write_xml (node:xml_element_int) = let opc = self#get_opcode in @@ -98,12 +101,12 @@ object (self) mips_dictionary#write_xml_mips_bytestring node ((byte_string_to_printed_string self#get_instruction_bytes)) end - + end - + + let make_mips_assembly_instruction (va:doubleword_int) (opcode:mips_opcode_t) (instructionBytes:string) = new mips_assembly_instruction_t va opcode instructionBytes - diff --git a/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyInstructions.ml b/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyInstructions.ml index 016358fb..1a78b7a0 100644 --- a/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyInstructions.ml +++ b/CodeHawk/CHB/bchlibmips32/bCHMIPSAssemblyInstructions.ml @@ -6,7 +6,7 @@ Copyright (c) 2005-2019 Kestrel Technology LLC Copyright (c) 2020 Henny Sipma - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -165,8 +165,7 @@ object (self) ^ codeEnd#to_hex_string] method private at_index (index: int): mips_assembly_instruction_result = - TR.tmap - ~msg:"at_index" + TR.tbind (fun instr -> if instr#get_address#equal wordzero then let newInstr = @@ -174,10 +173,15 @@ object (self) (codeBase#add_int index) NoOperation "" in begin set_instruction index newInstr; - newInstr + Ok newInstr end + else if instr#is_invalid then + Error [__FILE__ ^ ":" ^ (string_of_int __LINE__) ^ ": " + ^ "Instruction at address " + ^ instr#get_address#to_hex_string + ^ " is invalid"] else - instr) + Ok instr) (get_instruction index) method set_instruction @@ -203,15 +207,9 @@ object (self) method private set_not_code_block (db:data_block_int) = let startaddr = db#get_start_address in let endaddr = db#get_end_address in - log_tfold - (mk_tracelog_spec - ~tag:"disassembly" - ("set_not_code_block:startaddr:" ^ startaddr#to_hex_string)) + TR.tfold ~ok:(fun startindex -> - log_tfold - (mk_tracelog_spec - ~tag:"disassembly" - ("set_not_code_block:endaddr:" ^ endaddr#to_hex_string)) + TR.tfold ~ok:(fun endindex -> let startinstr = make_mips_assembly_instruction @@ -229,9 +227,15 @@ object (self) "not code (data block)" (LBLOCK [startaddr#toPretty; STR " - "; endaddr#toPretty])) end) - ~error:(fun _ -> ()) + ~error:(fun e -> + log_error_result + ~tag:"disassembly" __FILE__ __LINE__ + (("endaddr: " ^ endaddr#to_hex_string) :: e)) (self#indexresult endaddr)) - ~error:(fun _ -> ()) + ~error:(fun e -> + log_error_result + ~tag:"disassembly" __FILE__ __LINE__ + (("startaddr: " ^ startaddr#to_hex_string) :: e)) (self#indexresult startaddr) method get_code_addresses_rev ?(low=codeBase) ?(high=wordmax) () = @@ -329,7 +333,9 @@ object (self) let spacedstring = byte_string_to_spaced_string instrbytes in let len = String.length spacedstring in let bytestring = - if len <= 16 then + if len = 0 then + "" + else if len <= 16 then let s = Bytes.make 16 ' ' in begin Bytes.blit (Bytes.of_string spacedstring) 0 s 0 len; @@ -341,11 +347,7 @@ object (self) | NotCode None -> () | NotCode (Some (DataBlock db)) -> stringList := db#toString :: !stringList - | OpInvalid -> - let line = (Bytes.to_string statusString) ^ va#to_hex_string ^ " " - ^ bytestring ^ " " ^ "**invalid**" in - stringList := line :: !stringList - + | OpInvalid -> () | _ -> let _ = if !firstNew then @@ -375,7 +377,8 @@ end let mips_assembly_instructions = ref (new mips_assembly_instructions_t 1 wordzero) -let get_mips_assembly_instruction (va: doubleword_int) = +let get_mips_assembly_instruction + (va: doubleword_int): mips_assembly_instruction_result = !mips_assembly_instructions#get_instruction va diff --git a/CodeHawk/CHB/bchlibmips32/bCHMIPSTypes.mli b/CodeHawk/CHB/bchlibmips32/bCHMIPSTypes.mli index 36de36dd..af7806b1 100644 --- a/CodeHawk/CHB/bchlibmips32/bCHMIPSTypes.mli +++ b/CodeHawk/CHB/bchlibmips32/bCHMIPSTypes.mli @@ -6,7 +6,7 @@ Copyright (c) 2005-2020 Kestrel Technology LLC Copyright (c) 2020 Henny Sipma - Copyright (c) 2021-2024 Aarno Labs LLC + Copyright (c) 2021-2025 Aarno Labs LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -683,8 +683,10 @@ class type mips_dictionary_int = method index_mips_bytestring: string -> int method index_mips_instr_format: mips_instr_format_t -> int - method write_xml_mips_bytestring: ?tag:string -> xml_element_int -> string -> unit - method write_xml_mips_opcode: ?tag:string -> xml_element_int -> mips_opcode_t -> unit + method write_xml_mips_bytestring: + ?tag:string -> xml_element_int -> string -> unit + method write_xml_mips_opcode: + ?tag:string -> xml_element_int -> mips_opcode_t -> unit method write_xml: xml_element_int -> unit method read_xml: xml_element_int -> unit @@ -702,14 +704,15 @@ object (* accessors *) method get_address: doubleword_int - method get_opcode : mips_opcode_t + method get_opcode: mips_opcode_t method get_instruction_bytes: string method get_bytes_ashexstring: string (* predicates *) - method is_block_entry : bool - method is_delay_slot : bool + method is_block_entry: bool + method is_delay_slot: bool method is_inlined_call: bool + method is_invalid: bool (* i/o *) method write_xml: xml_element_int -> unit @@ -871,7 +874,10 @@ class type mips_assembly_function_int = method iter: (mips_assembly_block_int -> unit) -> unit method itera: (ctxt_iaddress_t -> mips_assembly_block_int -> unit) -> unit method iteri: - (doubleword_int -> ctxt_iaddress_t -> mips_assembly_instruction_int -> unit) + (doubleword_int + -> ctxt_iaddress_t + -> mips_assembly_instruction_int + -> unit) -> unit method populate_callgraph: callgraph_int -> unit @@ -907,8 +913,10 @@ class type mips_assembly_functions_int = (* iterators *) method iter: (mips_assembly_function_int -> unit) -> unit method itera: (doubleword_int -> mips_assembly_function_int -> unit) -> unit - method bottom_up_itera: (doubleword_int -> mips_assembly_function_int -> unit) -> unit - method top_down_itera: (doubleword_int -> mips_assembly_function_int -> unit) -> unit + method bottom_up_itera: + (doubleword_int -> mips_assembly_function_int -> unit) -> unit + method top_down_itera: + (doubleword_int -> mips_assembly_function_int -> unit) -> unit (* predicates *) method has_function_by_address: doubleword_int -> bool @@ -995,6 +1003,7 @@ class type mips_opcode_dictionary_int = method toPretty: pretty_t end + class type mips_analysis_results_int = object diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_CIPHER_CTX_free.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_CIPHER_CTX_free.xml deleted file mode 100644 index c8b45d61..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_CIPHER_CTX_free.xml +++ /dev/null @@ -1,32 +0,0 @@ - - -
- - - - Clears all information from a cipher context and frees any - allocated memory associated with it, including ctx - itself. This function should be called after all operations - using a cipher are complete so sensitive information does not - remain in memory. - - - void EVP_CIPHER_CTX_free (EVP_CIPHER_CTX *ctx) - cipher context to be freed - - - - - - EVP_CIPHER_CTX - - void - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_CIPHER_CTX_new.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_CIPHER_CTX_new.xml deleted file mode 100644 index c161d257..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_CIPHER_CTX_new.xml +++ /dev/null @@ -1,25 +0,0 @@ - - -
- - - Allocates and returns a cipher context. - - EVP_CIPHER_CTX *EVP_CIPHER_CTX_new (void) - - not null - null - - - - - EVP_CIPHER_CTX_new - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptFinal_ex.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptFinal_ex.xml deleted file mode 100644 index c18a0dc6..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptFinal_ex.xml +++ /dev/null @@ -1,43 +0,0 @@ - - -
- - - Decrypts the final partial block - - - int EVP_DecryptFinal_ex( - EVP_CIPHER_CTX *ctx - unsigned char *outm - int *outl - ) - - cipher context - output buffer - characters written - - 1 - 0 - - - - - - EVP_CIPHER_CTX - - - char - - - int - - int - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptInit_ex.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptInit_ex.xml deleted file mode 100644 index 9c191ece..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptInit_ex.xml +++ /dev/null @@ -1,53 +0,0 @@ - - -
- - - Sets up a cipher context for decryption - - - int EVP_DecryptInit_ex( - EVP_CIPHER_CTX *ctx - EVP_CIPHER *type - ENGINE *impl - const unsigned char *key - const unsigned char *iv - ) - - cipher context - cipher type - implementation engine - symmetric encryption key - initialization vector - - 1 - 0 - - - - - - EVP_CIPHER_CTX - - - EVP_CIPHER - - - ENGINE - - - char - - - char - - int - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptUpdate.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptUpdate.xml deleted file mode 100644 index 1ad84dac..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DecryptUpdate.xml +++ /dev/null @@ -1,53 +0,0 @@ - - -
- - - Decrypts inl bytes and writes the decrypted version to out - - - int EVP_DecryptUpdate( - EVP CIPHER_CTX *ctx - unsigned char *out - int *outl - const unsigned char *in - int inl - ) - - cipher context - output buffer - bytes written - input buffer - number of bytes in input buffer - - 1 - 0 - - - - - - EVP_CIPHER_CTX - - - char - - - int - - - char - - - int - - int - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestFinal_ex.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestFinal_ex.xml deleted file mode 100644 index e581b746..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestFinal_ex.xml +++ /dev/null @@ -1,54 +0,0 @@ - - -
- - - - Retrieves the digest value from ctx and places it in md. If - the s parameter is not NULL then the number of bytes of data - written (i.e. the length of the digest) will be written to the - integer at s, at most EVP_MAX_MD_SIZE bytes will be - written. After calling EVP_DigestFinal_ex() no additional - calls to EVP_DigestUpdate() can be made, but - EVP_DigestInit_ex2() can be called to initialize a new digest - operation. - - - - EVP_DigestFinal_ex( - EVP_MD_CTX *ctx - unsigned char *md - unsigned int *s - ) - - digest context - message digest - number of bytes written (length of digest) - - 1 - 0 - - - - - - EVP_MD_CTX -
-
- - unsigned char -
-
- - int - - int -
- - - - - - -
- diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestInit_ex.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestInit_ex.xml deleted file mode 100644 index 93e1dbd1..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestInit_ex.xml +++ /dev/null @@ -1,56 +0,0 @@ - - -
- - - - Sets up digest context ctx to use a digest type. type is - typically supplied by a function such as EVP_sha1(), or a - value explicitly fetched with EVP_MD_fetch(). - - If impl is non-NULL, its implementation of the digest type is - used if there is one, and if not, the default implementation - is used. - - The type parameter can be NULL if ctx has been already - initialized with another EVP_DigestInit_ex() call and has not - been reset with EVP_MD_CTX_reset(). - - - - EVP_DigestInit_ex( - EVP_MD_CTX *ctx - const EVP_MD *type - ENGINE *impl - ) - - digest context - digest type - implementation of digest type - - 1 - 0 - - - - - - EVP_MD_CTX -
-
- - EVP_MD - - - ENGINE - - int -
- - - - - - -
- diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestUpdate.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestUpdate.xml deleted file mode 100644 index 5aceacd3..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_DigestUpdate.xml +++ /dev/null @@ -1,49 +0,0 @@ - - -
- - - - Hashes cnt bytes of data at d into the digest context - ctx. This function can be called several times on the same ctx - to hash additional data. - - - - EVP_DigestUpdate( - EVP_MD_CTX *ctx - const void *d - size_t cnt - ) - - digest context - data to be hashed - size of d in bytes - - 1 - 0 - - - - - - EVP_MX_CTX -
-
- - void -
-
- - size_t - - int -
- - - - - - -
- diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptFinal_ex.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptFinal_ex.xml deleted file mode 100644 index cb84684b..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptFinal_ex.xml +++ /dev/null @@ -1,43 +0,0 @@ - - -
- - - Encrypts the final data that remains in a partial block - - - int EVP_EncryptFinal_ex( - EVP_CIPHER_CTX *ctx - unsigned char *out - int *outl - ) - - cipher context - output buffer - number of bytes written - - 1 - 0 - - - - - - EVP_CIPHER_CTX - - - char - - - int - - int - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptInit_ex.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptInit_ex.xml deleted file mode 100644 index 2c5f7822..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptInit_ex.xml +++ /dev/null @@ -1,53 +0,0 @@ - - -
- - - Sets up a cipher context for encryption - - - int EVP_EncryptInit_ex( - EVP_CIPHER_CTX *ctx - const EVP_CIPHER *type - ENGINE *impl - const unsigned char *key - const unsigned char *iv - ) - - cipher context - cipher type - implementation engine - symmetric key - initialization vector - - 1 - 0 - - - - - - EVP_CIPHER_CTX - - - EVP_CIPHER - - - ENGINE - - - char - - - char> - - int - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptUpdate.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptUpdate.xml deleted file mode 100644 index 482ada32..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_EncryptUpdate.xml +++ /dev/null @@ -1,53 +0,0 @@ - - -
- - - Encrypts inl bytes and writes them to out - - - int EVP_EncryptUpdate( - EVP_CIPHER_CTX *ctx - unsigned char *out - int *outl - unsigned char *in - int inl - ) - - cipher context - output buffer - number of characters written - input buffer - number of input characters - - 1 - 0 - - - - - - EVP_CIPHER_CTX - - - char - - - int - - - char - - - int - - int - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_CTX_free.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_CTX_free.xml deleted file mode 100644 index 97ee6692..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_CTX_free.xml +++ /dev/null @@ -1,28 +0,0 @@ - - -
- - - - Cleans up digest context ctx and frees up the space allocated to it. - - - void EVP_MD_CTX_free (EVP_MD_CTX *ctx) - the context to be unallocated - - - - - - EVP_MD_CTX - - void - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_CTX_new.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_CTX_new.xml deleted file mode 100644 index f8a4bd96..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_CTX_new.xml +++ /dev/null @@ -1,25 +0,0 @@ - - -
- - - Allocates and returns a digest context - - EVP_MD_CTX *EVP_MD_CTX_new (void) - - not null - null - - - - - EVP_MD_CTX - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_size.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_size.xml deleted file mode 100644 index a74dffbb..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_MD_size.xml +++ /dev/null @@ -1,32 +0,0 @@ - - -
- - - - Return the size of the message digest when passed an EVP_MD or - an EVP_MD_CTX structure, i.e. the size of the hash. - - - int EVP_MD_size (const EVP_MD *md) - message digest - - size of the hash - ? - - - - - - EVP_MD - - int - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_aes_256_cbc.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_aes_256_cbc.xml deleted file mode 100644 index db0d7422..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_aes_256_cbc.xml +++ /dev/null @@ -1,29 +0,0 @@ - - -
- - - - AES for 128, 192 and 256 bit keys in the following modes: CBC, - CFB with 128-bit shift, CFB with 1-bit shift, CFB with 8-bit - shift, CTR, ECB, and OFB. - - - const EVP_CIPHER *EVP_aes_256_cbc (void) - - not null - null - - - - - EVP_CIPHER - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_des_ede3_cbc.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_des_ede3_cbc.xml deleted file mode 100644 index 8ca7df81..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_des_ede3_cbc.xml +++ /dev/null @@ -1,28 +0,0 @@ - - -
- - - - Three-key triple DES in ECB, CBC, CFB with 64-bit shift, CFB - with 1-bit shift, CFB with 8-bit shift and OFB modes. - - - const EVP_CIPHER *EVP_des_ede3_cbc (void) - - not null - null - - - - - EVP_CIPHER - - - - - - - - - diff --git a/CodeHawk/CHB/bchsummaries/so_functions/EVP_sha256.xml b/CodeHawk/CHB/bchsummaries/so_functions/EVP_sha256.xml deleted file mode 100644 index d832319e..00000000 --- a/CodeHawk/CHB/bchsummaries/so_functions/EVP_sha256.xml +++ /dev/null @@ -1,25 +0,0 @@ - - -
- - - The sha256 algorithm which generates 256 bits of output - - EVP_MD *EVP_sha256 (void) - - not null - null - - - - - EVP_MD - - - - - - - - -