def exitblocks(self) -> Sequence[str]:
    """Return the addresses of the blocks that have no outgoing edges.

    A block is reported when it has no entry in ``self.edges`` or when its
    entry is empty. Addresses are returned in the iteration order of
    ``self.blocks``.
    """
    return [
        b for b in self.blocks
        if b not in self.edges or not self.edges[b]]


def containing_block(self, iaddr: str) -> str:
    """Return the address of the block that contains instruction ``iaddr``.

    Blocks are probed in the iteration order of ``self.blocks``; the first
    block whose ``has_instruction(iaddr)`` is true wins.

    Raises:
        UF.CHBError: if no block reports having the instruction.
    """
    for (baddr, b) in self.blocks.items():
        if b.has_instruction(iaddr):
            return baddr
    raise UF.CHBError(
        "Containing block not found for instruction address " + iaddr)


def reaching_definitions(self, var: str) -> List[str]:
    """Return the distinct definition locations recorded for ``var``.

    Scans ``self.xdata.reachingdefs``, skipping ``None`` entries and entries
    whose variable does not print as ``var``. Locations are returned as
    strings, de-duplicated, in order of first appearance.
    """
    locations = (
        str(loc)
        for rdef in self.xdata.reachingdefs
        if rdef is not None and str(rdef.variable) == var
        for loc in rdef.deflocations)
    # dict keys keep insertion order, which gives an ordered de-duplication
    # without the repeated linear "not in list" scan.
    return list(dict.fromkeys(locations))
ASTInterface) -> List[AST.ASTInstruction]: raise UF.CHBError("assembly-ast not defined") diff --git a/chb/arm/opcodes/ARMBranch.py b/chb/arm/opcodes/ARMBranch.py index 8e2d149f..ce275902 100644 --- a/chb/arm/opcodes/ARMBranch.py +++ b/chb/arm/opcodes/ARMBranch.py @@ -216,7 +216,9 @@ def opargs(self) -> List[ARMOperand]: def ft_conditions(self, xdata: InstrXData) -> Sequence[XXpr]: xd = ARMBranchXData(xdata) if xdata.has_branch_conditions(): - if xd.is_ok: + if xd.is_ctcond_ok: + return [xd.cfcond, xd.ctcond] + elif xd.is_ok: return [xd.fcond, xd.tcond] else: return [xd.fxpr, xd.txpr] diff --git a/chb/astinterface/ASTInterface.py b/chb/astinterface/ASTInterface.py index de7e67f7..857de76e 100644 --- a/chb/astinterface/ASTInterface.py +++ b/chb/astinterface/ASTInterface.py @@ -988,6 +988,7 @@ def introduce_ssa_variables( for (reg, locs) in rdeflocs.items(): for lst in locs: if len(lst) > 0: + # print("DEBUG: " + str(reg) + ": [" + ", ".join(str(loc) for loc in lst) + "]") loc1 = lst[0] vtype = None if loc1 in ftypes: diff --git a/chb/cmdline/astcmds.py b/chb/cmdline/astcmds.py index be042b10..202723d6 100644 --- a/chb/cmdline/astcmds.py +++ b/chb/cmdline/astcmds.py @@ -57,9 +57,13 @@ from chb.cmdline.PatchResults import PatchResults, PatchEvent import chb.cmdline.XInfo as XI +from chb.graphics.DotRdefPath import DotRdefPath + from chb.userdata.UserHints import UserHints +import chb.util.dotutil as UD import chb.util.fileutil as UF +import chb.util.graphutil as UG from chb.util.loggingutil import chklogger, LogLevel @@ -144,8 +148,8 @@ def buildast(args: argparse.Namespace) -> NoReturn: xpatchresultsfile = args.patch_results_file hide_globals: bool = args.hide_globals hide_annotations: bool = args.hide_annotations - remove_edges: List[str] = args.remove_edges - add_edges: List[str] = args.add_edges + show_reachingdefs: str = args.show_reachingdefs + output_reachingdefs: str = args.output_reachingdefs verbose: bool = args.verbose loglevel: str = args.loglevel 
logfilename: Optional[str] = args.logfilename @@ -386,6 +390,57 @@ def buildast(args: argparse.Namespace) -> NoReturn: functions_failed += 1 continue + if show_reachingdefs is not None: + if output_reachingdefs is None: + UC.print_error("\nSpecify a file to save the reaching defs") + continue + + rdefspec = show_reachingdefs.split(":") + if len(rdefspec) != 2: + UC.print_error( + "\nArgument to show_reachingdefs not recognized") + continue + + useloc = rdefspec[0] + register = rdefspec[1] + + if not f.has_instruction(useloc): + UC.print_status_update("Useloc: " + useloc + " not found") + continue + + tgtinstr = f.instruction(useloc) + + if not register in f.rdef_locations(): + UC.print_status_update( + "Register " + register + " not found in rdeflocations") + continue + + cblock = f.containing_block(useloc) + graph = UG.DirectedGraph(list(f.cfg.blocks.keys()), f.cfg.edges) + rdefs = tgtinstr.reaching_definitions(register) + dotpaths: List[DotRdefPath] = [] + graph.find_paths(f.faddr, cblock) + for (i, p) in enumerate( + sorted(graph.get_paths(), key=lambda p: len(p))): + cfgpath = DotRdefPath( + "path" + str(i), + f, + astinterface, + p, + subgraph=True, + nodeprefix = str(i) +":", + rdefinstrs = rdefs) + dotpaths.append(cfgpath) + + pdffilename = UD.print_dot_subgraphs( + app.path, + "paths", + output_reachingdefs, + "pdf", + [dotcfg.build() for dotcfg in dotpaths]) + + UC.print_status_update("Printed " + pdffilename) + else: UC.print_error("Unable to find function " + faddr) functions_failed += 1 diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx index 3947584f..0678108a 100755 --- a/chb/cmdline/chkx +++ b/chb/cmdline/chkx @@ -730,19 +730,15 @@ def parse() -> argparse.Namespace: "--hide_annotations", help="do not include annotations in printed C code", action="store_true") - buildast.add_argument( - "--remove_edges", - nargs="*", - default=[], - help="list of edges to be removed (in the form faddr:src-addr:tgt-addr in hex)") - buildast.add_argument( - 
"--add_edges", - nargs="*", - default=[], - help="list of edges to be added (in the form faddr:src-addr:tgt-addr in hex)") buildast.add_argument( "--verbose", "-v", action="store_true") + buildast.add_argument( + "--show_reachingdefs", + help="create a dot file for the reaching defs of :") + buildast.add_argument( + "--output_reachingdefs", + help="name of output file (without extension) to store dot/pdf file of reachingdefs") buildast.add_argument( "--loglevel", "-log", choices=UL.LogLevel.options(), diff --git a/chb/graphics/DotRdefPath.py b/chb/graphics/DotRdefPath.py new file mode 100644 index 00000000..a93c302c --- /dev/null +++ b/chb/graphics/DotRdefPath.py @@ -0,0 +1,209 @@ +# ------------------------------------------------------------------------------ +# CodeHawk Binary Analyzer +# Author: Henny Sipma +# ------------------------------------------------------------------------------ +# The MIT License (MIT) +# +# Copyright (c) 2025 Aarno Labs LLC +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class DotRdefPath:
    """Dot rendering of one control-flow path with its reaching definitions.

    Every block address on the path becomes a record-format node. Blocks
    that contain one of the given reaching-definition instructions show the
    high-level form of those instructions, and edges leaving a two-way
    branch are labelled with the branch condition.
    """

    def __init__(
            self,
            graphname: str,
            fn: "Function",
            astree: "ASTInterface",
            path: List[str],
            nodeprefix: str = "",
            replacements: Optional[Dict[str, str]] = None,
            rdefinstrs: Optional[List[str]] = None,
            subgraph: bool = False) -> None:
        """Set up the path renderer.

        Args:
            graphname: name handed to the underlying DotGraph.
            fn: function whose blocks and cfg are consulted.
            astree: AST interface passed to the instruction AST queries.
            path: block addresses (hex strings) in path order.
            nodeprefix: text prepended to every node name.
            replacements: substring -> replacement map used by replace_text;
                defaults to an empty map.
            rdefinstrs: reaching-definition locations, either hex instruction
                addresses or the literal "init"; defaults to an empty list.
            subgraph: forwarded to DotGraph.
        """
        self._fn = fn
        self._graphname = graphname
        self._astree = astree
        self._path = path
        self._nodeprefix = nodeprefix
        self._subgraph = subgraph
        # None sentinels instead of {} / [] defaults, so that instances never
        # share one mutable default object.
        self._replacements = replacements if replacements is not None else {}
        self._rdefinstrs = rdefinstrs if rdefinstrs is not None else []
        self._dotgraph = DotGraph(graphname, subgraph=self.subgraph)

    @property
    def function(self) -> "Function":
        return self._fn

    @property
    def graphname(self) -> str:
        return self._graphname

    @property
    def astree(self) -> "ASTInterface":
        return self._astree

    @property
    def path(self) -> List[str]:
        return self._path

    @property
    def nodeprefix(self) -> str:
        return self._nodeprefix

    @property
    def subgraph(self) -> bool:
        return self._subgraph

    def pathindex(self, baddr: str) -> int:
        """Return the position of the first occurrence of baddr in the path.

        Raises:
            UF.CHBError: if baddr is not on the path.
        """
        for (i, n) in enumerate(self.path):
            if n == baddr:
                return i
        raise UF.CHBError("Address " + baddr + " not found in path")

    def build(self) -> "DotGraph":
        """Populate and return the dot graph for this path.

        Adds one node per path block and one edge per consecutive pair. When
        init_is_exposed() holds, an extra "init" node describing the formal
        returned by astree.get_formal_locindices(0) is linked to the first
        block of the path.
        """
        for n in self.path:
            self.add_node(n)

        for (src, tgt) in zip(self.path, self.path[1:]):
            self.add_edge(src, tgt)

        if self.init_is_exposed():
            (fvar, _) = self.astree.get_formal_locindices(0)
            btype = fvar.bctyp
            self._dotgraph.add_node(
                self.nodeprefix + "init",
                labeltxt="{ init | " + str(btype) + " " + fvar.vname + "}",
                shaded=True,
                color="orange",
                recordformat=True)
            self._dotgraph.add_edge(
                self.nodeprefix + "init", self.nodeprefix + self.path[0])

        return self._dotgraph

    def _has_plain_rdef(self, n: str) -> bool:
        """Return True if block n holds an rdef instruction without control flow."""
        return any(
            not instr.has_control_flow()
            for instr in self.rdef_instructions(n))

    def init_is_exposed(self) -> bool:
        """Return True if no path block holds an rdef instruction without control flow."""
        return not any(self._has_plain_rdef(p) for p in self.path)

    def is_exposed(self, n: str) -> bool:
        """Return True if no block after n on the path holds such an instruction.

        Raises:
            UF.CHBError: if n is not on the path.
        """
        index = self.pathindex(n)
        return not any(
            self._has_plain_rdef(node) for node in self.path[index + 1:])

    def replace_text(self, txt: str) -> str:
        """Apply the configured replacements to txt, longest source string first."""
        result = txt
        for src in sorted(self._replacements, key=len, reverse=True):
            result = result.replace(src, self._replacements[src])
        return result

    def get_branch_instruction(self, n: str) -> Optional["Instruction"]:
        """Return the instruction at the last address of cfg block n."""
        src = self.function.cfg.blocks[n]
        return self.function.instruction(src.lastaddr)

    def rdef_instructions(self, n: str) -> List["Instruction"]:
        """Return the rdef instructions whose address lies within block n.

        An address belongs to the block when it is numerically between the
        block address and the block's last address (both inclusive). The
        pseudo location "init" is skipped.
        """
        block = self.function.blocks[n]
        baddr = int(n, 16)
        xaddr = int(block.lastaddr, 16)
        return [
            block.instructions[i]
            for i in self._rdefinstrs
            if i != "init" and baddr <= int(i, 16) <= xaddr]

    def add_node(self, n: str) -> None:
        """Add the record-format node for block n.

        A block without rdef instructions is labelled with its address only.
        Otherwise the label lists the high-level instructions and, for
        instructions with control flow, their conditions. If the block is
        exposed it is filled yellow when any of its rdef instructions has
        control flow, and orange otherwise.
        """
        nodename = self.nodeprefix + n
        rdefinstrs = self.rdef_instructions(n)
        blocktxt = n
        color: Optional[str] = None
        fillcolor: Optional[str] = None
        if rdefinstrs:
            conditions: List[str] = []
            pinstrs: List[str] = []
            for instr in rdefinstrs:
                (hlinstrs, _) = instr.ast_prov(self.astree)
                pinstrs.extend(str(hlinstr) for hlinstr in hlinstrs)
                if instr.has_control_flow():
                    (cc, _) = instr.ast_cc_condition_prov(self.astree)
                    conditions.append(str(cc))
            if self.is_exposed(n):
                if any(instr.has_control_flow() for instr in rdefinstrs):
                    fillcolor = "yellow"
                else:
                    fillcolor = "orange"
            # "\\n" is a literal backslash-n that dot turns into a line break
            if conditions:
                blocktxt = (
                    "{" + n + "|" + ("if " + "\\n".join(conditions))
                    + "|" + "\\n".join(pinstrs) + "}")
            else:
                blocktxt = ("{" + n + "|" + "\\n".join(pinstrs) + "}")
        self._dotgraph.add_node(
            nodename,
            labeltxt=blocktxt,
            shaded=True,
            color=color,
            fillcolor=fillcolor,
            recordformat=True)

    def add_edge(self, n1: str, n2: str) -> None:
        """Add the edge n1 -> n2, labelled with the branch condition if any.

        A label is produced only when n1 has exactly two successors, its last
        instruction is a branch instruction and that instruction reports two
        fall-through/taken conditions. The condition is requested reversed
        when n2 is the first listed successor of n1.
        """
        nodename1 = self.nodeprefix + n1
        nodename2 = self.nodeprefix + n2
        # Result unused; the lookup is kept so that an address that is not a
        # block of the function is still reported by Function.block.
        self.function.block(n1)
        labeltxt: Optional[str] = None
        tgtedges = self.function.cfg.edges[n1]
        if len(tgtedges) == 2:
            branchinstr = self.get_branch_instruction(n1)
            if branchinstr and branchinstr.is_branch_instruction:
                ftconds = branchinstr.ft_conditions
                if len(ftconds) == 2:
                    astcond = branchinstr.ast_condition_prov(
                        self.astree, reverse=(n2 == tgtedges[0]))
                    labeltxt = str(astcond[0])
        self._dotgraph.add_edge(nodename1, nodename2, labeltxt=labeltxt)
# Characters escaped in record labels: '<', '>', '"' and '%'. Curly braces
# (and '|') are deliberately absent so that the record structure survives.
_RECORD_LABEL_ESCAPES = str.maketrans({
    ">": "\\>",
    '"': '\\"',
    "%": "\\%",
    "<": "\\<",
})


def sanitize_record_format(s: Optional[str]) -> Optional[str]:
    """Escape a label for a record-format node, keeping its record layout.

    Prefixes each of ``<``, ``>``, ``"`` and ``%`` with a backslash. Curly
    braces are not escaped, so they keep delimiting record fields.

    Returns:
        The escaped text, or None when ``s`` is None (previously an implicit
        fall-through; now explicit so the annotation matches the behavior).
    """
    if s is None:
        return None
    # No replacement text contains another escaped character, so one
    # translate pass is equivalent to chaining the four replace calls.
    return s.translate(_RECORD_LABEL_ESCAPES)
- color: Optional[str] = None) -> None: + color: Optional[str] = None, + fillcolor: Optional[str] = None) -> None: if name not in self.nodes: if labeltxt is None: labeltxt = name - labeltxt = sanitize(labeltxt) + if recordformat: + labeltxt = sanitize_record_format(labeltxt) + else: + labeltxt = sanitize(labeltxt) self.nodes[name] = DotNode( - name, labeltxt=labeltxt, shaded=shaded, color=color) + name, + labeltxt=labeltxt, + shaded=shaded, + fillcolor=fillcolor, + color=color) def add_edge( self, diff --git a/chb/util/dotutil.py b/chb/util/dotutil.py index bd2f9a06..e7cc34d8 100644 --- a/chb/util/dotutil.py +++ b/chb/util/dotutil.py @@ -74,7 +74,7 @@ def print_dot_subgraphs( if len(subgraphs) == 0: print("No subgraphs supplied") return "error" - if len(subgraphs) > 10: + if len(subgraphs) > 20: print("Too many subgraphs: " + str(len(subgraphs))) return "error"