diff --git a/README.md b/README.md index b8c0e9f1..cdebf94c 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,7 @@ development and thus somewhat experimental. ### Requirements -The command-line interface requires python3.5 or higher. The analyzer -requires a Java development kit (to provide the `jar` tool). +The command-line interface requires python3.5 or higher. Build instructions for the CodeHawk Binary Analyzer are available [here](https://github.com/static-analysis-engineering/codehawk/tree/master/CodeHawk). diff --git a/chb/app/AppAccess.py b/chb/app/AppAccess.py index 9f93b535..dcdf46c8 100644 --- a/chb/app/AppAccess.py +++ b/chb/app/AppAccess.py @@ -103,7 +103,7 @@ def __init__( """Initializes access to analysis results.""" self._path = path self._filename = filename - self._deps = deps # list of summary jars registered as dependencies + self._deps = deps # list of summary zips registered as dependencies self._header_ty: Type[HeaderTy] = fileformat # currently supported: elf, pe self._userdata: Optional[UserData] = None diff --git a/chb/cmdline/AnalysisManager.py b/chb/cmdline/AnalysisManager.py index cd70751f..6a020842 100644 --- a/chb/cmdline/AnalysisManager.py +++ b/chb/cmdline/AnalysisManager.py @@ -88,7 +88,7 @@ def __init__( Arguments: - path: path of the directory that holds the target executable - filename: filename of the target executable - - deps: list of summary jars + - deps: list of summary zips - hints: Dictionary with items to add to the userdata file - elf/mips/arm: modifiers (default is x86 PE) """ @@ -455,7 +455,7 @@ def _analyze_until_stable( preamble_cutoff: int = 12) -> int: cwd = os.getcwd() os.chdir(self.path) # temporary change in directory - functionsjarfile = UF.get_functionsjar_filename(self.path, self.filename) + functionszipfile = UF.get_functionszip_filename(self.path, self.filename) analysisdir = UF.get_analysis_dir(self.path, self.filename) cmd = [self.chx86_analyze, "-summaries", self.chsummaries] 
cmd.extend(["-preamble_cutoff", str(preamble_cutoff)]) @@ -510,7 +510,7 @@ def _analyze_until_stable( cmd.append("-fail_on_function_failure") cmd.extend(["-analyze", self.filename]) - jarcmd = ["jar", "cf", functionsjarfile, "-C", analysisdir, "functions"] + zipcmd = ["zip", "-r", functionszipfile, "functions"] print_progress_update("Analyzing " + self.filename + " (max " @@ -543,8 +543,8 @@ def _analyze_until_stable( or (count > iterations)) if isfinished: - chklogger.logger.debug("execute command %s", " ".join(jarcmd)) - subprocess.call(jarcmd, stderr=subprocess.STDOUT) + chklogger.logger.debug("execute zip command %s", " ".join(zipcmd)) + subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir) fincmd = cmd + ["-collectdata"] if self.use_ssa: fincmd = fincmd + ["-ssa"] @@ -552,8 +552,8 @@ def _analyze_until_stable( fincmd = fincmd + ["-no_varinvs"] chklogger.logger.debug("execute command %s", " ".join(fincmd)) result = self._call_analysis(fincmd, timeout=timeout) - chklogger.logger.debug("execute command %s", " ".join(jarcmd)) - subprocess.call(jarcmd, stderr=subprocess.STDOUT) + chklogger.logger.debug("execute zip command %s", " ".join(zipcmd)) + subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir) count += 1 (stable, results, r_update) = self._get_results() print_progress_update(r_update + " " + self.filename) @@ -563,11 +563,12 @@ def _analyze_until_stable( print("\n".join(lines)) return isstable == "yes" - chklogger.logger.debug("execute command %s", " ".join(jarcmd)) - subprocess.call(jarcmd, stderr=subprocess.STDOUT) + chklogger.logger.debug("execute zip command %s", " ".join(zipcmd)) + subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir) result = self._call_analysis(cmd, timeout=timeout) if result != 0: - chklogger.logger.debug("return cwd %s", cwd) + chklogger.logger.error("zip command failed with return code %s, " + "changing back to folder %s", result, cwd) os.chdir(cwd) # return to original directory 
print("\n".join(lines)) return result diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx index f9095d9f..6c9a6160 100755 --- a/chb/cmdline/chkx +++ b/chb/cmdline/chkx @@ -245,7 +245,7 @@ def summariescommand(args: argparse.Namespace) -> NoReturn: def summarieslistcommand(args: argparse.Namespace) -> NoReturn: print("The summaries list command provides access to function summaries.") print("It can be followed by the following subcommands:") - print(" dlls output a list of dlls provided (per jarfile)") + print(" dlls output a list of dlls provided (per zipfile)") print(" dll-functions ... output a list of functions for each dll listed") print(" so-functions output a list of shared-object functions (ELF)") exit(0) @@ -433,7 +433,7 @@ def parse() -> argparse.Namespace: '--thirdpartysummaries', nargs="*", default=[], - help='summary jars for third party libraries') + help='summary zips for third party libraries') analyzecmd.add_argument( "--so_libraries", nargs="*", diff --git a/chb/cmdline/summariescmds.py b/chb/cmdline/summariescmds.py index e8f19baa..0d1703cc 100644 --- a/chb/cmdline/summariescmds.py +++ b/chb/cmdline/summariescmds.py @@ -76,10 +76,10 @@ def summaries_dlls_cmd(args: argparse.Namespace) -> NoReturn: models = ModelsAccess() modeldlls = models.dlls() - for jar in modeldlls: - print(jar) + for zip_f in modeldlls: + print(zip_f) print("-" * 80) - for dll in sorted(modeldlls[jar]): + for dll in sorted(modeldlls[zip_f]): print(" " + dll) print("-" * 80) exit(0) @@ -132,18 +132,18 @@ def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn: models = ModelsAccess() - # returns a dictionary with so-functions for different jars + # returns a dictionary with so-functions for different zips sofunctions = models.all_so_function_summaries() - for jar in sorted(sofunctions): + for zip_f in sorted(sofunctions): print("\nShared object functions from " - + jar + + zip_f + " (" - + str(len(sofunctions[jar])) + + str(len(sofunctions[zip_f])) + ")") print("=" * 
80) pdrcounter = 0 pdwcounter = 0 - for f in sorted(sofunctions[jar], key=lambda f: f.name): + for f in sorted(sofunctions[zip_f], key=lambda f: f.name): summary = models.so_function_summary(f.name) prec = summary.semantics.preconditions pdread = len([p for p in prec if p.is_deref_read]) @@ -155,7 +155,7 @@ def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn: pdwcounter += 1 print("=" * 80) - total = sum(len(sofunctions[jar]) for jar in sofunctions) + total = sum(len(sofunctions[zip_f]) for zip_f in sofunctions) print( "\nTotal: " + str(total) diff --git a/chb/models/JniFunctionSummaryLibrary.py b/chb/models/JniFunctionSummaryLibrary.py index 6a9a1788..19f29c4f 100644 --- a/chb/models/JniFunctionSummaryLibrary.py +++ b/chb/models/JniFunctionSummaryLibrary.py @@ -40,7 +40,7 @@ class JniFunctionSummaryLibrary(L.FunctionSummaryLibrary): Native methods are indexed by numbers, roughly through 231. Many of these methods are similar, differring only in the type to which they are - applicable. The summaries in bchsummaries.jar make use of templates that + applicable. The summaries in the bchsummaries archive make use of templates that can be instantiated for these different types. For example, for jni_190.xml: diff --git a/chb/models/ModelsAccess.py b/chb/models/ModelsAccess.py index 907aea6f..a5a9dd5f 100644 --- a/chb/models/ModelsAccess.py +++ b/chb/models/ModelsAccess.py @@ -45,56 +45,56 @@ class ModelsAccess(object): """Main entry point for library function summaries. The main summary collection is obtained from the configured - bchummaries.jar. Other summary collections may be added via - additional jarfiles, specified with depjars. + bchsummaries.zip. Other summary collections may be added via + additional zipfiles, specified with depzips. 
""" def __init__(self, - depjars: Sequence[str] = []) -> None: - """Initialize library models access with jarfile.""" - self._bchsummariesjarfilename = Config().summaries - self._depjars = depjars + depzips: Sequence[str] = []) -> None: + """Initialize library models access with zipfile.""" + self._bchsummarieszipfilename = Config().summaries + self._depzips = depzips self._bchsummaries: Optional[SummaryCollection] = None self._dependencies: Sequence[SummaryCollection] = [] self._dlls: Dict[str, Sequence[str]] = {} self._sofunctionsummaries: Dict[str, Sequence[FunctionSummary]] = {} @property - def depjars(self) -> Sequence[str]: - return self._depjars + def depzips(self) -> Sequence[str]: + return self._depzips @property - def bchsummariesjarfilename(self) -> str: - return self._bchsummariesjarfilename + def bchsummarieszipfilename(self) -> str: + return self._bchsummarieszipfilename @property def bchsummaries(self) -> SummaryCollection: if self._bchsummaries is None: self._bchsummaries = SummaryCollection( - self, self.bchsummariesjarfilename) + self, self.bchsummarieszipfilename) return self._bchsummaries @property def dependencies(self) -> Sequence[SummaryCollection]: if len(self._dependencies) == 0: - self._dependencies = [SummaryCollection(self, j) for j in self.depjars] + self._dependencies = [SummaryCollection(self, j) for j in self.depzips] return self._dependencies @property def stats(self) -> str: lines: List[str] = [] dlls = self.dlls() - for jar in dlls: - lines.append(jar.ljust(20) + str(len(dlls[jar])) + " dlls") + for zip_f in dlls: + lines.append(zip_f.ljust(20) + str(len(dlls[zip_f])) + " dlls") return "\n".join(lines) def dlls(self) -> Mapping[str, Sequence[str]]: - """Return a mapping from jarfilename to list of function names.""" + """Return a mapping from zipfilename to list of function names.""" if len(self._dlls) == 0: self._dlls["bchsummaries"] = self.bchsummaries.dlls for d in self.dependencies: - self._dlls[d.jarfilename] = d.dlls + 
self._dlls[d.zipfilename] = d.dlls return self._dlls def has_dll_function_summary(self, dll: str, fname: str) -> bool: @@ -140,7 +140,7 @@ def so_function_summary(self, fname: str) -> FunctionSummary: return self.bchsummaries.so_function_summary(fname) def all_so_function_summaries(self) -> Mapping[str, Sequence[FunctionSummary]]: - """Return a mapping from jarfilename to list of function summaries.""" + """Return a mapping from zipfilename to list of function summaries.""" if len(self._sofunctionsummaries) == 0: sosummaries = self.bchsummaries.all_so_function_summaries() @@ -148,7 +148,7 @@ def all_so_function_summaries(self) -> Mapping[str, Sequence[FunctionSummary]]: for d in self.dependencies: if d.has_so_functions(): self._sofunctionsummaries[ - d.jarfilename] = d.all_so_function_summaries() + d.zipfilename] = d.all_so_function_summaries() return self._sofunctionsummaries def enum_definitions(self) -> Mapping[str, DllEnumDefinitions]: diff --git a/chb/models/SummaryCollection.py b/chb/models/SummaryCollection.py index 2f4ba055..67dfa064 100644 --- a/chb/models/SummaryCollection.py +++ b/chb/models/SummaryCollection.py @@ -51,15 +51,15 @@ class SummaryCollection: - """Represents all summary entities in a single jar file.""" + """Represents all summary entities in a single zip file.""" def __init__( self, models: "ModelsAccess", - jarfilename: str) -> None: + zipfilename: str) -> None: self._models = models - self._jarfilename = jarfilename - self._jarfile = zipfile.ZipFile(self.jarfilename, "r") + self._zipfilename = zipfilename + self._zipfile = zipfile.ZipFile(self.zipfilename, "r") self._filenames: List[str] = [] self._directorynames: List[str] = [] self._dlls: List[str] = [] @@ -73,17 +73,17 @@ def models(self) -> "ModelsAccess": return self._models @property - def jarfile(self) -> zipfile.ZipFile: - return self._jarfile + def zipfile(self) -> zipfile.ZipFile: + return self._zipfile @property - def jarfilename(self) -> str: - return self._jarfilename + def 
zipfilename(self) -> str: + return self._zipfilename @property def filenames(self) -> List[str]: if len(self._filenames) == 0: - for info in self.jarfile.infolist(): + for info in self.zipfile.infolist(): self._filenames.append(info.filename) return self._filenames @@ -336,7 +336,7 @@ def retrieve_ref_jni_function_summary_xnode( raise UF.CHBError("Retrieval of jni references not implemented yet") def _get_summary_xnode(self, filename: str, tag: str) -> ET.Element: - zfile = self.jarfile.read(filename).decode('utf-8') + zfile = self.zipfile.read(filename).decode('utf-8') try: xnode = ET.fromstring(str(zfile)).find(tag) except ET.ParseError as e: diff --git a/chb/util/fileutil.py b/chb/util/fileutil.py index 4720eb10..7a5c8819 100644 --- a/chb/util/fileutil.py +++ b/chb/util/fileutil.py @@ -50,7 +50,7 @@ x_global_state.xml x_global_locations.xml x_system_info.xml - x_functions.jar + x_functions.zip x_asm.log x_orphan.log x_bdict.log @@ -637,8 +637,9 @@ def get_interface_dictionary_xnode(path: str, xfile: str) -> ET.Element: return get_chb_xnode(filename, "interface-dictionary") -def get_functionsjar_filename(path: str, xfile: str) -> str: +def get_functionszip_filename(path: str, xfile: str) -> str: fdir = get_analysis_dir(path, xfile) + # For now we keep the .jar extension until we update the ocaml analyzer return get_chb_filename(fdir, xfile, "functions.jar")