Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ development and thus somewhat experimental.

### Requirements

The command-line interface requires python3.5 or higher. The analyzer
requires a Java development kit (to provide the `jar` tool).
The command-line interface requires Python 3.5 or higher. The analyzer
requires the `zip` command-line utility (used to archive the analysis results).

Build instructions for the CodeHawk Binary Analyzer are available
[here](https://github.com/static-analysis-engineering/codehawk/tree/master/CodeHawk).
Expand Down
2 changes: 1 addition & 1 deletion chb/app/AppAccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def __init__(
"""Initializes access to analysis results."""
self._path = path
self._filename = filename
self._deps = deps # list of summary jars registered as dependencies
self._deps = deps # list of summary zips registered as dependencies
self._header_ty: Type[HeaderTy] = fileformat # currently supported: elf, pe

self._userdata: Optional[UserData] = None
Expand Down
21 changes: 11 additions & 10 deletions chb/cmdline/AnalysisManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __init__(
Arguments:
- path: path of the directory that holds the target executable
- filename: filename of the target executable
- deps: list of summary jars
- deps: list of summary zips
- hints: Dictionary with items to add to the userdata file
- elf/mips/arm: modifiers (default is x86 PE)
"""
Expand Down Expand Up @@ -455,7 +455,7 @@ def _analyze_until_stable(
preamble_cutoff: int = 12) -> int:
cwd = os.getcwd()
os.chdir(self.path) # temporary change in directory
functionsjarfile = UF.get_functionsjar_filename(self.path, self.filename)
functionszipfile = UF.get_functionszip_filename(self.path, self.filename)
analysisdir = UF.get_analysis_dir(self.path, self.filename)
cmd = [self.chx86_analyze, "-summaries", self.chsummaries]
cmd.extend(["-preamble_cutoff", str(preamble_cutoff)])
Expand Down Expand Up @@ -510,7 +510,7 @@ def _analyze_until_stable(
cmd.append("-fail_on_function_failure")

cmd.extend(["-analyze", self.filename])
jarcmd = ["jar", "cf", functionsjarfile, "-C", analysisdir, "functions"]
zipcmd = ["zip", "-r", functionszipfile, "functions"]
print_progress_update("Analyzing "
+ self.filename
+ " (max "
Expand Down Expand Up @@ -543,17 +543,17 @@ def _analyze_until_stable(
or (count > iterations))

if isfinished:
chklogger.logger.debug("execute command %s", " ".join(jarcmd))
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
fincmd = cmd + ["-collectdata"]
if self.use_ssa:
fincmd = fincmd + ["-ssa"]
if self.no_varinvs:
fincmd = fincmd + ["-no_varinvs"]
chklogger.logger.debug("execute command %s", " ".join(fincmd))
result = self._call_analysis(fincmd, timeout=timeout)
chklogger.logger.debug("execute command %s", " ".join(jarcmd))
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
count += 1
(stable, results, r_update) = self._get_results()
print_progress_update(r_update + " " + self.filename)
Expand All @@ -563,11 +563,12 @@ def _analyze_until_stable(
print("\n".join(lines))
return isstable == "yes"

chklogger.logger.debug("execute command %s", " ".join(jarcmd))
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
result = self._call_analysis(cmd, timeout=timeout)
if result != 0:
chklogger.logger.debug("return cwd %s", cwd)
chklogger.logger.error("zip command failed with return code %s, "
"changing back to folder %s", result, cwd)
os.chdir(cwd) # return to original directory
print("\n".join(lines))
return result
Expand Down
4 changes: 2 additions & 2 deletions chb/cmdline/chkx
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def summariescommand(args: argparse.Namespace) -> NoReturn:
def summarieslistcommand(args: argparse.Namespace) -> NoReturn:
print("The summaries list command provides access to function summaries.")
print("It can be followed by the following subcommands:")
print(" dlls output a list of dlls provided (per jarfile)")
print(" dlls output a list of dlls provided (per zipfile)")
print(" dll-functions <dll-1> ... <dll-n> output a list of functions for each dll listed")
print(" so-functions output a list of shared-object functions (ELF)")
exit(0)
Expand Down Expand Up @@ -433,7 +433,7 @@ def parse() -> argparse.Namespace:
'--thirdpartysummaries',
nargs="*",
default=[],
help='summary jars for third party libraries')
help='summary zips for third party libraries')
analyzecmd.add_argument(
"--so_libraries",
nargs="*",
Expand Down
18 changes: 9 additions & 9 deletions chb/cmdline/summariescmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ def summaries_dlls_cmd(args: argparse.Namespace) -> NoReturn:
models = ModelsAccess()

modeldlls = models.dlls()
for jar in modeldlls:
print(jar)
for zip_f in modeldlls:
print(zip_f)
print("-" * 80)
for dll in sorted(modeldlls[jar]):
for dll in sorted(modeldlls[zip_f]):
print(" " + dll)
print("-" * 80)
exit(0)
Expand Down Expand Up @@ -132,18 +132,18 @@ def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn:

models = ModelsAccess()

# returns a dictionary with so-functions for different jars
# returns a dictionary with so-functions for different zips
sofunctions = models.all_so_function_summaries()
for jar in sorted(sofunctions):
for zip_f in sorted(sofunctions):
print("\nShared object functions from "
+ jar
+ zip_f
+ " ("
+ str(len(sofunctions[jar]))
+ str(len(sofunctions[zip_f]))
+ ")")
print("=" * 80)
pdrcounter = 0
pdwcounter = 0
for f in sorted(sofunctions[jar], key=lambda f: f.name):
for f in sorted(sofunctions[zip_f], key=lambda f: f.name):
summary = models.so_function_summary(f.name)
prec = summary.semantics.preconditions
pdread = len([p for p in prec if p.is_deref_read])
Expand All @@ -155,7 +155,7 @@ def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn:
pdwcounter += 1
print("=" * 80)

total = sum(len(sofunctions[jar]) for jar in sofunctions)
total = sum(len(sofunctions[zip_f]) for zip_f in sofunctions)
print(
"\nTotal: "
+ str(total)
Expand Down
2 changes: 1 addition & 1 deletion chb/models/JniFunctionSummaryLibrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class JniFunctionSummaryLibrary(L.FunctionSummaryLibrary):

Native methods are indexed by numbers, roughly through 231. Many of these
methods are similar, differing only in the type to which they are
applicable. The summaries in bchsummaries.jar make use of templates that
applicable. The summaries in the bchsummaries archive make use of templates that
can be instantiated for these different types.

For example, for jni_190.xml:
Expand Down
36 changes: 18 additions & 18 deletions chb/models/ModelsAccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,56 +45,56 @@ class ModelsAccess(object):
"""Main entry point for library function summaries.

The main summary collection is obtained from the configured
bchummaries.jar. Other summary collections may be added via
additional jarfiles, specified with depjars.
bchsummaries.zip. Other summary collections may be added via
additional zipfiles, specified with depzips.
"""

def __init__(self,
depjars: Sequence[str] = []) -> None:
"""Initialize library models access with jarfile."""
self._bchsummariesjarfilename = Config().summaries
self._depjars = depjars
depzips: Sequence[str] = []) -> None:
"""Initialize library models access with zipfile."""
self._bchsummarieszipfilename = Config().summaries
self._depzips = depzips
self._bchsummaries: Optional[SummaryCollection] = None
self._dependencies: Sequence[SummaryCollection] = []
self._dlls: Dict[str, Sequence[str]] = {}
self._sofunctionsummaries: Dict[str, Sequence[FunctionSummary]] = {}

@property
def depjars(self) -> Sequence[str]:
return self._depjars
def depzips(self) -> Sequence[str]:
return self._depzips

@property
def bchsummariesjarfilename(self) -> str:
return self._bchsummariesjarfilename
def bchsummarieszipfilename(self) -> str:
return self._bchsummarieszipfilename

@property
def bchsummaries(self) -> SummaryCollection:
if self._bchsummaries is None:
self._bchsummaries = SummaryCollection(
self, self.bchsummariesjarfilename)
self, self.bchsummarieszipfilename)
return self._bchsummaries

@property
def dependencies(self) -> Sequence[SummaryCollection]:
if len(self._dependencies) == 0:
self._dependencies = [SummaryCollection(self, j) for j in self.depjars]
self._dependencies = [SummaryCollection(self, j) for j in self.depzips]
return self._dependencies

@property
def stats(self) -> str:
lines: List[str] = []
dlls = self.dlls()
for jar in dlls:
lines.append(jar.ljust(20) + str(len(dlls[jar])) + " dlls")
for zip_f in dlls:
lines.append(zip_f.ljust(20) + str(len(dlls[zip_f])) + " dlls")
return "\n".join(lines)

def dlls(self) -> Mapping[str, Sequence[str]]:
"""Return a mapping from jarfilename to list of function names."""
"""Return a mapping from zipfilename to list of function names."""

if len(self._dlls) == 0:
self._dlls["bchsummaries"] = self.bchsummaries.dlls
for d in self.dependencies:
self._dlls[d.jarfilename] = d.dlls
self._dlls[d.zipfilename] = d.dlls
return self._dlls

def has_dll_function_summary(self, dll: str, fname: str) -> bool:
Expand Down Expand Up @@ -140,15 +140,15 @@ def so_function_summary(self, fname: str) -> FunctionSummary:
return self.bchsummaries.so_function_summary(fname)

def all_so_function_summaries(self) -> Mapping[str, Sequence[FunctionSummary]]:
"""Return a mapping from jarfilename to list of function summaries."""
"""Return a mapping from zipfilename to list of function summaries."""

if len(self._sofunctionsummaries) == 0:
sosummaries = self.bchsummaries.all_so_function_summaries()
self._sofunctionsummaries["bchsummaries"] = sosummaries
for d in self.dependencies:
if d.has_so_functions():
self._sofunctionsummaries[
d.jarfilename] = d.all_so_function_summaries()
d.zipfilename] = d.all_so_function_summaries()
return self._sofunctionsummaries

def enum_definitions(self) -> Mapping[str, DllEnumDefinitions]:
Expand Down
20 changes: 10 additions & 10 deletions chb/models/SummaryCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@


class SummaryCollection:
"""Represents all summary entities in a single jar file."""
"""Represents all summary entities in a single zip file."""

def __init__(
self,
models: "ModelsAccess",
jarfilename: str) -> None:
zipfilename: str) -> None:
self._models = models
self._jarfilename = jarfilename
self._jarfile = zipfile.ZipFile(self.jarfilename, "r")
self._zipfilename = zipfilename
self._zipfile = zipfile.ZipFile(self.zipfilename, "r")
self._filenames: List[str] = []
self._directorynames: List[str] = []
self._dlls: List[str] = []
Expand All @@ -73,17 +73,17 @@ def models(self) -> "ModelsAccess":
return self._models

@property
def jarfile(self) -> zipfile.ZipFile:
return self._jarfile
def zipfile(self) -> zipfile.ZipFile:
return self._zipfile

@property
def jarfilename(self) -> str:
return self._jarfilename
def zipfilename(self) -> str:
return self._zipfilename

@property
def filenames(self) -> List[str]:
if len(self._filenames) == 0:
for info in self.jarfile.infolist():
for info in self.zipfile.infolist():
self._filenames.append(info.filename)
return self._filenames

Expand Down Expand Up @@ -336,7 +336,7 @@ def retrieve_ref_jni_function_summary_xnode(
raise UF.CHBError("Retrieval of jni references not implemented yet")

def _get_summary_xnode(self, filename: str, tag: str) -> ET.Element:
zfile = self.jarfile.read(filename).decode('utf-8')
zfile = self.zipfile.read(filename).decode('utf-8')
try:
xnode = ET.fromstring(str(zfile)).find(tag)
except ET.ParseError as e:
Expand Down
5 changes: 3 additions & 2 deletions chb/util/fileutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
x_global_state.xml
x_global_locations.xml
x_system_info.xml
x_functions.jar
x_functions.jar (zip archive; the .jar extension is kept for now)
x_asm.log
x_orphan.log
x_bdict.log
Expand Down Expand Up @@ -637,8 +637,9 @@ def get_interface_dictionary_xnode(path: str, xfile: str) -> ET.Element:
return get_chb_xnode(filename, "interface-dictionary")


def get_functionsjar_filename(path: str, xfile: str) -> str:
def get_functionszip_filename(path: str, xfile: str) -> str:
fdir = get_analysis_dir(path, xfile)
# For now we keep the .jar extension until we update the ocaml analyzer
return get_chb_filename(fdir, xfile, "functions.jar")


Expand Down