1
1
from pathlib import Path
2
- from typing import List
3
2
import sqlite3
4
3
import hashlib
5
4
from datetime import datetime
6
5
from dataclasses import dataclass
6
+ from typing import Union
7
7
import ctypes
8
8
9
9
from bindiff .types import FunctionAlgorithm , BasicBlockAlgorithm
12
12
@dataclass
13
13
class File :
14
14
"""
15
- Represent files parsed
15
+ File diffed in database.
16
16
"""
17
-
18
- id : int
19
- filename : str
20
- exefilename : str
21
- hash : str
22
- functions : int
23
- libfunctions : int
24
- calls : int
25
- basicblocks : int
26
- libbasicblocks : int
27
- edges : int
28
- libedges : int
29
- instructions : int
30
- libinstructions : int
17
+ id : int #: Unique ID of the file in database
18
+ filename : str #: file path
19
+ exefilename : str #: file name
20
+ hash : str #: SHA256 hash of the file
21
+ functions : int #: total number of functions
22
+ libfunctions : int #: total number of functions identified as library
23
+ calls : int #: number of calls
24
+ basicblocks : int #: number of basic blocks
25
+ libbasicblocks : int #: number of basic blocks belonging to library functions
26
+ edges : int #: number of edges in callgraph
27
+ libedges : int #: number of edges in callgraph addressing a library
28
+ instructions : int #: number of instructions
29
+ libinstructions : int #: number of instructions in library functions
31
30
32
31
33
32
@dataclass
34
33
class FunctionMatch :
35
34
"""
36
- Class holding a match between two function .
35
+ A match between two functions in database .
37
36
"""
38
-
39
- id : int
40
- address1 : int
41
- address2 : int
42
- similarity : float
43
- confidence : float
44
- algorithm : FunctionAlgorithm
37
+ id : int #: unique ID of function match in database
38
+ address1 : int #: function address in primary
39
+ address2 : int #: function address in secondary
40
+ similarity : float #: similarity score (0..1)
41
+ confidence : float #: confidence of the match (0..1)
42
+ algorithm : FunctionAlgorithm #: algorithm used for the match
45
43
46
44
47
45
@dataclass
48
46
class BasicBlockMatch :
49
47
"""
50
- Class holding a match between two basic blocks
48
+ A match between two basic blocks
51
49
"""
52
-
53
- id : int
54
- function_match : FunctionMatch
55
- address1 : int
56
- address2 : int
57
- algorithm : BasicBlockAlgorithm
50
+ id : int #: ID of the match in database
51
+ function_match : FunctionMatch #: FunctionMatch associated with this match
52
+ address1 : int #: basic block address in primary
53
+ address2 : int #: basic block address in secondary
54
+ algorithm : BasicBlockAlgorithm #: algorithm used to match the basic blocks
58
55
59
56
60
57
class BindiffFile (object ):
61
- def __init__ (self , file : Path | str , permission : str = "ro" ):
58
+ """
59
+ Bindiff database file.
60
+ The class seamlessly parses the database and allows retrieving
61
+ and manipulating the results.
62
+
63
+ It also provides some methods to create a database and to add entries
64
+ in the database.
65
+ """
66
+ def __init__ (self , file : Union [Path , str ], permission : str = "ro" ):
67
+ """
68
+ :param file: path to Bindiff database
69
+ :param permission: permission to use for opening database (default: ro)
70
+ """
62
71
self ._file = file
63
72
64
73
# Open database
65
- self .db = sqlite3 .connect (f"file:{ file } ?mode={ permission } " , uri = True )
74
+ self .db = sqlite3 .connect (f"file:{ str ( file ) } ?mode={ permission } " , uri = True )
66
75
67
76
# Global variables
68
- self .similarity = None
69
- self .confidence = None
70
- self .version = None
71
- self .created = None
72
- self .modified = None
77
+ self .similarity : float = None #: Overall similarity
78
+ self .confidence : float = None #: Overall diffing confidence
79
+ self .version : str = None #: version of the differ used for diffing
80
+ self .created : datetime = None #: Database creation date
81
+ self .modified : datetime = None #: Database last modification date
73
82
self ._load_metadata (self .db .cursor ())
74
83
75
84
# Files
76
- self .primary = None
77
- self .secondary = None
85
+ self .primary : File = None #: Primary file
86
+ self .secondary : File = None #: Secondary file
78
87
self ._load_file (self .db .cursor ())
79
88
80
89
# Function matches
81
- self .primary_functions_match = {}
82
- self .secondary_functions_match = {}
90
+ self .primary_functions_match : dict [ int , FunctionMatch ] = {} #: FunctionMatch indexed by addresses in primary
91
+ self .secondary_functions_match : dict [ int , FunctionMatch ] = {} #: FunctionMatch indexed by addresses in secondary
83
92
self ._load_function_match (self .db .cursor ())
84
93
85
94
# Basicblock matches
86
- self .primary_basicblock_match = {}
87
- self .secondary_basicblock_match = {}
95
+ self .primary_basicblock_match : dict [ int , dict [ int , BasicBlockMatch ]] = {} #: Basic block match from primary
96
+ self .secondary_basicblock_match : dict [ int , dict [ int , BasicBlockMatch ]] = {} #: Basic block match from secondary
88
97
self ._load_basicblock_match (self .db .cursor ())
89
98
90
99
# Instruction matches
@@ -97,42 +106,35 @@ def unmatched_primary_count(self) -> int:
97
106
"""
98
107
Returns the number of functions inside primary that are not matched
99
108
"""
100
-
101
109
return self .primary .functions + self .primary .libfunctions - len (self .primary_functions_match )
102
110
103
111
@property
104
112
def unmatched_secondary_count (self ) -> int :
105
113
"""
106
114
Returns the number of functions inside secondary that are not matched
107
115
"""
108
-
109
116
return self .secondary .functions + self .secondary .libfunctions - len (self .primary_functions_match )
110
117
111
118
@property
112
- def function_matches (self ) -> List [FunctionMatch ]:
119
+ def function_matches (self ) -> list [FunctionMatch ]:
113
120
"""
114
121
Returns the list of matched functions
115
122
"""
116
-
117
123
return list (self .primary_functions_match .values ())
118
124
119
125
@property
120
- def basicblock_matches (self ) -> List [BasicBlockMatch ]:
126
+ def basicblock_matches (self ) -> list [BasicBlockMatch ]:
121
127
"""
122
128
Returns the list of matched basic blocks in primary (and secondary)
123
129
"""
124
-
125
130
return [x for bb_matches in self .primary_basicblock_match .values () for x in bb_matches .values ()]
126
- # return list(self.primary_basicblock_match.values())
127
131
128
132
def _load_file (self , cursor : sqlite3 .Cursor ) -> None :
129
133
"""
130
134
Load diffing file stored in a DB file
131
135
132
136
:param cursor: sqlite3 cursor to the DB
133
- :return: None
134
137
"""
135
-
136
138
query = "SELECT * FROM file"
137
139
self .primary = File (* cursor .execute (query ).fetchone ())
138
140
self .secondary = File (* cursor .execute (query ).fetchone ())
@@ -142,9 +144,7 @@ def _load_metadata(self, cursor: sqlite3.Cursor) -> None:
142
144
Load diffing metadata as stored in the DB file
143
145
144
146
:param cursor: sqlite3 cursor to the DB
145
- :return: None
146
147
"""
147
-
148
148
query = "SELECT created, modified, similarity, confidence FROM metadata"
149
149
self .created , self .modified , self .similarity , self .confidence = cursor .execute (query ).fetchone ()
150
150
self .created = datetime .strptime (self .created , "%Y-%m-%d %H:%M:%S" )
@@ -157,9 +157,7 @@ def _load_function_match(self, cursor: sqlite3.Cursor) -> None:
157
157
Load matched functions stored in a DB file
158
158
159
159
:param cursor: sqlite3 cursor to the DB
160
- :return: None
161
160
"""
162
-
163
161
i2u = lambda x : ctypes .c_ulonglong (x ).value
164
162
fun_query = "SELECT id, address1, address2, similarity, confidence, algorithm FROM function"
165
163
for id , addr1 , addr2 , sim , conf , alg in cursor .execute (fun_query ):
@@ -173,9 +171,7 @@ def _load_basicblock_match(self, cursor: sqlite3.Cursor) -> None:
173
171
Load matched basic blocks stored in a DB file
174
172
175
173
:param cursor: sqlite3 cursor to the DB
176
- :return: None
177
174
"""
178
-
179
175
mapping = {x .id : x for x in self .function_matches }
180
176
query = "SELECT id, functionid, address1, address2, algorithm FROM basicblock"
181
177
for id , fun_id , bb_addr1 , bb_addr2 , bb_algo in cursor .execute (query ):
@@ -199,9 +195,7 @@ def _load_instruction_match(self, cursor: sqlite3.Cursor) -> None:
199
195
Load matched instructions stored in a DB file
200
196
201
197
:param cursor: sqlite3 cursor to the DB
202
- :return: None
203
198
"""
204
-
205
199
i2u = lambda x : ctypes .c_ulonglong (x ).value
206
200
mapping = {x .id : x for x in self .basicblock_matches }
207
201
query = "SELECT basicblockid, address1, address2 FROM instruction"
0 commit comments