Skip to content

Commit b79dbab

Browse files
authored
Analyze directory tree and archives (#6)
Add APIs to analyze a directory tree or an archive (zip, tar). By default, all regular files with executable bit are analyzed except for files that have a `.py`, `.sh`, or docs extension (md, rst, txt). In addition, files with a pattern like `lib*.so`, `lib*.so.1.2.3`, and `ld-*.so` are analyzed, too. Signed-off-by: Christian Heimes <[email protected]>
1 parent 5177b43 commit b79dbab

File tree

9 files changed

+303
-58
lines changed

9 files changed

+303
-58
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,10 @@ Complete!
7373
* exception `elfdeps.ELFError`
7474
* dataclass `elfdeps.ELFInfo`
7575
* dataclass `elfdeps.SOInfo`
76+
* `elfdeps.analyze_dirtree(dirname, *, settings=None, onerror=None) -> Generator[ELFInfo, None, None]`
7677
* `elfdeps.analyze_elffile(elffile, *, filename, is_exec, settings=None) -> ELFInfo`
7778
* `elfdeps.analyze_file(filename, *, settings=None) -> ELFInfo`
79+
* `elfdeps.analyze_tarfile(tfile, *, settings=None) -> Generator[ELFInfo, None, None]`
7880
* `elfdeps.analyze_tarmember(tfile, tarinfo, *, settings=None) -> ELFInfo`
81+
* `elfdeps.analyze_zipfile(zfile, *, settings=None) -> Generator[ELFInfo, None, None]`
7982
* `elfdeps.analyze_zipmember(zfile, zipinfo, *, settings=None) -> ELFInfo`

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ source = [
5959
"src/elfdeps",
6060
".tox/py*/**/site-packages/elfdeps",
6161
]
62-
tests =[
62+
tests = [
6363
"tests/",
6464
]
6565

src/elfdeps/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,22 @@
55
"ELFError",
66
"ELFInfo",
77
"SOInfo",
8+
"analyze_dirtree",
89
"analyze_elffile",
910
"analyze_file",
11+
"analyze_tarfile",
1012
"analyze_tarmember",
13+
"analyze_zipfile",
1114
"analyze_zipmember",
1215
)
1316

1417
from elftools.common.exceptions import ELFError
1518

1619
from ._archives import (
20+
analyze_dirtree,
21+
analyze_tarfile,
1722
analyze_tarmember,
23+
analyze_zipfile,
1824
analyze_zipmember,
1925
)
2026
from ._elfdeps import (

src/elfdeps/__main__.py

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import argparse
4+
import logging
45
import pathlib
56
import pprint
7+
import stat
68
import tarfile
7-
import typing
89
import zipfile
910

1011
from . import _archives, _elfdeps
@@ -14,6 +15,13 @@
1415

1516
parser = argparse.ArgumentParser("elfdeps")
1617
parser.add_argument("filename", type=pathlib.Path)
18+
parser.add_argument(
19+
"-d",
20+
"--debug",
21+
action="store_true",
22+
dest="debug",
23+
help="debug logging",
24+
)
1725
parser.add_argument(
1826
"-P",
1927
"--provides",
@@ -61,24 +69,6 @@
6169
)
6270

6371

64-
def zip_requires(
65-
filename: pathlib.Path, settings: _elfdeps.ELFAnalyzeSettings
66-
) -> typing.Iterable[_elfdeps.ELFInfo]:
67-
with zipfile.ZipFile(filename) as zf:
68-
for zipinfo in zf.infolist():
69-
if zipinfo.filename.endswith(".so"):
70-
yield _archives.analyze_zipmember(zf, zipinfo, settings=settings)
71-
72-
73-
def tar_requires(
74-
filename: pathlib.Path, settings: _elfdeps.ELFAnalyzeSettings
75-
) -> typing.Iterable[_elfdeps.ELFInfo]:
76-
with tarfile.TarFile.open(filename, mode="r:*") as tf:
77-
for tarinfo in tf.getmembers():
78-
if tarinfo.name.endswith(".so"):
79-
yield _archives.analyze_tarmember(tf, tarinfo, settings=settings)
80-
81-
8272
def main(argv: list[str] | None = None) -> None:
8373
args = parser.parse_args(argv)
8474
settings = _elfdeps.ELFAnalyzeSettings(
@@ -88,13 +78,20 @@ def main(argv: list[str] | None = None) -> None:
8878
require_interp=args.require_interp,
8979
unique=args.unique,
9080
)
81+
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
9182
filename: pathlib.Path = args.filename
83+
st = filename.stat()
9284
if filename.name.endswith(ZIPEXT):
93-
infos = list(zip_requires(filename, settings=settings))
85+
with zipfile.ZipFile(filename, mode="r") as zfile:
86+
infos = list(_archives.analyze_zipfile(zfile=zfile, settings=settings))
9487
elif filename.name.endswith(TAREXT):
95-
infos = list(tar_requires(filename, settings=settings))
88+
with tarfile.TarFile.open(filename, mode="r:*") as tfile:
89+
infos = list(_archives.analyze_tarfile(tfile=tfile, settings=settings))
90+
elif stat.S_ISDIR(st.st_mode):
91+
infos = list(_archives.analyze_dirtree(filename, settings=settings))
9692
else:
9793
infos = [_elfdeps.analyze_file(filename, settings=settings)]
94+
9895
if args.provides:
9996
provides = set()
10097
for info in infos:
@@ -108,7 +105,7 @@ def main(argv: list[str] | None = None) -> None:
108105
for r in sorted(requires):
109106
print(r)
110107
else:
111-
for info in infos:
108+
for info in sorted(infos):
112109
pprint.pprint(info)
113110

114111

src/elfdeps/_archives.py

Lines changed: 139 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,35 @@
11
# SPDX-License-Identifier: Apache-2.0
22
"""Analyze archive members"""
33

4+
import logging
5+
import os
46
import pathlib
57
import stat
68
import tarfile
79
import typing
810
import zipfile
911

12+
from elftools.common.exceptions import ELFError
1013
from elftools.elf.elffile import ELFFile
1114

1215
from ._elfdeps import ELFAnalyzeSettings, ELFInfo, analyze_elffile
16+
from ._fileinfo import is_executable_file
17+
18+
logger = logging.getLogger(__name__)
19+
20+
21+
def _zipinfo_mode(zipinfo: zipfile.ZipInfo) -> int:
    """Return the full ``st_mode``-style mode for a zip member.

    The upper 16 bits of ``external_attr`` hold the Unix mode. When they
    carry no file-type bits, the type is derived from the MS-DOS directory
    attribute or from the member name and OR-ed into the permission bits.

    :param zipinfo: archive member to inspect
    :returns: permission bits combined with ``S_IFDIR`` or ``S_IFREG``
        (or the recorded Unix mode unchanged when it already has a type)
    """
    mode = zipinfo.external_attr >> 16
    if stat.S_IFMT(mode) == 0:
        # No file type recorded. 0x10 in the low half is the MS-DOS
        # directory attribute; a trailing "/" in the member name marks a
        # directory even when external_attr was left at 0.
        dos_attr = zipinfo.external_attr & 0xFFFF
        if dos_attr & 0x10 or zipinfo.is_dir():
            mode |= stat.S_IFDIR
        else:
            mode |= stat.S_IFREG
    return mode
1333

1434

1535
def analyze_zipmember(
@@ -19,8 +39,8 @@ def analyze_zipmember(
1939
settings: ELFAnalyzeSettings | None = None,
2040
) -> ELFInfo:
2141
"""Analyze a zipfile member"""
22-
mode = zipinfo.external_attr >> 16
23-
is_exec = bool(mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))
42+
mode = _zipinfo_mode(zipinfo)
43+
is_exec = is_executable_file(mode)
2444
filename = pathlib.Path(zipinfo.filename)
2545
with zfile.open(zipinfo, mode="r") as f:
2646
elffile = ELFFile(f)
@@ -29,15 +49,51 @@ def analyze_zipmember(
2949
)
3050

3151

52+
def analyze_zipfile(
    zfile: zipfile.ZipFile, *, settings: ELFAnalyzeSettings | None = None
) -> typing.Generator[ELFInfo, None, None]:
    """Yield ELF information for each candidate member of a zip archive.

    Members are selected with ``settings.is_candidate`` (default settings
    are created when none are given). Candidates whose analysis raises
    ``ELFError`` are logged at debug level and skipped.
    """
    settings = ELFAnalyzeSettings() if settings is None else settings
    for member in zfile.infolist():
        member_path = pathlib.Path(member.filename)
        member_mode = _zipinfo_mode(member)
        if not settings.is_candidate(member_path, member_mode):
            continue
        try:
            yield analyze_zipmember(zfile, member, settings=settings)
        except ELFError as err:
            # not an ELF file (e.g. a script or linker script)
            logger.debug("%s is not a ELF file: %s", member_path, err)
67+
68+
69+
def _tarinfo_mode(tarinfo: tarfile.TarInfo) -> int:
    """Combine a tar member's permission bits with its file-type bits.

    ``tarinfo.mode`` contains only permission bits, so the matching
    ``S_IF*`` flag is looked up from the member's type predicates. Members
    that match none of the predicates keep the bare permission bits.
    """
    # Checked in order; the first predicate that matches decides the type.
    type_flags = (
        (tarinfo.isreg, stat.S_IFREG),
        (tarinfo.isdir, stat.S_IFDIR),
        (tarinfo.issym, stat.S_IFLNK),
        (tarinfo.isblk, stat.S_IFBLK),
        (tarinfo.ischr, stat.S_IFCHR),
        (tarinfo.isfifo, stat.S_IFIFO),
    )
    for matches, type_flag in type_flags:
        if matches():
            return tarinfo.mode | type_flag
    return tarinfo.mode
86+
87+
3288
def analyze_tarmember(
3389
tfile: tarfile.TarFile,
3490
tarinfo: tarfile.TarInfo,
3591
*,
3692
settings: ELFAnalyzeSettings | None = None,
3793
) -> ELFInfo:
3894
"""Analyze a tarfile member"""
39-
mode = tarinfo.mode
40-
is_exec = bool(mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))
95+
mode = _tarinfo_mode(tarinfo)
96+
is_exec = is_executable_file(mode)
4197
filename = pathlib.Path(tarinfo.name)
4298
f = tfile.extractfile(tarinfo)
4399
if typing.TYPE_CHECKING:
@@ -47,3 +103,82 @@ def analyze_tarmember(
47103
return analyze_elffile(
48104
elffile, filename=filename, is_exec=is_exec, settings=settings
49105
)
106+
107+
108+
def analyze_tarfile(
    tfile: tarfile.TarFile, *, settings: ELFAnalyzeSettings | None = None
) -> typing.Generator[ELFInfo, None, None]:
    """Yield ELF information for each candidate member of a tar archive.

    Members are selected with ``settings.is_candidate`` (default settings
    are created when none are given). Candidates whose analysis raises
    ``ELFError`` are logged at debug level and skipped.
    """
    settings = ELFAnalyzeSettings() if settings is None else settings
    for member in tfile:
        member_path = pathlib.Path(member.name)
        member_mode = _tarinfo_mode(member)
        if not settings.is_candidate(member_path, member_mode):
            continue
        try:
            yield analyze_tarmember(tfile, member, settings=settings)
        except ELFError as err:
            # not an ELF file (e.g. a script or linker script)
            logger.debug("%s is not a ELF file: %s", member_path, err)
123+
124+
125+
# Optional error callback: it is called with the offending path and the
# OSError / ELFError that was raised. ``None`` disables the callback.
OnError = typing.Callable[[pathlib.Path, OSError | ELFError], None] | None
126+
127+
128+
def _scanwalk(
    dirname: pathlib.Path, onerror: OnError = None
) -> typing.Generator[os.DirEntry, None, None]:
    """Yield every non-directory entry below *dirname*, recursing as it goes.

    Directories are detected without following symlinks, so a symlink is
    yielded as an entry rather than descended into. An ``OSError`` raised
    while opening or iterating a directory is passed to *onerror* (when
    given) and ends the scan of that directory.
    """

    def report(err: OSError) -> None:
        if onerror is not None:
            onerror(dirname, err)

    try:
        scanner = os.scandir(dirname)
    except OSError as err:
        report(err)
        return

    exhausted = object()
    with scanner:
        while True:
            try:
                entry = next(scanner, exhausted)
            except OSError as err:
                report(err)
                return
            if entry is exhausted:
                break
            try:
                descend = entry.is_dir(follow_symlinks=False)
            except OSError:
                # Type could not be determined: hand the entry to the caller
                # instead of descending into it.
                descend = False
            if descend:
                yield from _scanwalk(pathlib.Path(entry.path), onerror=onerror)
            else:
                yield entry
157+
158+
159+
def analyze_dirtree(
    dirname: pathlib.Path,
    *,
    settings: ELFAnalyzeSettings | None = None,
    onerror: OnError = None,
) -> typing.Generator[ELFInfo, None, None]:
    """Recursively analyze a directory tree.

    Every non-directory entry below *dirname* is checked with
    ``settings.is_candidate``; candidates are opened and analyzed as ELF
    files.

    :param dirname: root directory to scan
    :param settings: analysis settings; defaults to ``ELFAnalyzeSettings()``
    :param onerror: optional callback ``onerror(path, exception)``. It is
        called for directories that cannot be scanned and for files that
        cannot be accessed or are not ELF files.
    :returns: generator of ``ELFInfo`` objects, one per analyzed file
    """
    if settings is None:
        settings = ELFAnalyzeSettings()
    # Forward onerror so that unreadable directories are reported, too.
    for entry in _scanwalk(dirname, onerror=onerror):
        filename = pathlib.Path(entry.path)
        try:
            mode = entry.stat(follow_symlinks=False).st_mode
            if not settings.is_candidate(filename, mode):
                continue
            with filename.open("rb") as f:
                info = analyze_elffile(
                    ELFFile(f),
                    filename=filename,
                    is_exec=is_executable_file(mode),
                    settings=settings,
                )
        except (OSError, ELFError) as err:
            logger.debug("%s is not a ELF file or is not accessible: %s", filename, err)
            if onerror is not None:
                onerror(filename, err)
        else:
            # Yield outside the try/with: the file is already closed and
            # exceptions thrown into the generator are not caught here.
            yield info

0 commit comments

Comments
 (0)