Skip to content

Commit bb4f333

Browse files
authored
Merge pull request #163 from netomi/use-asyncio
Use async io where possible to improve runtime performance
2 parents f345f5b + 79d5d14 commit bb4f333

32 files changed

+4289
-28851
lines changed

requirements-dev.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,5 @@ tomli==1.2.3
3838
tqdm==4.64.0
3939
twine==3.8.0
4040
typed-ast==1.5.4
41-
webencodings==0.5.1
41+
webencodings==0.5.1
42+
pytest-asyncio==0.21.1

requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ idna==3.3
1010
importlib-metadata==4.12.0
1111
intbitset==3.1.0
1212
packageurl-python==0.10.0
13-
packaging==21.3
13+
packaging==24.2
1414
packvers==21.5
1515
pip-requirements-parser==32.0.1
1616
pkginfo2==30.0.0
@@ -24,3 +24,5 @@ text-unidecode==1.3
2424
toml==0.10.2
2525
urllib3==1.26.11
2626
zipp==3.8.1
27+
aiohttp==3.11.14
28+
aiofiles==23.2.1

setup.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ install_requires =
6969
toml >= 0.10.0
7070
mock >= 3.0.5
7171
packvers >= 21.5
72+
aiohttp >= 3.8
73+
aiofiles >= 23.1
74+
7275
[options.packages.find]
7376
where = src
7477

@@ -86,6 +89,7 @@ testing =
8689
black
8790
isort
8891
pytest-rerunfailures
92+
pytest-asyncio >= 0.21
8993

9094
docs =
9195
Sphinx>=5.0.2

src/python_inspector/api.py

Lines changed: 94 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
# See https://github.com/aboutcode-org/python-inspector for support or download.
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
11-
11+
import asyncio
1212
import os
1313
from netrc import netrc
1414
from typing import Dict
1515
from typing import List
1616
from typing import NamedTuple
1717
from typing import Sequence
18+
from typing import Tuple
1819

1920
from packageurl import PackageURL
2021
from packvers.requirements import Requirement
@@ -26,7 +27,7 @@
2627
from _packagedcode.pypi import PipRequirementsFileHandler
2728
from _packagedcode.pypi import PythonSetupPyHandler
2829
from _packagedcode.pypi import can_process_dependent_package
29-
from python_inspector import DEFAULT_PYTHON_VERSION
30+
from _packagedcode.pypi import get_resolved_purl
3031
from python_inspector import dependencies
3132
from python_inspector import utils
3233
from python_inspector import utils_pypi
@@ -39,6 +40,7 @@
3940
from python_inspector.resolution import get_python_version_from_env_tag
4041
from python_inspector.resolution import get_reqs_insecurely
4142
from python_inspector.resolution import get_requirements_from_python_manifest
43+
from python_inspector.utils import Candidate
4244
from python_inspector.utils_pypi import PLATFORMS_BY_OS
4345
from python_inspector.utils_pypi import PYPI_SIMPLE_URL
4446
from python_inspector.utils_pypi import Environment
@@ -54,7 +56,7 @@ class Resolution(NamedTuple):
5456
``files`` is a parsed list of input file data.
5557
"""
5658

57-
resolution: Dict
59+
resolution: List[Dict]
5860
packages: List[PackageData]
5961
files: List[Dict]
6062

@@ -286,21 +288,27 @@ def resolve_dependencies(
286288
pdt_output=pdt_output,
287289
analyze_setup_py_insecurely=analyze_setup_py_insecurely,
288290
ignore_errors=ignore_errors,
291+
verbose=verbose,
292+
printer=printer,
289293
)
290294

291-
packages = []
295+
async def gather_pypi_data():
296+
async def get_pypi_data(package):
297+
data = await get_pypi_data_from_purl(
298+
package, repos=repos, environment=environment, prefer_source=prefer_source
299+
)
292300

293-
for package in purls:
294-
packages.extend(
295-
[
296-
pkg.to_dict()
297-
for pkg in list(
298-
get_pypi_data_from_purl(
299-
package, repos=repos, environment=environment, prefer_source=prefer_source
300-
)
301-
)
302-
],
303-
)
301+
if verbose:
302+
printer(f" retrieved package '{package}'")
303+
304+
return data
305+
306+
if verbose:
307+
printer(f"retrieve package data from pypi:")
308+
309+
return await asyncio.gather(*[get_pypi_data(package) for package in purls])
310+
311+
packages = [pkg.to_dict() for pkg in asyncio.run(gather_pypi_data()) if pkg is not None]
304312

305313
if verbose:
306314
printer("done!")
@@ -316,14 +324,16 @@ def resolve_dependencies(
316324

317325

318326
def resolve(
319-
direct_dependencies,
320-
environment,
321-
repos=tuple(),
322-
as_tree=False,
323-
max_rounds=200000,
324-
pdt_output=False,
325-
analyze_setup_py_insecurely=False,
326-
ignore_errors=False,
327+
direct_dependencies: List[DependentPackage],
328+
environment: Environment,
329+
repos: Sequence[utils_pypi.PypiSimpleRepository] = tuple(),
330+
as_tree: bool = False,
331+
max_rounds: int = 200000,
332+
pdt_output: bool = False,
333+
analyze_setup_py_insecurely: bool = False,
334+
ignore_errors: bool = False,
335+
verbose: bool = False,
336+
printer=print,
327337
):
328338
"""
329339
Resolve dependencies given a ``direct_dependencies`` list of
@@ -350,6 +360,8 @@ def resolve(
350360
pdt_output=pdt_output,
351361
analyze_setup_py_insecurely=analyze_setup_py_insecurely,
352362
ignore_errors=ignore_errors,
363+
verbose=verbose,
364+
printer=printer,
353365
)
354366

355367
return resolved_dependencies, packages
@@ -364,32 +376,77 @@ def get_resolved_dependencies(
364376
pdt_output: bool = False,
365377
analyze_setup_py_insecurely: bool = False,
366378
ignore_errors: bool = False,
367-
):
379+
verbose: bool = False,
380+
printer=print,
381+
) -> Tuple[List[Dict], List[str]]:
368382
"""
369383
Return resolved dependencies of a ``requirements`` list of Requirement for
370-
an ``enviroment`` Environment. The resolved dependencies are formatted as
384+
an ``environment`` Environment. The resolved dependencies are formatted as
371385
parent/children or a nested tree if ``as_tree`` is True.
372386
373387
Use the provided ``repos`` list of PypiSimpleRepository.
374-
If empty, use instead the PyPI.org JSON API exclusively instead
388+
If empty, use the PyPI.org JSON API exclusively instead.
375389
"""
390+
provider = PythonInputProvider(
391+
environment=environment,
392+
repos=repos,
393+
analyze_setup_py_insecurely=analyze_setup_py_insecurely,
394+
ignore_errors=ignore_errors,
395+
)
396+
397+
# gather version data for all requirements concurrently in advance.
398+
399+
async def gather_version_data():
400+
async def get_version_data(name: str):
401+
versions = await provider.fill_versions_for_package(name)
402+
403+
if verbose:
404+
printer(f" retrieved versions for package '{name}'")
405+
406+
return versions
407+
408+
if verbose:
409+
printer(f"versions:")
410+
411+
return await asyncio.gather(
412+
*[get_version_data(requirement.name) for requirement in requirements]
413+
)
414+
415+
asyncio.run(gather_version_data())
416+
417+
# gather dependencies for all pinned requirements concurrently in advance.
418+
419+
async def gather_dependencies():
420+
async def get_dependencies(requirement: Requirement):
421+
purl = PackageURL(type="pypi", name=requirement.name)
422+
resolved_purl = get_resolved_purl(purl=purl, specifiers=requirement.specifier)
423+
424+
if resolved_purl:
425+
purl = resolved_purl.purl
426+
candidate = Candidate(requirement.name, purl.version, requirement.extras)
427+
await provider.fill_requirements_for_package(purl, candidate)
428+
429+
if verbose:
430+
printer(f" retrieved dependencies for requirement '{str(purl)}'")
431+
432+
if verbose:
433+
printer(f"dependencies:")
434+
435+
return await asyncio.gather(
436+
*[get_dependencies(requirement) for requirement in requirements]
437+
)
438+
439+
asyncio.run(gather_dependencies())
440+
376441
resolver = Resolver(
377-
provider=PythonInputProvider(
378-
environment=environment,
379-
repos=repos,
380-
analyze_setup_py_insecurely=analyze_setup_py_insecurely,
381-
ignore_errors=ignore_errors,
382-
),
442+
provider=provider,
383443
reporter=BaseReporter(),
384444
)
385445
resolver_results = resolver.resolve(requirements=requirements, max_rounds=max_rounds)
386446
package_list = get_package_list(results=resolver_results)
387447
if pdt_output:
388-
return (format_pdt_tree(resolver_results), package_list)
389-
return (
390-
format_resolution(resolver_results, as_tree=as_tree),
391-
package_list,
392-
)
448+
return format_pdt_tree(resolver_results), package_list
449+
return format_resolution(resolver_results, as_tree=as_tree), package_list
393450

394451

395452
def get_requirements_from_direct_dependencies(

src/python_inspector/dependencies.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,29 @@
88
# See https://github.com/nexB/skeleton for support or download.
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
11+
from typing import Iterable
12+
from typing import Mapping
1113

1214
from packageurl import PackageURL
1315
from packvers.requirements import Requirement
14-
from pip_requirements_parser import InstallRequirement
1516

1617
from _packagedcode import models
18+
from _packagedcode.models import DependentPackage
1719
from _packagedcode.pypi import PipRequirementsFileHandler
1820
from _packagedcode.pypi import get_requirements_txt_dependencies
1921

2022
"""
21-
Utilities to resolve dependencies .
23+
Utilities to resolve dependencies.
2224
"""
2325

2426
TRACE = False
2527

2628

27-
def get_dependencies_from_requirements(requirements_file="requirements.txt"):
29+
def get_dependencies_from_requirements(
30+
requirements_file="requirements.txt",
31+
) -> Iterable[DependentPackage]:
2832
"""
29-
Yield DependentPackage for each requirement in a `requirement`
30-
file.
33+
Yield DependentPackage for each requirement in a `requirements` file.
3134
"""
3235
dependent_packages, _ = get_requirements_txt_dependencies(
3336
location=requirements_file, include_nested=True
@@ -41,21 +44,20 @@ def get_dependencies_from_requirements(requirements_file="requirements.txt"):
4144
yield dependent_package
4245

4346

44-
def get_extra_data_from_requirements(requirements_file="requirements.txt"):
47+
def get_extra_data_from_requirements(requirements_file="requirements.txt") -> Iterable[Mapping]:
4548
"""
46-
Yield extra_data for each requirement in a `requirement`
47-
file.
49+
Yield extra_data for each requirement in a `requirements` file.
4850
"""
4951
for package_data in PipRequirementsFileHandler.parse(location=requirements_file):
5052
yield package_data.extra_data
5153

5254

53-
def is_requirement_pinned(requirement: Requirement):
55+
def is_requirement_pinned(requirement: Requirement) -> bool:
5456
specifiers = requirement.specifier
5557
return specifiers and len(specifiers) == 1 and next(iter(specifiers)).operator in {"==", "==="}
5658

5759

58-
def get_dependency(specifier):
60+
def get_dependency(specifier) -> DependentPackage:
5961
"""
6062
Return a DependentPackage given a requirement ``specifier`` string.
6163

0 commit comments

Comments
 (0)