diff --git a/.bazelrc b/.bazelrc index 27e89faa97..3f16396659 100644 --- a/.bazelrc +++ b/.bazelrc @@ -4,8 +4,8 @@ # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, execute # `bazel run @rules_bazel_integration_test//tools:update_deleted_packages` -build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered -query 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered +build 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered +query 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered test --test_output=errors diff --git a/BUILD.bazel b/BUILD.bazel index cd4cbc544a..c97f41dee2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -35,6 +35,7 @@ filegroup( "BUILD.bazel", "MODULE.bazel", "WORKSPACE", + "WORKSPACE.bzlmod", "internal_deps.bzl", "internal_setup.bzl", "version.bzl", diff --git a/CHANGELOG.md b/CHANGELOG.md index 142df6a208..dd12bd6b77 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -19,6 +19,8 @@ A brief description of the categories of changes: ## Unreleased +[0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 + ### Changed ### Fixed @@ -32,8 +34,10 @@ A brief description of the categories of changes: * (gazelle) Added a new `python_default_visibility` directive to control the _default_ visibility of generated targets. See the [docs][python_default_visibility] for details. +* (bzlmod) New **experimental** `pypi_index` extension that can be used to + instruct the `pip.parse` tag class to use the bazel downloader to fetch + wheels. Note, the API is very unstable and may be changed at any time. -[0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 [python_default_visibility]: gazelle/README.md#directive-python_default_visibility ### Changed diff --git a/MODULE.bazel b/MODULE.bazel index 3ed92dbee6..98dc5cd702 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -4,7 +4,7 @@ module( compatibility_level = 1, ) -bazel_dep(name = "bazel_features", version = "1.1.1") +bazel_dep(name = "bazel_features", version = "1.9.0") bazel_dep(name = "bazel_skylib", version = "1.3.0") bazel_dep(name = "platforms", version = "0.0.4") @@ -12,7 +12,7 @@ bazel_dep(name = "platforms", version = "0.0.4") bazel_dep(name = "rules_proto", version = "5.3.0-21.7") bazel_dep(name = "protobuf", version = "21.7", repo_name = "com_google_protobuf") -internal_deps = use_extension("@rules_python//python/private/bzlmod:internal_deps.bzl", "internal_deps") +internal_deps = use_extension("//python/private/bzlmod:internal_deps.bzl", "internal_deps") internal_deps.install() use_repo( internal_deps, @@ -38,7 +38,7 @@ use_repo( # We need to do another use_extension call to expose the "pythons_hub" # repo. -python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python = use_extension("//python/extensions:python.bzl", "python") # The default toolchain to use if nobody configures a toolchain. 
# NOTE: This is not a stable version. It is provided for convenience, but will @@ -53,9 +53,36 @@ use_repo(python, "pythons_hub") # This call registers the Python toolchains. register_toolchains("@pythons_hub//:all") +# This call registers the `pypi_index` extension so that it can be used in the `pip` extension +pypi_index = use_extension("//python/extensions:pypi_index.bzl", "pypi_index") +use_repo(pypi_index, "pypi_index") + # ===== DEV ONLY DEPS AND SETUP BELOW HERE ===== bazel_dep(name = "stardoc", version = "0.6.2", dev_dependency = True, repo_name = "io_bazel_stardoc") bazel_dep(name = "rules_bazel_integration_test", version = "0.20.0", dev_dependency = True) +bazel_dep(name = "rules_testing", version = "0.5.0", dev_dependency = True) +bazel_dep(name = "rules_cc", version = "0.0.9", dev_dependency = True) + +# Extra gazelle deps +bazel_dep(name = "rules_go", version = "0.41.0", dev_dependency = True, repo_name = "io_bazel_rules_go") +bazel_dep(name = "gazelle", version = "0.33.0", dev_dependency = True, repo_name = "bazel_gazelle") + +# This call additionally only adds items to the `pypi_index` if we are +# not ignoring dev dependencies, making it no-op for the regular usage. 
+dev_pypi_index = use_extension( + "//python/extensions:pypi_index.bzl", + "pypi_index", + dev_dependency = True, +) +dev_pypi_index.add_requirements( + srcs = [ + # List all of the requirements files used by us + "//docs/sphinx:requirements.txt", + "//tools/publish:requirements_darwin.txt", + "//tools/publish:requirements.txt", + "//tools/publish:requirements_windows.txt", + ], +) dev_pip = use_extension( "//python/extensions:pip.bzl", @@ -77,6 +104,7 @@ dev_pip.parse( python_version = "3.11", requirements_lock = "//docs/sphinx:requirements.txt", ) +use_repo(dev_pip, "dev_pip") bazel_binaries = use_extension( "@rules_bazel_integration_test//:extensions.bzl", diff --git a/WORKSPACE.bzlmod b/WORKSPACE.bzlmod new file mode 100644 index 0000000000..b2023607fd --- /dev/null +++ b/WORKSPACE.bzlmod @@ -0,0 +1,99 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This file contains everything that is needed when using bzlmod +workspace(name = "rules_python") + +load("//python:repositories.bzl", "python_register_multi_toolchains") +load("//python:versions.bzl", "MINOR_MAPPING") + +python_register_multi_toolchains( + name = "python", + default_version = MINOR_MAPPING.values()[-2], + python_versions = MINOR_MAPPING.values(), +) + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file") + +# Used for Bazel CI +http_archive( + name = "bazelci_rules", + sha256 = "eca21884e6f66a88c358e580fd67a6b148d30ab57b1680f62a96c00f9bc6a07e", + strip_prefix = "bazelci_rules-1.0.0", + url = "https://github.com/bazelbuild/continuous-integration/releases/download/rules-1.0.0/bazelci_rules-1.0.0.tar.gz", +) + +load("@bazelci_rules//:rbe_repo.bzl", "rbe_preconfig") + +# Creates a default toolchain config for RBE. +# Use this as is if you are using the rbe_ubuntu16_04 container, +# otherwise refer to RBE docs. +rbe_preconfig( + name = "buildkite_config", + toolchain = "ubuntu1804-bazel-java11", +) + +local_repository( + name = "rules_python_gazelle_plugin", + path = "gazelle", +) + +# The rules_python gazelle extension has some third-party go dependencies +# which we need to fetch in order to compile it. +load("@rules_python_gazelle_plugin//:deps.bzl", _py_gazelle_deps = "gazelle_deps") + +# See: https://github.com/bazelbuild/rules_python/blob/main/gazelle/README.md +# This rule loads and compiles various go dependencies that running gazelle +# for python requirements. +_py_gazelle_deps() + +# This interpreter is used for various rules_python dev-time tools +load("@python//3.11.8:defs.bzl", "interpreter") + +##################### +# Install twine for our own runfiles wheel publishing. +# Eventually we might want to install twine automatically for users too, see: +# https://github.com/bazelbuild/rules_python/issues/1016. 
+load("@rules_python//python:pip.bzl", "pip_parse") + +pip_parse( + name = "publish_deps", + python_interpreter_target = interpreter, + requirements_darwin = "//tools/publish:requirements_darwin.txt", + requirements_lock = "//tools/publish:requirements.txt", + requirements_windows = "//tools/publish:requirements_windows.txt", +) + +load("@publish_deps//:requirements.bzl", "install_deps") + +install_deps() + +##################### + +# This wheel is purely here to validate the wheel extraction code. It's not +# intended for anything else. +http_file( + name = "wheel_for_testing", + downloaded_file_path = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + sha256 = "0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2", + urls = [ + "https://files.pythonhosted.org/packages/50/67/3e966d99a07d60a21a21d7ec016e9e4c2642a86fea251ec68677daf71d4d/numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + ], +) + +# rules_proto expects //external:python_headers to point at the python headers. +bind( + name = "python_headers", + actual = "//python/cc:current_py_cc_headers", +) diff --git a/docs/sphinx/BUILD.bazel b/docs/sphinx/BUILD.bazel index 8912f2cfb6..76ba21ea03 100644 --- a/docs/sphinx/BUILD.bazel +++ b/docs/sphinx/BUILD.bazel @@ -91,6 +91,7 @@ sphinx_stardocs( } if IS_BAZEL_7_OR_HIGHER else {}) | ({ # This depends on @pythons_hub, which is only created under bzlmod, "api/extensions/pip.md": "//python/extensions:pip_bzl", + "api/extensions/pypi_index.md": "//python/extensions:pypi_index_bzl", } if IS_BAZEL_7_OR_HIGHER and BZLMOD_ENABLED else {}), footer = "_stardoc_footer.md", tags = ["docs"], diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel index ceb0010bd4..2b7a870d57 100644 --- a/examples/bzlmod/MODULE.bazel +++ b/examples/bzlmod/MODULE.bazel @@ -43,6 +43,30 @@ python.toolchain( # rules based on the `python_version` arg values. 
use_repo(python, "python_3_10", "python_3_9", "python_versions") +# This extension allows rules_python to optimize downloading for packages by checking +# for available artifacts on PyPI Simple API compatible mirrors. +pypi_index = use_extension("@rules_python//python/extensions:pypi_index.bzl", "pypi_index") +pypi_index.add_requirements( + srcs = [ + "//:requirements_lock_3_10.txt", + "//:requirements_lock_3_9.txt", + "//:requirements_windows_3_10.txt", + "//:requirements_windows_3_9.txt", + ], +) + +# We can also initialize the extension in dev mode. +dev_pypi_index = use_extension( + "@rules_python//python/extensions:pypi_index.bzl", + "pypi_index", + dev_dependency = True, +) +dev_pypi_index.add_requirements( + srcs = [ + "//tests/dupe_requirements:requirements.txt", + ], +) + # This extension allows a user to create modifications to how rules_python # creates different wheel repositories. Different attributes allow the user # to modify the BUILD file, and copy files. diff --git a/python/extensions/BUILD.bazel b/python/extensions/BUILD.bazel index a9dede44ec..b0dcae0f9c 100644 --- a/python/extensions/BUILD.bazel +++ b/python/extensions/BUILD.bazel @@ -31,6 +31,13 @@ bzl_library( deps = ["//python/private/bzlmod:pip_bzl"], ) +bzl_library( + name = "pypi_index_bzl", + srcs = ["pypi_index.bzl"], + visibility = ["//:__subpackages__"], + deps = ["//python/private/bzlmod:pypi_index_bzl"], +) + bzl_library( name = "python_bzl", srcs = ["python.bzl"], diff --git a/python/extensions/pypi_index.bzl b/python/extensions/pypi_index.bzl new file mode 100644 index 0000000000..f8a48d6a99 --- /dev/null +++ b/python/extensions/pypi_index.bzl @@ -0,0 +1,19 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""See the doc in the implementation file.""" + +load("//python/private/bzlmod:pypi_index.bzl", _pypi_index = "pypi_index") + +pypi_index = _pypi_index diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index ded7112144..a91311c822 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -765,18 +765,27 @@ def _whl_library_impl(rctx): # Manually construct the PYTHONPATH since we cannot use the toolchain here environment = _create_repository_execution_environment(rctx, python_interpreter) - repo_utils.execute_checked( - rctx, - op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement), - arguments = args, - environment = environment, - quiet = rctx.attr.quiet, - timeout = rctx.attr.timeout, - ) + if rctx.attr.whl_file: + whl_path = rctx.path(rctx.attr.whl_file) + if not whl_path.exists: + fail("The given whl '{}' does not exist".format(rctx.attr.whl_file)) + + # Simulate the behaviour where the whl is present in the current directory. 
+ rctx.symlink(whl_path, whl_path.basename) + whl_path = rctx.path(whl_path.basename) + else: + repo_utils.execute_checked( + rctx, + op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement), + arguments = args, + environment = environment, + quiet = rctx.attr.quiet, + timeout = rctx.attr.timeout, + ) - whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"]) - if not rctx.delete("whl_file.json"): - fail("failed to delete the whl_file.json file") + whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"]) + if not rctx.delete("whl_file.json"): + fail("failed to delete the whl_file.json file") if rctx.attr.whl_patches: patches = {} @@ -910,6 +919,12 @@ whl_library_attrs = { mandatory = True, doc = "Python requirement string describing the package to make available", ), + "whl_file": attr.label( + doc = """\ +The wheel file label to be used for this installation. This will not use pip to download the +whl and instead use the supplied file. Note that the label needs to point to a single file. +""", + ), "whl_patches": attr.label_keyed_string_dict( doc = """a label-keyed-string dict that has json.encode(struct([whl_file], patch_strip]) as values. This diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index d3d6e76a35..b048a6ea83 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -119,6 +119,17 @@ bzl_library( srcs = ["parse_whl_name.bzl"], ) +bzl_library( + name = "pypi_index_bzl", + srcs = ["pypi_index.bzl"], + deps = [ + ":auth_bzl", + ":normalize_name_bzl", + ":text_util_bzl", + "//python/pip_install:requirements_parser_bzl", + ], +) + bzl_library( name = "py_cc_toolchain_bzl", srcs = [ diff --git a/python/private/auth.bzl b/python/private/auth.bzl index 39ada37cae..2b067fd088 100644 --- a/python/private/auth.bzl +++ b/python/private/auth.bzl @@ -33,10 +33,13 @@ def get_auth(rctx, urls): Returns: dict: A map of authentication parameters by URL. 
""" - if rctx.attr.netrc: - netrc = read_netrc(rctx, rctx.attr.netrc) + attr = getattr(rctx, "attr", None) + + if getattr(attr, "netrc", None): + netrc = read_netrc(rctx, getattr(attr, "netrc")) elif "NETRC" in rctx.os.environ: netrc = read_netrc(rctx, rctx.os.environ["NETRC"]) else: netrc = read_user_netrc(rctx) - return use_netrc(netrc, urls, rctx.attr.auth_patterns) + + return use_netrc(netrc, urls, getattr(attr, "auth_patterns", "")) diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index b636cca1a2..f5df9cebcb 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -32,6 +32,7 @@ bzl_library( ":pip_repository_bzl", "//python/pip_install:pip_repository_bzl", "//python/pip_install:requirements_parser_bzl", + "//python/private:pypi_index_bzl", "//python/private:full_version_bzl", "//python/private:normalize_name_bzl", "//python/private:parse_whl_name_bzl", @@ -57,6 +58,17 @@ bzl_library( ], ) +bzl_library( + name = "pypi_index_bzl", + srcs = ["pypi_index.bzl"], + deps = [ + ":bazel_features_bzl", + "//python/private:auth_bzl", + "//python/private:envsubst_bzl", + "//python/private:pypi_index_bzl", + ], +) + bzl_library( name = "python_bzl", srcs = ["python.bzl"], diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index a017089803..ff6ccdb081 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -27,6 +27,7 @@ load( load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") +load("//python/private:pypi_index.bzl", "get_simpleapi_sources") load("//python/private:render_pkg_aliases.bzl", "whl_alias") load("//python/private:version_label.bzl", "version_label") load(":pip_repository.bzl", "pip_repository") @@ -101,6 +102,8 @@ You cannot use both the additive_build_content and additive_build_content_file a 
def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): python_interpreter_target = pip_attr.python_interpreter_target + pypi_index_repo = module_ctx.path(pip_attr._pypi_index_repo).dirname + # if we do not have the python_interpreter set in the attributes # we programmatically find it. hub_name = pip_attr.hub_name @@ -180,10 +183,38 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): group_name = whl_group_mapping.get(whl_name) group_deps = requirement_cycles.get(group_name, []) + pkg_pypi_index = pypi_index_repo.get_child(whl_name, "index.json") + if not pkg_pypi_index.exists: + # The index for a package does not exist, so not using bazel downloader... + whl_file = None + else: + srcs = get_simpleapi_sources(requirement_line) + + index_json = { + v.sha256: v + for v in [ + struct(**encoded) + for encoded in json.decode(module_ctx.read(pkg_pypi_index)) + ] + } + whls = [ + index_json[sha] + for sha in srcs.shas + if index_json[sha].filename.endswith(".whl") + ] + + # For now only use the bazel downloader only whl file is a + # cross-platform wheel. + if len(whls) == 1 and whls[0].filename.endswith("-any.whl"): + whl_file = whls[0].label + else: + whl_file = None + repo_name = "{}_{}".format(pip_name, whl_name) whl_library( name = repo_name, requirement = requirement_line, + whl_file = whl_file, repo = pip_name, repo_prefix = pip_name + "_", annotation = annotation, @@ -414,6 +445,19 @@ a corresponding `python.toolchain()` configured. doc = """\ A dict of labels to wheel names that is typically generated by the whl_modifications. The labels are JSON config files describing the modifications. +""", + ), + "_pypi_index_repo": attr.label( + default = "@pypi_index//:BUILD.bazel", + doc = """\ +The label to the root of the pypi_index repository to be used for this particular +call of the `pip.parse`. 
This ensures that we can work with isolated usage of the +pip.parse tag class, where the user may want to also have the `pypi_index` usage +isolated as well. + +This also makes the code cleaner and ensures there are no cyclic dependencies. + +NOTE: For now this is internal and will be exposed if needed. """, ), }, **pip_repository_attrs) diff --git a/python/private/bzlmod/pypi_index.bzl b/python/private/bzlmod/pypi_index.bzl new file mode 100644 index 0000000000..496ad171cc --- /dev/null +++ b/python/private/bzlmod/pypi_index.bzl @@ -0,0 +1,184 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +PyPI index reading extension. + +This allows us to translate the lock file to URLs and labels, that we can use to set up the +rest of the packages in the hub repos. This is created as a separate repository to allow +`pip.parse` to be used in an isolated mode. + +NOTE: for now the repos resulting from this extension are only supposed to be used in the +rules_python repository until this notice is removed. + +I want the usage to be: +```starlark +pypi_index = use_extension("@rules_python//python/extensions:pypi_index.bzl", "pypi_index") +pypi_index.from_requirements( + srcs = [ + "my_requirement", + ], +) +``` + +The main index URL can be overridden with an env var PIP_INDEX_URL by default. What is more, +the user should be able to specify specific package locations to be obtained from elsewhere. 
+ +The most important thing to support would be to also support local wheel locations, where we +could read all of the wheels from a specific folder and construct the same repo. Like: +```starlark +pypi_index.from_dirs( + srcs = [ + "my_folder1", + "my_folder2", + ], +) +``` + +The implementation is left for a future PR. + +This can later be used by the `pip` extension when constructing the `whl_library` hubs by passing +the right `whl_file` to the rule. + +This `pypi_index` extension provides labels for reading the `METADATA` from wheels and downloads +metadata only if the Simple API of the PyPI compatible mirror is exposing it. Otherwise, it +falls back to downloading the whl file and then extracting the `METADATA` file so that the users +of the artifacts created by the extension do not have to care about it being any different. +Whilst this may make the downloading of the whl METADATA somewhat slower, the result will be +in the repository cache, so it should only be a minor hit to the performance. + +The presence of this `METADATA` allows us to essentially get the full graph of the dependencies +within a `hub` repo and contract any dependency cycles in the future as is shown in the +`pypi_install` extension PR. + +Whilst this design has been crafted for `bzlmod`, we could in theory just port this back to +WORKSPACE without too many issues. + +If you do: +```console +$ bazel query @pypi_index//requests/... 
+@pypi_index//requests:requests-2.28.2-py3-none-any.whl +@pypi_index//requests:requests-2.28.2-py3-none-any.whl.METADATA +@pypi_index//requests:requests-2.28.2.tar.gz +@pypi_index//requests:requests-2.31.0-py3-none-any.whl +@pypi_index//requests:requests-2.31.0-py3-none-any.whl.METADATA +@pypi_index//requests:requests-2.31.0.tar.gz +``` +""" + +load("@bazel_features//:features.bzl", "bazel_features") +load("//python/private:auth.bzl", "get_auth") +load("//python/private:envsubst.bzl", "envsubst") +load( + "//python/private:pypi_index.bzl", + "create_spoke_repos", + "get_packages_from_requirements", + "pypi_index_hub", +) + +_PYPI_INDEX = "pypi_index" + +def _impl(module_ctx): + simpleapi_srcs = {} + for mod in module_ctx.modules: + for reqs in mod.tags.add_requirements: + env_vars = ["PIP_INDEX_URL"] + index_url = envsubst( + reqs.index_url, + env_vars, + module_ctx.getenv if hasattr(module_ctx, "getenv") else module_ctx.os.environ.get, + ) + requirements_files = [module_ctx.read(module_ctx.path(src)) for src in reqs.srcs] + sources = get_packages_from_requirements(requirements_files) + for pkg, want_shas in sources.simpleapi.items(): + entry = simpleapi_srcs.setdefault(pkg, {"urls": {}, "want_shas": {}}) + entry["urls"]["{}/{}/".format(index_url.rstrip("/"), pkg)] = True + entry["want_shas"].update(want_shas) + + download_kwargs = {} + if bazel_features.external_deps.download_has_block_param: + download_kwargs["block"] = False + + downloads = {} + for pkg, args in simpleapi_srcs.items(): + output = module_ctx.path("{}/{}.html".format(_PYPI_INDEX, pkg)) + all_urls = list(args["urls"].keys()) + downloads[pkg] = struct( + out = output, + urls = all_urls, + download = module_ctx.download( + url = all_urls, + output = output, + auth = get_auth( + # Simulate the repository_ctx so that `get_auth` works. 
+ struct( + os = module_ctx.os, + path = module_ctx.path, + read = module_ctx.read, + ), + all_urls, + ), + **download_kwargs + ), + ) + + repos = {} + for pkg, args in simpleapi_srcs.items(): + download = downloads[pkg] + result = download.download + if download_kwargs.get("block") == False: + result = result.wait() + + if not result.success: + fail("Failed to download from {}: {}".format(download.urls, result)) + + repos.update( + create_spoke_repos( + simple_api_urls = download.urls, + pkg = pkg, + html_contents = module_ctx.read(download.out), + want_shas = args["want_shas"], + prefix = _PYPI_INDEX, + ), + ) + + pypi_index_hub( + name = _PYPI_INDEX, + repos = repos, + ) + +pypi_index = module_extension( + doc = "", + implementation = _impl, + tag_classes = { + "add_requirements": tag_class( + attrs = { + "extra_index_urls": attr.string_list( + doc = """\ +Extra indexes to read for the given files. The indexes should support introspection via HTML simple API standard. + +See https://packaging.python.org/en/latest/specifications/simple-repository-api/ +""", + ), + "index_url": attr.string( + doc = """\ +By default rules_python will use the env variable value of PIP_INDEX_URL if present. +""", + default = "${PIP_INDEX_URL:-https://pypi.org/simple}", + ), + "srcs": attr.label_list(), + }, + ), + }, +) diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl new file mode 100644 index 0000000000..64c3589d7d --- /dev/null +++ b/python/private/pypi_index.bzl @@ -0,0 +1,360 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +A file that houses private functions used in the `bzlmod` extension with the same name. + +The functions here should not depend on the `module_ctx` for easy unit testing. +""" + +load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load(":auth.bzl", "get_auth") +load(":normalize_name.bzl", "normalize_name") +load(":text_util.bzl", "render") + +_BUILD_TEMPLATE = """\ +# generated by @rules_python//python/private:pypi_index.bzl + +package(default_visibility = ["//visibility:public"]) +exports_files(["{}"]) +""" + +def get_packages_from_requirements(requirements_files): + """Get Simple API sources from a list of requirements files and merge them. + + Args: + requirements_files(list[str]): A list of requirements files contents. + + Returns: + A struct with `simpleapi` attribute that contains a dict of normalized package + name to a list of shas that we should index. + """ + want_packages = {} + for contents in requirements_files: + parse_result = parse_requirements(contents) + for distribution, line in parse_result.requirements: + want_packages.setdefault(normalize_name(distribution), {}).update({ + # TODO @aignas 2024-03-07: use sets + sha: True + for sha in get_simpleapi_sources(line).shas + }) + + return struct( + simpleapi = want_packages, + ) + +def get_simpleapi_sources(line): + """Get PyPI sources from a requirements.txt line. + + We interpret the spec described in + https://pip.pypa.io/en/stable/reference/requirement-specifiers/#requirement-specifiers + + Args: + line(str): The requirements.txt entry. 
+ + Returns: + A struct with shas attribute containing a list of shas to download from pypi_index. + """ + head, _, maybe_hashes = line.partition(";") + _, _, version = head.partition("==") + version = version.partition(" ")[0].strip() + + if "@" in head: + shas = [] + else: + maybe_hashes = maybe_hashes or line + shas = [ + sha.strip() + for sha in maybe_hashes.split("--hash=sha256:")[1:] + ] + + return struct(version = version, shas = sorted(shas)) + +def create_spoke_repos(simple_api_urls, pkg, html_contents, want_shas, prefix): + """Create spoke repos for the hub repo. + + Args: + simple_api_urls(list[str]): The URLs that were used to download the + HTML contents. + pkg(str): The name of the package. + html_contents(str): The contents of the simple API index. + want_shas(list[str]): The shas that we expect to find in the simple API metadata. + prefix(str): The prefix of all spoke repos. + + Returns: + A dict with the created repository names and the whl filenames that + they download. Note, that extra `.METADATA` repos for each whl + are also created, but they will not be in the returned dictionary. 
+ """ + repos = {} + urls = _get_packages( + simple_api_urls, + html_contents, + want_shas, + ) + + for url in urls: + pkg_name = "{}__{}_{}".format(prefix, pkg, url.sha256) + _pypi_archive( + name = pkg_name, + urls = [url.url], + filename = url.filename, + sha256 = url.sha256, + prefix = prefix, + ) + repos[pkg_name[len(prefix) + 2:]] = url.filename + + if url.metadata_sha256: + _pypi_archive( + name = pkg_name + ".METADATA", + urls = [url.metadata_url], + filename = "METADATA", + sha256 = url.metadata_sha256, + prefix = prefix, + ) + elif url.filename.endswith(".whl"): + _pypi_archive_metadata( + name = pkg_name + ".METADATA", + prefix = prefix, + whl = "@{}//{}:{}".format( + prefix, + pkg_name, + url.filename, + ), + ) + + return repos + +def _get_packages(index_urls, content, want_shas): + """Get the package URLs for given shas by parsing the Simple API HTML.""" + want_shas = {sha: True for sha in want_shas} + packages = [] + lines = content.split("= (2, 0): + # We don't expect to have version 2.0 here, but have this check in place just in case. + # https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api + fail("Unsupported API version: {}".format(api_version)) + + for line in lines[1:]: + url, _, tail = line.partition("#sha256=") + sha256, _, tail = tail.partition("\"") + + if sha256 not in want_shas: + continue + elif "data-yanked" in line: + # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api + # + # For now we just fail and inform the user to relock the requirements with a + # different version. 
+ fail("The package with '--hash=sha256:{}' was yanked, relock your requirements".format(sha256)) + else: + want_shas.pop(sha256) + + maybe_metadata, _, tail = tail.partition(">") + filename, _, tail = tail.partition("<") + + metadata_marker = "data-core-metadata=\"sha256=" + if metadata_marker in maybe_metadata: + # Implement https://peps.python.org/pep-0714/ + _, _, tail = maybe_metadata.partition(metadata_marker) + metadata_sha256, _, _ = tail.partition("\"") + metadata_url = url + ".metadata" + else: + metadata_sha256 = "" + metadata_url = "" + + packages.append( + struct( + filename = filename, + url = _absolute_urls(index_urls[0], url), + sha256 = sha256, + metadata_sha256 = metadata_sha256, + metadata_url = metadata_url, + ), + ) + + if len(want_shas): + fail( + "Indexes {} did not provide packages with all shas: {}".format( + index_urls, + ", ".join(want_shas.keys()), + ), + ) + + return packages + +def _absolute_urls(index_url, candidate): + if not candidate.startswith(".."): + return candidate + + candidate_parts = candidate.split("..") + last = candidate_parts[-1] + for _ in range(len(candidate_parts) - 1): + index_url, _, _ = index_url.rstrip("/").rpartition("/") + + return "{}/{}".format(index_url, last.strip("/")) + +def _hub_impl(rctx): + # This is so that calling the following in rules_python works: + # $ bazel query $pypi_index/... --ignore_dev_dependency + rctx.file("BUILD.bazel", "") + + if not rctx.attr.repos: + return + + packages = {} + for repo, filename in rctx.attr.repos.items(): + pkg, _, sha256 = repo.rpartition("_") + + packages.setdefault(pkg, []).append( + struct( + sha256 = sha256, + filename = filename, + label = str(Label("@@{}__{}//:{}".format(rctx.attr.name, repo, filename))), + ), + ) + + for pkg, filenames in packages.items(): + # This contains the labels that should be used in the `pip` extension + # to get the labels that can be used by `whl_library`. 
+ rctx.file( + "{}/index.json".format(pkg), + json.encode(filenames), + ) + + # These labels should be used to be passed to `whl_library`. + rctx.file( + "{}/BUILD.bazel".format(pkg), + "\n\n".join([ + _BUILD_TEMPLATE.format("index.json"), + ] + [ + render.alias( + name = r.filename, + actual = repr(r.label), + visibility = ["//visibility:private"], + ) + for r in filenames + ] + [ + render.alias( + name = r.filename + ".METADATA", + actual = repr(r.label.split("//:")[0] + ".METADATA//:METADATA"), + visibility = ["//visibility:private"], + ) + for r in filenames + if r.filename.endswith(".whl") + ]), + ) + +pypi_index_hub = repository_rule( + doc = """\ +This hub repository allows for easy passing of wheel labels to the pip extension. + +The layout of this repo is similar to the simple API: +//:BUILD.bazel +// - normalized to rules_python scheme - lowercase snake-case) + :index.json - contains all labels in the bazel package + :BUILD.bazel - contains aliases to the repos created by the extension for easy + introspection using `bazel query`. Visibility is private for now. + Change it to `public` if needed. 
+""", + implementation = _hub_impl, + attrs = { + "repos": attr.string_dict(mandatory = True), + }, +) + +def _impl_archive(rctx): + filename = rctx.attr.filename + rctx.file("BUILD.bazel", _BUILD_TEMPLATE.format(filename)) + + if rctx.attr.file: + rctx.symlink(rctx.path(rctx.attr.file), filename) + return + + result = rctx.download( + url = rctx.attr.urls, + output = filename, + auth = get_auth( + rctx, + rctx.attr.urls, + ), + ) + + if not result.success: + fail(result) + +_pypi_archive = repository_rule( + implementation = _impl_archive, + attrs = { + "file": attr.label( + doc = "Used for indexing wheels on the local filesystem", + allow_single_file = [".whl", ".tar.gz", ".zip"], + ), + "filename": attr.string(mandatory = True), + "prefix": attr.string(mandatory = True), + "sha256": attr.string(), + "urls": attr.string_list(), + }, +) + +def _impl_metadata(rctx): + whl_label = rctx.attr.whl + prefix = rctx.attr.prefix + + if not whl_label.workspace_name.endswith(prefix): + # Here we should have a hub repo label which we need to rewrite to the + # thing that the label is pointing to. We can do this because we own + # the construction of the labels. + fail("Expected the label to this rule to be from the '{}' hub repo".format(prefix)) + + # NOTE @aignas 2024-03-08: if we see restarts, then it could mean that we are not constructing + # the right label as an input file. + whl_label = Label("@@{}//:{}".format(rctx.name[:-len(".METADATA")], whl_label.name)) + + rctx.symlink(rctx.path(whl_label), "wheel.zip") + rctx.extract("wheel.zip") + + content = None + for p in rctx.path(".").readdir(): + if p.basename.endswith(".dist-info"): + content = rctx.read(p.get_child("METADATA")) + rctx.delete(p) + + if content == None: + fail("Could not find a METADATA file") + + rctx.file("METADATA", content) + rctx.file("BUILD.bazel", _BUILD_TEMPLATE.format("METADATA")) + +_pypi_archive_metadata = repository_rule( + doc = """Extract METADATA from a '.whl' file in repository context. 
+ +This allows to work with other implementations of Indexes that do not serve +METADATA like PyPI or with patched METADATA in patched and re-zipped wheels. +""", + implementation = _impl_metadata, + attrs = { + "prefix": attr.string(mandatory = True), + "whl": attr.label(mandatory = True, allow_single_file = [".whl"]), + }, +) diff --git a/tests/private/pypi_index/BUILD.bazel b/tests/private/pypi_index/BUILD.bazel new file mode 100644 index 0000000000..d365896cd3 --- /dev/null +++ b/tests/private/pypi_index/BUILD.bazel @@ -0,0 +1,3 @@ +load(":pypi_index_tests.bzl", "pypi_index_test_suite") + +pypi_index_test_suite(name = "pypi_index_tests") diff --git a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl new file mode 100644 index 0000000000..4320c7c2c5 --- /dev/null +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -0,0 +1,60 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Unit tests for `get_simpleapi_sources` from `//python/private:pypi_index.bzl`."""

load("@rules_testing//lib:test_suite.bzl", "test_suite")
load("//python/private:pypi_index.bzl", "get_simpleapi_sources")  # buildifier: disable=bzl-visibility

_tests = []

def _expect_sources(env, requirement, want_version, want_shas):
    """Parse one requirement line and check what `get_simpleapi_sources` reports.

    Args:
        env: the test environment handed to each test by `test_suite`.
        requirement: the requirements.txt entry to parse.
        want_version: the version string the parsed result should carry.
        want_shas: the exact collection of shas the parsed result should carry.
    """
    parsed = get_simpleapi_sources(requirement)
    env.expect.that_collection(parsed.shas).contains_exactly(want_shas)
    env.expect.that_str(parsed.version).equals(want_version)

def _test_no_simple_api_sources(env):
    """Entries without hashes, or pinned to a direct URL, report no shas."""
    requirements = [
        "foo==0.0.1",
        "foo==0.0.1 @ https://someurl.org",
        "foo==0.0.1 @ https://someurl.org --hash=sha256:deadbeef",
        "foo==0.0.1 @ https://someurl.org; python_version < 2.7 --hash=sha256:deadbeef",
    ]
    for requirement in requirements:
        _expect_sources(env, requirement, want_version = "0.0.1", want_shas = [])

_tests.append(_test_no_simple_api_sources)

def _test_simple_api_sources(env):
    """Hashed entries without a direct URL report every `--hash=sha256:` value."""
    cases = [
        (
            "foo==0.0.2 --hash=sha256:deafbeef --hash=sha256:deadbeef",
            ["deadbeef", "deafbeef"],
        ),
        (
            # The "@" lives in the environment marker, after the ";", so it
            # must not be mistaken for a direct URL reference.
            "foo[extra]==0.0.2; (python_version < 2.7 or something_else == \"@\") --hash=sha256:deafbeef --hash=sha256:deadbeef",
            ["deadbeef", "deafbeef"],
        ),
    ]
    for requirement, want_shas in cases:
        _expect_sources(env, requirement, want_version = "0.0.2", want_shas = want_shas)

_tests.append(_test_simple_api_sources)

def pypi_index_test_suite(name):
    """Create the test suite.

    Args:
        name: the name of the test suite
    """
    test_suite(name = name, basic_tests = _tests)