diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 102888d..48e9c7d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-latest", "macos-latest"]
-        python-version: [ "3.7", "3.8", "3.9", "3.10"]
+        python-version: [ "3.8", "3.9", "3.10"]
     name: Test (${{ matrix.python-version }}, ${{ matrix.os }})
     runs-on: ${{ matrix.os }}
     defaults:
@@ -24,7 +24,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - uses: actions/checkout@v2
-      - uses: conda-incubator/setup-miniconda@v2
+      - uses: conda-incubator/setup-miniconda@v3
         with:
           mamba-version: "*"
           channels: conda-forge
@@ -58,7 +58,7 @@ jobs:
     name: Build and publish Python distributions to PyPI and TestPyPI
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - uses: conda-incubator/setup-miniconda@v2
         with:
           mamba-version: "*"
diff --git a/.gitignore b/.gitignore
index f7a440a..79dfb27 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ htmlcov
 **/.DS_Store
 .eggs
 cppimport/_version.py
+*.lock
\ No newline at end of file
diff --git a/README.md b/README.md
index 579d401..fa0845c 100644
--- a/README.md
+++ b/README.md
@@ -161,7 +161,8 @@ Note that `force_rebuild` does not work when importing the module concurrently.
 
 ### Can I import my model concurrently?
 
-It's safe to use `cppimport` to import a module concurrently using multiple threads, processes or even machines!
+It's (mostly) safe to use `cppimport` to import a module concurrently using multiple threads, processes or even machines!
+There's an exception if your filesystem does not support file locking - see the next section.
 
 Before building a module, `cppimport` obtains a lockfile preventing other processors from building it at the same time - this prevents clashes that can lead to failure.
 Other processes will wait maximum 10 mins until the first process has built the module and load it. If your module does not build within 10 mins then it will timeout.
@@ -173,6 +174,44 @@ cppimport.settings['lock_timeout'] = 10*60 # 10 mins
 ```
 
 You should not use `force_rebuild` when importing concurrently.
 
+### Acquiring the lock hangs or times out unexpectedly - what's going on?
+Certain platforms (e.g. those running
+a Data Virtualization Service, DVS) do not support file locking. If you're on Linux with access to `flock`, you can test whether
+locking is supported (credit to [this page](https://help.univention.com/t/howto-verify-the-mounted-filesystem-supports-file-locking/10149)):
+
+```bash
+touch testfile
+flock ./testfile true && echo ok || echo nok
+```
+
+If locking is not supported, you can disable the file lock in
+the cppimport global settings:
+
+```python
+cppimport.settings['use_filelock'] = False
+```
+
+This setting must be changed before you import any
+code. By setting `use_filelock=False`, you become responsible
+for ensuring that only a single process
+(re)builds the package at a time. For example: if you're
+using [mpi4py](https://mpi4py.readthedocs.io/en/stable/)
+to run independent, communicating processes, here's how
+to protect the build:
+
+```python
+from mpi4py import MPI
+import cppimport, cppimport.import_hook
+cppimport.settings["use_filelock"] = False
+
+pid = MPI.COMM_WORLD.Get_rank()
+
+if pid == 0:
+    import somecode  # Process 0 compiles extension if needed
+MPI.COMM_WORLD.Barrier()  # Remaining processes wait
+import somecode  # All processes use compiled extension
+```
+
 ### How can I get information about filepaths in the configuration block?
 The module name is available as the `fullname` variable and the C++ module file is available as `filepath`. For example,
diff --git a/cppimport/__init__.py b/cppimport/__init__.py
index b6d3302..069952e 100644
--- a/cppimport/__init__.py
+++ b/cppimport/__init__.py
@@ -1,6 +1,7 @@
 """
 See CONTRIBUTING.md for a description of the project structure and the internal logic.
 """
+
 import ctypes
 import logging
 import os
@@ -18,6 +19,7 @@
     force_rebuild=False,  # `force_rebuild` with multiple processes is not supported
     file_exts=[".cpp", ".c"],
     rtld_flags=ctypes.RTLD_LOCAL,
+    use_filelock=True,  # Filelock if multiple processes try to build simultaneously
     lock_suffix=".lock",
     lock_timeout=10 * 60,
     remove_strict_prototypes=True,
diff --git a/cppimport/build_module.py b/cppimport/build_module.py
index 9585c9b..bec390e 100644
--- a/cppimport/build_module.py
+++ b/cppimport/build_module.py
@@ -98,7 +98,7 @@ def _handle_strict_prototypes():
 
     cfg_vars = distutils.sysconfig.get_config_vars()
     for key, value in cfg_vars.items():
-        if type(value) == str:
+        if isinstance(value, str):
             cfg_vars[key] = value.replace("-Wstrict-prototypes", "")
 
 
@@ -144,7 +144,6 @@ def _parallel_compile(
     extra_postargs=None,
     depends=None,
 ):
-    # these lines are copied directly from distutils.ccompiler.CCompiler
     macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
         output_dir, macros, include_dirs, sources, depends, extra_postargs
     )
diff --git a/cppimport/importer.py b/cppimport/importer.py
index c46b07f..e5ca91d 100644
--- a/cppimport/importer.py
+++ b/cppimport/importer.py
@@ -43,33 +43,36 @@ def build_completed():
 
     t = time()
 
-    # Race to obtain the lock and build. Other processes can wait
-    while not build_completed() and time() - t < cppimport.settings["lock_timeout"]:
-        try:
-            with filelock.FileLock(lock_path, timeout=1):
-                if build_completed():
-                    break
-                template_and_build(filepath, module_data)
-        except filelock.Timeout:
-            logging.debug(f"Could not obtain lock (pid {os.getpid()})")
-            if cppimport.settings["force_rebuild"]:
-                raise ValueError(
-                    "force_build must be False to build concurrently."
-                    "This process failed to claim a filelock indicating that"
-                    " a concurrent build is in progress"
-                )
-            sleep(1)
-
-    if os.path.exists(lock_path):
-        with suppress(OSError):
-            os.remove(lock_path)
-
-    if not build_completed():
-        raise Exception(
-            f"Could not compile binary as lock already taken and timed out."
-            f" Try increasing the timeout setting if "
-            f"the build time is longer (pid {os.getpid()})."
-        )
+    if cppimport.settings["use_filelock"]:
+        # Race to obtain the lock and build. Other processes can wait
+        while not build_completed() and time() - t < cppimport.settings["lock_timeout"]:
+            try:
+                with filelock.FileLock(lock_path, timeout=1):
+                    if build_completed():
+                        break
+                    template_and_build(filepath, module_data)
+            except filelock.Timeout:
+                logging.debug(f"Could not obtain lock (pid {os.getpid()})")
+                if cppimport.settings["force_rebuild"]:
+                    raise ValueError(
+                        "force_build must be False to build concurrently."
+                        "This process failed to claim a filelock indicating that"
+                        " a concurrent build is in progress"
+                    )
+                sleep(1)
+
+        if os.path.exists(lock_path):
+            with suppress(OSError):
+                os.remove(lock_path)
+
+        if not build_completed():
+            raise Exception(
+                f"Could not compile binary as lock already taken and timed out."
+                f" Try increasing the timeout setting if "
+                f"the build time is longer (pid {os.getpid()})."
+            )
+    else:
+        template_and_build(filepath, module_data)
 
 
 def template_and_build(filepath, module_data):
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..627e092
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,8 @@
+def pytest_addoption(parser):
+    parser.addoption(
+        "--multiprocessing",
+        action="store_true",
+        dest="multiprocessing",
+        default=False,
+        help="enable multiprocessing tests with filelock",
+    )
diff --git a/tests/test_cppimport.py b/tests/test_cppimport.py
index 9c12c74..a21942d 100644
--- a/tests/test_cppimport.py
+++ b/tests/test_cppimport.py
@@ -8,11 +8,16 @@
 from multiprocessing import Process
 from tempfile import TemporaryDirectory
 
+import pytest
+
 import cppimport
 import cppimport.build_module
 import cppimport.templating
 from cppimport.find import find_module_cpppath
 
+cppimport.settings["use_filelock"] = False  # Filelock only enabled for multiprocessing
+multiprocessing_enable = pytest.mark.skipif("not config.getoption('multiprocessing')")
+
 root_logger = logging.getLogger()
 root_logger.setLevel(logging.DEBUG)
 
@@ -127,8 +132,11 @@ def test_with_file_in_syspath():
 def test_rebuild_after_failed_compile():
     cppimport.imp("mymodule")
     test_code = """
-import cppimport; mymodule = cppimport.imp("mymodule");assert(mymodule.add(1,2) == 3)
-"""
+import cppimport;
+cppimport.settings["use_filelock"] = False;
+mymodule = cppimport.imp("mymodule");
+assert(mymodule.add(1,2) == 3)
+    """
     with appended("tests/mymodule.cpp", ";asdf;"):
         subprocess_check(test_code, 1)
     subprocess_check(test_code, 0)
@@ -149,6 +157,7 @@ def test_no_rebuild_if_no_deps_change():
     cppimport.imp("mymodule")
     test_code = """
 import cppimport;
+cppimport.settings["use_filelock"] = False;
 mymodule = cppimport.imp("mymodule");
 assert(not hasattr(mymodule, 'Thing'))
 """
@@ -160,6 +169,7 @@ def test_rebuild_header_after_change():
     cppimport.imp("mymodule")
     test_code = """
 import cppimport;
+cppimport.settings["use_filelock"] = False;
 mymodule = cppimport.imp("mymodule");
 mymodule.Thing().cheer()
 """
@@ -215,7 +225,12 @@ def test_relative_import():
     assert f() == 3
 
 
+@multiprocessing_enable
 def test_multiple_processes():
+    """
+    Only runs if the flag --multiprocessing is passed to
+    pytest. This function requires file locking enabled.
+    """
     with tmp_dir(["tests/hook_test.cpp"]) as tmp_path:
         test_code = f"""
 import os;