From 5c27d64cb5c234a1d8ba63d702043d95bdec7faf Mon Sep 17 00:00:00 2001 From: Vivek Bharadwaj Date: Mon, 27 May 2024 11:58:53 -0700 Subject: [PATCH 1/7] Added ability to disable filelock. --- README.md | 27 +++++++++++++++++++- cppimport/__init__.py | 1 + cppimport/importer.py | 57 +++++++++++++++++++++++-------------------- 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 579d401..5115dd0 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,8 @@ Note that `force_rebuild` does not work when importing the module concurrently. ### Can I import my model concurrently? -It's safe to use `cppimport` to import a module concurrently using multiple threads, processes or even machines! +It's (mostly) safe to use `cppimport` to import a module concurrently using multiple threads, processes or even machines! +There's an exception if your filesystem does not support file locking - see the next section. Before building a module, `cppimport` obtains a lockfile preventing other processors from building it at the same time - this prevents clashes that can lead to failure. Other processes will wait maximum 10 mins until the first process has built the module and load it. If your module does not build within 10 mins then it will timeout. @@ -173,6 +174,30 @@ cppimport.settings['lock_timeout'] = 10*60 # 10 mins You should not use `force_rebuild` when importing concurrently. +### The lockfile keeps timing out unexpectedly - what's going on? +Certain filesystems do not support file locking. You can disable the lock +in the settings (must be set before you import any extension). + +```python +cppimport.settings['use_filelock'] = False +``` + +In this case, you are responsible for ensuring that only a single process +(re)builds the package at a time. If you're using [mpi4py](https://mpi4py.readthedocs.io/en/stable/), +to run independent, communicating processes, here's an example of how that might work: + +```python +from mpi4py import MPI +import cppimport.import_hook + +pid = MPI.COMM_WORLD.Get_rank() + +if pid == 0: + import my_cpp_extension # Process 0 compiles the extension if necessary +MPI.COMM_WORLD.Barrier() # Remaining processors wait +import my_cpp_extension # All processes can use compiled extension +``` + ### How can I get information about filepaths in the configuration block? The module name is available as the `fullname` variable and the C++ module file is available as `filepath`. For example, diff --git a/cppimport/__init__.py b/cppimport/__init__.py index b6d3302..34f91f5 100644 --- a/cppimport/__init__.py +++ b/cppimport/__init__.py @@ -18,6 +18,7 @@ force_rebuild=False, # `force_rebuild` with multiple processes is not supported file_exts=[".cpp", ".c"], rtld_flags=ctypes.RTLD_LOCAL, + use_filelock=True, # Use filelocking if multiple processes try to build simultaneously lock_suffix=".lock", lock_timeout=10 * 60, remove_strict_prototypes=True, diff --git a/cppimport/importer.py b/cppimport/importer.py index c46b07f..cd74953 100644 --- a/cppimport/importer.py +++ b/cppimport/importer.py @@ -43,33 +43,36 @@ def build_completed(): t = time() - # Race to obtain the lock and build. Other processes can wait - while not build_completed() and time() - t < cppimport.settings["lock_timeout"]: - try: - with filelock.FileLock(lock_path, timeout=1): - if build_completed(): - break - template_and_build(filepath, module_data) - except filelock.Timeout: - logging.debug(f"Could not obtain lock (pid {os.getpid()})") - if cppimport.settings["force_rebuild"]: - raise ValueError( - "force_build must be False to build concurrently." - "This process failed to claim a filelock indicating that" - " a concurrent build is in progress" - ) - sleep(1) - - if os.path.exists(lock_path): - with suppress(OSError): - os.remove(lock_path) - - if not build_completed(): - raise Exception( - f"Could not compile binary as lock already taken and timed out." - f" Try increasing the timeout setting if " - f"the build time is longer (pid {os.getpid()})." - ) + if cppimport.settings['use_filelock']: + # Race to obtain the lock and build. Other processes can wait + while not build_completed() and time() - t < cppimport.settings["lock_timeout"]: + try: + with filelock.FileLock(lock_path, timeout=1): + if build_completed(): + break + template_and_build(filepath, module_data) + except filelock.Timeout: + logging.debug(f"Could not obtain lock (pid {os.getpid()})") + if cppimport.settings["force_rebuild"]: + raise ValueError( + "force_build must be False to build concurrently." + "This process failed to claim a filelock indicating that" + " a concurrent build is in progress" + ) + sleep(1) + + if os.path.exists(lock_path): + with suppress(OSError): + os.remove(lock_path) + + if not build_completed(): + raise Exception( + f"Could not compile binary as lock already taken and timed out." + f" Try increasing the timeout setting if " + f"the build time is longer (pid {os.getpid()})." + ) + else: + template_and_build(filepath, module_data) def template_and_build(filepath, module_data): From 2647cbdfe2013797ce37392b3a2646580db060f6 Mon Sep 17 00:00:00 2001 From: Vivek Bharadwaj Date: Mon, 27 May 2024 13:19:03 -0700 Subject: [PATCH 2/7] Updated README. --- README.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 5115dd0..092b5e2 100644 --- a/README.md +++ b/README.md @@ -174,28 +174,33 @@ cppimport.settings['lock_timeout'] = 10*60 # 10 mins You should not use `force_rebuild` when importing concurrently. -### The lockfile keeps timing out unexpectedly - what's going on? +### Acquiring the lock hangs or times out unexpectedly - what's going on? Certain filesystems do not support file locking. You can disable the lock -in the settings (must be set before you import any extension). +in the settings: ```python cppimport.settings['use_filelock'] = False ``` -In this case, you are responsible for ensuring that only a single process -(re)builds the package at a time. If you're using [mpi4py](https://mpi4py.readthedocs.io/en/stable/), -to run independent, communicating processes, here's an example of how that might work: +This setting must be changed before you import any +code. By setting `use_filelock=False`, you become responsible +for ensuring that only a single process +(re)builds the package at a time. For example: if you're +using [mpi4py](https://mpi4py.readthedocs.io/en/stable/) +to run independent, communicating processes, here's how +to protect the build: ```python from mpi4py import MPI -import cppimport.import_hook +import cppimport, cppimport.import_hook +cppimport.settings["use_filelock"] = False pid = MPI.COMM_WORLD.Get_rank() if pid == 0: - import my_cpp_extension # Process 0 compiles the extension if necessary -MPI.COMM_WORLD.Barrier() # Remaining processors wait -import my_cpp_extension # All processes can use compiled extension + import somecode # Process 0 compiles extension if needed +MPI.COMM_WORLD.Barrier() # Remaining processes wait +import somecode # All processes use compiled extension ``` ### How can I get information about filepaths in the configuration block? From 9dcfae8baad44e1bfba8d65141feeb641a3bc995 Mon Sep 17 00:00:00 2001 From: Vivek Bharadwaj Date: Mon, 27 May 2024 15:13:49 -0700 Subject: [PATCH 3/7] Updated tests. --- .gitignore | 1 + tests/conftest.py | 3 +++ tests/test_cppimport.py | 21 +++++++++++++++++---- 3 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 tests/conftest.py diff --git a/.gitignore b/.gitignore index f7a440a..79dfb27 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ htmlcov **/.DS_Store .eggs cppimport/_version.py +*.lock \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..05b4d6a --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,3 @@ +def pytest_addoption(parser): + parser.addoption('--multiprocessing', action='store_true', dest="multiprocessing", + default=False, help="enable multiprocessing tests with filelock") \ No newline at end of file diff --git a/tests/test_cppimport.py b/tests/test_cppimport.py index 9c12c74..7fad207 100644 --- a/tests/test_cppimport.py +++ b/tests/test_cppimport.py @@ -5,6 +5,7 @@ import shutil import subprocess import sys +import pytest from multiprocessing import Process from tempfile import TemporaryDirectory @@ -13,6 +14,10 @@ import cppimport.templating from cppimport.find import find_module_cpppath +cppimport.settings["use_filelock"] = False # No need for filelock except for + # multiprocessing test. +multiprocessing_enable = pytest.mark.skipif("not config.getoption('multiprocessing')") + root_logger = logging.getLogger() root_logger.setLevel(logging.DEBUG) @@ -22,7 +27,6 @@ handler.setFormatter(formatter) root_logger.addHandler(handler) - @contextlib.contextmanager def appended(filename, text): with open(filename, "r") as f: @@ -127,8 +131,11 @@ def test_with_file_in_syspath(): def test_rebuild_after_failed_compile(): cppimport.imp("mymodule") test_code = """ -import cppimport; mymodule = cppimport.imp("mymodule");assert(mymodule.add(1,2) == 3) -""" +import cppimport; +cppimport.settings["use_filelock"] = False; +mymodule = cppimport.imp("mymodule"); +assert(mymodule.add(1,2) == 3) + """ with appended("tests/mymodule.cpp", ";asdf;"): subprocess_check(test_code, 1) subprocess_check(test_code, 0) @@ -149,6 +156,7 @@ def test_no_rebuild_if_no_deps_change(): cppimport.imp("mymodule") test_code = """ import cppimport; +cppimport.settings["use_filelock"] = False; mymodule = cppimport.imp("mymodule"); assert(not hasattr(mymodule, 'Thing')) """ @@ -160,6 +168,7 @@ def test_rebuild_header_after_change(): cppimport.imp("mymodule") test_code = """ import cppimport; +cppimport.settings["use_filelock"] = False; mymodule = cppimport.imp("mymodule"); mymodule.Thing().cheer() """ @@ -214,8 +223,12 @@ def test_relative_import(): print(f()) assert f() == 3 - +@multiprocessing_enable def test_multiple_processes(): + ''' + Only runs if the flag --multiprocessing is passed to + pytest. This function requires file locking enabled. + ''' with tmp_dir(["tests/hook_test.cpp"]) as tmp_path: test_code = f""" import os; From ab4e2f41f521351e8a404e8a5d5ce9711f955116 Mon Sep 17 00:00:00 2001 From: Vivek Bharadwaj Date: Thu, 30 May 2024 22:13:55 -0700 Subject: [PATCH 4/7] Passed flake8, black, isort, upgraded miniconda GH action. --- .github/workflows/test.yml | 4 ++-- cppimport/__init__.py | 2 +- cppimport/build_module.py | 3 +-- cppimport/importer.py | 2 +- tests/conftest.py | 9 +++++++-- tests/test_cppimport.py | 22 ++++++++++++---------- 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 102888d..6b1fc3d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,7 +24,7 @@ jobs: shell: bash -l {0} steps: - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: mamba-version: "*" channels: conda-forge @@ -58,7 +58,7 @@ jobs: name: Build and publish Python distributions to PyPI and TestPyPI runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" diff --git a/cppimport/__init__.py b/cppimport/__init__.py index 34f91f5..f1bb6fc 100644 --- a/cppimport/__init__.py +++ b/cppimport/__init__.py @@ -18,7 +18,7 @@ force_rebuild=False, # `force_rebuild` with multiple processes is not supported file_exts=[".cpp", ".c"], rtld_flags=ctypes.RTLD_LOCAL, - use_filelock=True, # Use filelocking if multiple processes try to build simultaneously + use_filelock=True, # Filelock if multiple processes try to build simultaneously lock_suffix=".lock", lock_timeout=10 * 60, remove_strict_prototypes=True, diff --git a/cppimport/build_module.py b/cppimport/build_module.py index 9585c9b..bec390e 100644 --- a/cppimport/build_module.py +++ b/cppimport/build_module.py @@ -98,7 +98,7 @@ def _handle_strict_prototypes(): cfg_vars = distutils.sysconfig.get_config_vars() for key, value in cfg_vars.items(): - if type(value) == str: + if value is str: cfg_vars[key] = value.replace("-Wstrict-prototypes", "") @@ -144,7 +144,6 @@ def _parallel_compile( extra_postargs=None, depends=None, ): - # these lines are copied directly from distutils.ccompiler.CCompiler macros, objects, extra_postargs, pp_opts, build = self._setup_compile( output_dir, macros, include_dirs, sources, depends, extra_postargs diff --git a/cppimport/importer.py b/cppimport/importer.py index cd74953..e5ca91d 100644 --- a/cppimport/importer.py +++ b/cppimport/importer.py @@ -43,7 +43,7 @@ def build_completed(): t = time() - if cppimport.settings['use_filelock']: + if cppimport.settings["use_filelock"]: # Race to obtain the lock and build. Other processes can wait while not build_completed() and time() - t < cppimport.settings["lock_timeout"]: try: diff --git a/tests/conftest.py b/tests/conftest.py index 05b4d6a..627e092 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,8 @@ def pytest_addoption(parser): - parser.addoption('--multiprocessing', action='store_true', dest="multiprocessing", - default=False, help="enable multiprocessing tests with filelock") \ No newline at end of file + parser.addoption( + "--multiprocessing", + action="store_true", + dest="multiprocessing", + default=False, + help="enable multiprocessing tests with filelock", + ) diff --git a/tests/test_cppimport.py b/tests/test_cppimport.py index 7fad207..a21942d 100644 --- a/tests/test_cppimport.py +++ b/tests/test_cppimport.py @@ -5,17 +5,17 @@ import shutil import subprocess import sys -import pytest from multiprocessing import Process from tempfile import TemporaryDirectory +import pytest + import cppimport import cppimport.build_module import cppimport.templating from cppimport.find import find_module_cpppath -cppimport.settings["use_filelock"] = False # No need for filelock except for - # multiprocessing test. +cppimport.settings["use_filelock"] = False # Filelock only enabled for multiprocessing multiprocessing_enable = pytest.mark.skipif("not config.getoption('multiprocessing')") root_logger = logging.getLogger() @@ -27,6 +27,7 @@ handler.setFormatter(formatter) root_logger.addHandler(handler) + @contextlib.contextmanager def appended(filename, text): with open(filename, "r") as f: @@ -131,8 +132,8 @@ def test_with_file_in_syspath(): def test_rebuild_after_failed_compile(): cppimport.imp("mymodule") test_code = """ -import cppimport; -cppimport.settings["use_filelock"] = False; +import cppimport; +cppimport.settings["use_filelock"] = False; mymodule = cppimport.imp("mymodule"); assert(mymodule.add(1,2) == 3) """ @@ -156,7 +157,7 @@ def test_no_rebuild_if_no_deps_change(): cppimport.imp("mymodule") test_code = """ import cppimport; -cppimport.settings["use_filelock"] = False; +cppimport.settings["use_filelock"] = False; mymodule = cppimport.imp("mymodule"); assert(not hasattr(mymodule, 'Thing')) """ @@ -168,7 +169,7 @@ def test_rebuild_header_after_change(): cppimport.imp("mymodule") test_code = """ import cppimport; -cppimport.settings["use_filelock"] = False; +cppimport.settings["use_filelock"] = False; mymodule = cppimport.imp("mymodule"); mymodule.Thing().cheer() """ @@ -223,12 +224,13 @@ def test_relative_import(): print(f()) assert f() == 3 + @multiprocessing_enable def test_multiple_processes(): - ''' + """ Only runs if the flag --multiprocessing is passed to - pytest. This function requires file locking enabled. - ''' + pytest. This function requires file locking enabled. + """ with tmp_dir(["tests/hook_test.cpp"]) as tmp_path: test_code = f""" import os; From 5564a874baa47862a3a6464424ce4d4af83dfa17 Mon Sep 17 00:00:00 2001 From: Vivek Bharadwaj Date: Thu, 30 May 2024 22:22:34 -0700 Subject: [PATCH 5/7] Reformatted one remaining file. --- cppimport/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cppimport/__init__.py b/cppimport/__init__.py index f1bb6fc..069952e 100644 --- a/cppimport/__init__.py +++ b/cppimport/__init__.py @@ -1,6 +1,7 @@ """ See CONTRIBUTING.md for a description of the project structure and the internal logic. """ + import ctypes import logging import os From 0581bffc41b851f4859ff3001958797a167d7643 Mon Sep 17 00:00:00 2001 From: Vivek Bharadwaj Date: Thu, 30 May 2024 23:08:16 -0700 Subject: [PATCH 6/7] Modified README with example of filesystem that does not support locking and flock test. --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 092b5e2..fa0845c 100644 --- a/README.md +++ b/README.md @@ -175,8 +175,17 @@ cppimport.settings['lock_timeout'] = 10*60 # 10 mins You should not use `force_rebuild` when importing concurrently. ### Acquiring the lock hangs or times out unexpectedly - what's going on? -Certain filesystems do not support file locking. You can disable the lock -in the settings: +Certain platforms (e.g. those running +a Data Virtualization Service, DVS) do not support file locking. If you're on Linux with access to `flock`, you can test whether +locking is supported (credit to [this page](https://help.univention.com/t/howto-verify-the-mounted-filesystem-supports-file-locking/10149)): + +```bash +touch testfile +flock ./testfile true && echo ok || echo nok +``` + +If locking is not supported, you can disable the file lock in +the cppimport global settings: ```python cppimport.settings['use_filelock'] = False From 5788402c905e4271537ac38028b9f9e3ba1de5be Mon Sep 17 00:00:00 2001 From: Vivek Bharadwaj <67718556+vbharadwaj-bk@users.noreply.github.com> Date: Fri, 31 May 2024 08:38:30 -0700 Subject: [PATCH 7/7] Removed Python 3.7 from CI. --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6b1fc3d..48e9c7d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest"] - python-version: [ "3.7", "3.8", "3.9", "3.10"] + python-version: [ "3.8", "3.9", "3.10"] name: Test (${{ matrix.python-version }}, ${{ matrix.os }}) runs-on: ${{ matrix.os }} defaults: