Skip to content

Commit a6b9954

Browse files
authored
Support to ignore specific storage options when tokenizing (#1933)
1 parent 79dbdcd commit a6b9954

File tree

4 files changed

+112
-6
lines changed

4 files changed

+112
-6
lines changed

fsspec/conftest.py

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
import subprocess
44
import sys
55
import time
6+
from collections import deque
7+
from collections.abc import Generator, Sequence
68

79
import pytest
810

911
import fsspec
10-
from fsspec.implementations.cached import CachingFileSystem
1112

1213

1314
@pytest.fixture()
@@ -27,16 +28,85 @@ def m():
2728
m.pseudo_dirs.append("")
2829

2930

30-
@pytest.fixture
31+
class InstanceCacheInspector:
32+
"""
33+
Helper class to inspect instance caches of filesystem classes in tests.
34+
"""
35+
36+
def clear(self) -> None:
37+
"""
38+
Clear instance caches of all currently imported filesystem classes.
39+
"""
40+
classes = deque([fsspec.spec.AbstractFileSystem])
41+
while classes:
42+
cls = classes.popleft()
43+
cls.clear_instance_cache()
44+
classes.extend(cls.__subclasses__())
45+
46+
def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]:
47+
"""
48+
Gather counts of filesystem instances in the instance caches
49+
of all currently imported filesystem classes.
50+
51+
Parameters
52+
----------
53+
omit_zero:
54+
Whether to omit instance types with no cached instances.
55+
"""
56+
out: dict[str, int] = {}
57+
classes = deque([fsspec.spec.AbstractFileSystem])
58+
while classes:
59+
cls = classes.popleft()
60+
count = len(cls._cache) # there is no public interface for the cache
61+
# note: skip intermediate AbstractFileSystem subclasses
62+
# if they proxy the protocol attribute via a property.
63+
if isinstance(cls.protocol, (Sequence, str)):
64+
key = cls.protocol if isinstance(cls.protocol, str) else cls.protocol[0]
65+
if count or not omit_zero:
66+
out[key] = count
67+
classes.extend(cls.__subclasses__())
68+
return out
69+
70+
71+
@pytest.fixture(scope="function", autouse=True)
72+
def instance_caches() -> Generator[InstanceCacheInspector, None, None]:
73+
"""
74+
Fixture to ensure empty filesystem instance caches before and after a test.
75+
76+
Used by default for all tests.
77+
Clears caches of all imported filesystem classes.
78+
Can be used to write test assertions about instance caches.
79+
80+
Usage:
81+
82+
def test_something(instance_caches):
83+
# Test code here
84+
fsspec.open("file://abc")
85+
fsspec.open("memory://foo/bar")
86+
87+
# Test assertion
88+
assert instance_caches.gather_counts() == {"file": 1, "memory": 1}
89+
90+
Returns
91+
-------
92+
instance_caches: An instance cache inspector for clearing and inspecting caches.
93+
"""
94+
ic = InstanceCacheInspector()
95+
96+
ic.clear()
97+
try:
98+
yield ic
99+
finally:
100+
ic.clear()
101+
102+
103+
@pytest.fixture(scope="function")
31104
def ftp_writable(tmpdir):
32105
"""
33106
Fixture providing a writable FTP filesystem.
34107
"""
35108
pytest.importorskip("pyftpdlib")
36-
from fsspec.implementations.ftp import FTPFileSystem
37109

38-
FTPFileSystem.clear_instance_cache() # remove lingering connections
39-
CachingFileSystem.clear_instance_cache()
40110
d = str(tmpdir)
41111
with open(os.path.join(d, "out"), "wb") as f:
42112
f.write(b"hello" * 10000)

fsspec/implementations/cached.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class CachingFileSystem(ChainedFileSystem):
6161
"""
6262

6363
protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached")
64+
_strip_tokenize_options = ("fo",)
6465

6566
def __init__(
6667
self,

fsspec/implementations/tests/test_cached.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,3 +1358,33 @@ def test_local_temp_file_put_by_list2(protocol, mocker, tmp_path) -> None:
13581358
spy_put.assert_called_once_with([file.name], ["/some/file.txt"])
13591359
# which avoids isdir() check
13601360
spy_isdir.assert_not_called()
1361+
1362+
1363+
def test_simplecache_tokenization_independent_of_path():
1364+
# check that the tokenization is independent of the path
1365+
of0 = fsspec.open("simplecache::memory://foo/bar.txt")
1366+
of1 = fsspec.open("simplecache::memory://baz/qux.txt")
1367+
assert of0.path != of1.path
1368+
assert of0.fs._fs_token_ == of1.fs._fs_token_
1369+
assert of0.fs is of1.fs
1370+
1371+
1372+
def test_simplecache_instance_cache(instance_caches):
1373+
# check that the simplecache instance cache does not grow with every unique path
1374+
1375+
assert instance_caches.gather_counts() == {}
1376+
1377+
# check that the cache does not grow with multiple paths
1378+
fsspec.open("simplecache::memory://foo/bar.txt")
1379+
fsspec.open("simplecache::memory://bar/baz.txt")
1380+
fsspec.open("simplecache::memory://baz/qux.txt")
1381+
fsspec.open("simplecache::file:///foo/bar.txt")
1382+
fsspec.open("simplecache::memory://bar/baz.txt")
1383+
fsspec.open("simplecache::https://example.com/")
1384+
1385+
assert instance_caches.gather_counts() == {
1386+
"simplecache": 3,
1387+
"memory": 1,
1388+
"file": 1,
1389+
"http": 1,
1390+
}

fsspec/spec.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ def __call__(cls, *args, **kwargs):
6767
extra_tokens = tuple(
6868
getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
6969
)
70+
strip_tokenize_options = {
71+
k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
72+
}
7073
token = tokenize(
7174
cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
7275
)
@@ -78,7 +81,7 @@ def __call__(cls, *args, **kwargs):
7881
cls._latest = token
7982
return cls._cache[token]
8083
else:
81-
obj = super().__call__(*args, **kwargs)
84+
obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
8285
# Setting _fs_token here causes some static linters to complain.
8386
obj._fs_token_ = token
8487
obj.storage_args = args
@@ -115,6 +118,8 @@ class AbstractFileSystem(metaclass=_Cached):
115118

116119
#: Extra *class attributes* that should be considered when hashing.
117120
_extra_tokenize_attributes = ()
121+
#: *storage options* that should not be considered when hashing.
122+
_strip_tokenize_options = ()
118123

119124
# Set by _Cached metaclass
120125
storage_args: tuple[Any, ...]

0 commit comments

Comments
 (0)