1+ import logging
12import time
23from collections .abc import MutableMapping
4+ from enum import Enum
35from functools import lru_cache
6+ from pathlib import Path
7+ from typing import Union , Optional
48
9+ logger = logging .getLogger (__name__ )
510
6- class DirCache (MutableMapping ):
11+
12+ class MemoryDirCache (MutableMapping ):
713 """
814 Caching of directory listings, in a structure like::
915
@@ -26,19 +32,15 @@ class DirCache(MutableMapping):
2632
2733 def __init__ (
2834 self ,
29- use_listings_cache = True ,
30- listings_expiry_time = None ,
35+ expiry_time = None ,
3136 max_paths = None ,
3237 ** kwargs ,
3338 ):
3439 """
3540
3641 Parameters
3742 ----------
38- use_listings_cache: bool
39- If False, this cache never returns items, but always reports KeyError,
40- and setting items has no effect
41- listings_expiry_time: int or float (optional)
43+ expiry_time: int or float (optional)
4244 Time in seconds that a listing is considered valid. If None,
4345 listings do not expire.
4446 max_paths: int (optional)
@@ -49,13 +51,12 @@ def __init__(
4951 self ._times = {}
5052 if max_paths :
5153 self ._q = lru_cache (max_paths + 1 )(lambda key : self ._cache .pop (key , None ))
52- self .use_listings_cache = use_listings_cache
53- self .listings_expiry_time = listings_expiry_time
54+ self .expiry_time = expiry_time
5455 self .max_paths = max_paths
5556
5657 def __getitem__ (self , item ):
57- if self .listings_expiry_time is not None :
58- if self ._times .get (item , 0 ) - time .time () < - self .listings_expiry_time :
58+ if self .expiry_time is not None :
59+ if self ._times .get (item , 0 ) - time .time () < - self .expiry_time :
5960 del self ._cache [item ]
6061 if self .max_paths :
6162 self ._q (item )
@@ -75,12 +76,10 @@ def __contains__(self, item):
7576 return False
7677
7778 def __setitem__ (self , key , value ):
78- if not self .use_listings_cache :
79- return
8079 if self .max_paths :
8180 self ._q (key )
8281 self ._cache [key ] = value
83- if self .listings_expiry_time is not None :
82+ if self .expiry_time is not None :
8483 self ._times [key ] = time .time ()
8584
8685 def __delitem__ (self , key ):
@@ -93,6 +92,110 @@ def __iter__(self):
9392
9493 def __reduce__ (self ):
9594 return (
96- DirCache ,
97- (self .use_listings_cache , self .listings_expiry_time , self .max_paths ),
95+ MemoryDirCache ,
96+ (self .expiry_time , self .max_paths ),
97+ )
98+
99+
class FileDirCache(MutableMapping):
    """
    Directory-listings cache persisted on disk through ``diskcache``.

    Entries live in ``<directory>/dircache/<expiry_time>`` and are written
    with ``expire=expiry_time``, so expiry is handled by the backing store.
    """

    # Unique marker used to tell "key absent/expired" apart from a stored
    # falsy value such as an empty listing.
    _MISSING = object()

    def __init__(
        self,
        expiry_time=None,
        directory=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        directory: str (optional)
            Directory path at which the listings cache file is stored. If None,
            an autogenerated path at the user folder is created.

        Raises
        ------
        ImportError
            If ``platformdirs`` or ``diskcache`` is not installed.
        OSError
            If the cache directory cannot be created.
        """
        try:
            import platformdirs
            from diskcache import Cache
        except ImportError as e:
            raise ImportError(
                "The optional dependencies ``platformdirs`` and ``diskcache`` are required for file-based dircache."
            ) from e

        # Keep the caller's value: __reduce__ must hand back the *base*
        # directory, otherwise the "dircache/<expiry>" suffix added below
        # would be appended a second time on unpickling.
        self._base_directory = directory or None

        if not directory:
            directory = platformdirs.user_cache_dir(appname="fsspec")
        directory = Path(directory) / "dircache" / str(expiry_time)

        try:
            directory.mkdir(exist_ok=True, parents=True)
        except OSError:
            logger.error(
                f"Directory for dircache could not be created at {directory}."
            )
            raise
        else:
            logger.info(f"Dircache located at {directory}.")

        self.directory = directory
        self._cache = Cache(directory=str(directory))
        self.expiry_time = expiry_time

    def __getitem__(self, item):
        """Return the cached value for ``item``; raise KeyError if absent or expired."""
        value = self._cache.get(
            item, default=self._MISSING, read=True, retry=True
        )
        if value is self._MISSING:
            raise KeyError(item)
        return value

    def clear(self):
        """Remove every entry from the backing store."""
        self._cache.clear()

    def __len__(self):
        # Count through __iter__ so the length agrees with iteration, which
        # leaves out keys that no longer resolve to a value.
        return sum(1 for _ in self)

    def __contains__(self, item):
        # Compare against the sentinel rather than truthiness so that an
        # empty listing still counts as cached.
        found = self._cache.get(item, default=self._MISSING, retry=True)
        return found is not self._MISSING

    def __setitem__(self, key, value):
        self._cache.set(
            key=key, value=value, expire=self.expiry_time, retry=True
        )

    def __delitem__(self, key):
        del self._cache[key]

    def __iter__(self):
        return (k for k in self._cache.iterkeys() if k in self)

    def __reduce__(self):
        # Rebuild from the constructor arguments, not from the derived
        # ``self.directory``.
        return (
            FileDirCache,
            (self.expiry_time, self._base_directory),
        )
175+
176+
class CacheType(Enum):
    """Available directory-listing cache backends; each member's value is the implementing class."""

    # Backend implemented by MemoryDirCache
    MEMORY = MemoryDirCache
    # Backend implemented by FileDirCache
    FILE = FileDirCache
180+
181+
def create_dircache(
    cache_type: Optional[Union[CacheType, str]] = None,
    expiry_time: Optional[Union[int, float]] = None,
    **kwargs,
) -> Optional[Union[MemoryDirCache, FileDirCache]]:
    """
    Build the directory-listings cache selected by ``cache_type``.

    Parameters
    ----------
    cache_type: CacheType or str (optional)
        Backend to create. A string is matched case-insensitively against
        the ``CacheType`` member names (e.g. ``"memory"``, ``"file"``).
        If falsy, no cache is created.
    expiry_time: int or float (optional)
        Time in seconds that a listing is considered valid; converted to
        float when truthy. A value of zero means no cache is created.
    **kwargs:
        Forwarded to the cache class constructor.

    Returns
    -------
    The cache instance, or None when caching is not requested.

    Raises
    ------
    KeyError
        If ``cache_type`` does not name a known backend.
    """
    if not cache_type:
        return None
    cache_map = {
        CacheType.MEMORY: MemoryDirCache,
        CacheType.FILE: FileDirCache,
    }
    expiry_time = expiry_time and float(expiry_time)
    if expiry_time == 0.0:
        return None
    if isinstance(cache_type, str):
        # Accept the member name as a plain string so callers do not need
        # to import the enum.
        try:
            cache_type = CacheType[cache_type.upper()]
        except KeyError:
            known = ", ".join(member.name.lower() for member in CacheType)
            raise KeyError(
                f"Unknown dircache type {cache_type!r}; expected one of: {known}"
            ) from None
    return cache_map[cache_type](expiry_time, **kwargs)
197+
198+
if __name__ == "__main__":
    # Smoke check: build an in-memory cache and show it. The enum member is
    # passed because the factory looks the backend up by CacheType.
    d = create_dircache(cache_type=CacheType.MEMORY)
    print(d)
0 commit comments