1+ import logging
12import time
23from collections .abc import MutableMapping
4+ from enum import Enum
35from functools import lru_cache
6+ from pathlib import Path
7+ from typing import Union , Optional
48
9+ logger = logging .getLogger (__name__ )
510
6- class DirCache (MutableMapping ):
11+
12+ class MemoryDirCache (MutableMapping ):
713 """
814 Caching of directory listings, in a structure like::
915
@@ -26,19 +32,15 @@ class DirCache(MutableMapping):
2632
2733 def __init__ (
2834 self ,
29- use_listings_cache = True ,
30- listings_expiry_time = None ,
35+ expiry_time = None ,
3136 max_paths = None ,
3237 ** kwargs ,
3338 ):
3439 """
3540
3641 Parameters
3742 ----------
38- use_listings_cache: bool
39- If False, this cache never returns items, but always reports KeyError,
40- and setting items has no effect
41- listings_expiry_time: int or float (optional)
43+ expiry_time: int or float (optional)
4244 Time in seconds that a listing is considered valid. If None,
4345 listings do not expire.
4446 max_paths: int (optional)
@@ -49,13 +51,12 @@ def __init__(
4951 self ._times = {}
5052 if max_paths :
5153 self ._q = lru_cache (max_paths + 1 )(lambda key : self ._cache .pop (key , None ))
52- self .use_listings_cache = use_listings_cache
53- self .listings_expiry_time = listings_expiry_time
54+ self .expiry_time = expiry_time
5455 self .max_paths = max_paths
5556
5657 def __getitem__ (self , item ):
57- if self .listings_expiry_time is not None :
58- if self ._times .get (item , 0 ) - time .time () < - self .listings_expiry_time :
58+ if self .expiry_time is not None :
59+ if self ._times .get (item , 0 ) - time .time () < - self .expiry_time :
5960 del self ._cache [item ]
6061 if self .max_paths :
6162 self ._q (item )
@@ -75,12 +76,10 @@ def __contains__(self, item):
7576 return False
7677
7778 def __setitem__ (self , key , value ):
78- if not self .use_listings_cache :
79- return
8079 if self .max_paths :
8180 self ._q (key )
8281 self ._cache [key ] = value
83- if self .listings_expiry_time is not None :
82+ if self .expiry_time is not None :
8483 self ._times [key ] = time .time ()
8584
8685 def __delitem__ (self , key ):
@@ -93,6 +92,112 @@ def __iter__(self):
9392
9493 def __reduce__ (self ):
9594 return (
96- DirCache ,
97- (self .use_listings_cache , self .listings_expiry_time , self .max_paths ),
95+ MemoryDirCache ,
96+ (self .expiry_time , self .max_paths ),
97+ )
98+
99+
class FileDirCache(MutableMapping):
    """
    Directory-listings cache persisted on disk through ``diskcache.Cache``.

    Every entry is written with ``expiry_time`` as its ``expire`` argument,
    so expiry is delegated to the backing store; this class only adapts the
    store to the ``MutableMapping`` interface.
    """

    # Marks "no such entry" in lookups, so that stored falsy values (e.g. the
    # empty listing of an empty directory) are not mistaken for a cache miss.
    _MISSING = object()

    def __init__(
        self,
        expiry_time=None,
        directory=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        directory: str (optional)
            Directory path at which the listings cache file is stored. If None,
            an autogenerated path at the user folder is created.

        Raises
        ------
        OSError
            If the cache folder cannot be created.

        """
        import platformdirs
        from diskcache import Cache

        if not directory:
            directory = platformdirs.user_cache_dir(appname="fsspec")
        # Remember the un-suffixed base: __reduce__ must hand *this* back to
        # __init__, which appends the "dircache/<expiry_time>" part itself.
        self._base_directory = str(directory)
        directory = Path(directory) / "dircache" / str(expiry_time)

        try:
            directory.mkdir(exist_ok=True, parents=True)
        except OSError:
            logger.error(
                "folder for dircache could not be created at %s", directory
            )
            raise
        else:
            logger.info("Dircache located at %s", directory)

        self.directory = directory
        self._cache = Cache(directory=str(directory))
        self.expiry_time = expiry_time

    def __getitem__(self, item):
        """Return the value cached for ``item``, retrying if a timeout occurs.

        Raises ``KeyError`` when the store has no value for ``item``, as
        required of a mapping.
        """
        value = self._cache.get(
            key=item, default=self._MISSING, read=True, retry=True
        )
        if value is self._MISSING:
            raise KeyError(item)
        return value

    def clear(self):
        """Remove every entry from the backing store."""
        self._cache.clear()

    def __len__(self):
        """Count the keys that ``__iter__`` yields."""
        # Counting through __iter__ keeps len(self) == len(list(self)).
        return sum(1 for _ in self)

    def __contains__(self, item):
        """Report whether the store still returns a value for ``item``."""
        # Compare against the sentinel rather than truthiness: an empty
        # listing is a legitimate cached value.
        found = self._cache.get(item, default=self._MISSING, retry=True)
        return found is not self._MISSING

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` with ``expiry_time`` as its lifetime."""
        self._cache.set(
            key=key, value=value, expire=self.expiry_time, retry=True
        )

    def __delitem__(self, key):
        """Delete ``key`` from the backing store."""
        del self._cache[key]

    def __iter__(self):
        """Iterate over the stored keys that are still retrievable."""
        return (k for k in self._cache.iterkeys() if k in self)

    def __reduce__(self):
        """Pickle as a constructor call that reopens the same on-disk cache."""
        return (
            FileDirCache,
            (self.expiry_time, self._base_directory),
        )
172+
173+
class CacheType(Enum):
    """Available listings-cache backends; each member's value is the class implementing it."""

    # In-memory cache.
    MEMORY = MemoryDirCache
    # File-backed cache.
    FILE = FileDirCache
177+
178+
def create_dircache(
    cache_type: Optional[Union[str, CacheType]] = None,
    expiry_time: Optional[Union[int, float]] = None,
    **kwargs,
) -> Optional[Union[MemoryDirCache, FileDirCache]]:
    """
    Build the listings cache selected by ``cache_type``.

    Parameters
    ----------
    cache_type: str or CacheType (optional)
        Backend to use, given either as a ``CacheType`` member or as its
        name (case-insensitive). If falsy, no cache is created.
    expiry_time: int or float (optional)
        Time in seconds that a listing is considered valid; converted to
        float before being passed on. If None, listings do not expire.
        A value of zero means no cache is created.
    **kwargs:
        Passed through to the constructor of the selected cache class.

    Returns
    -------
    The new cache instance, or None when ``cache_type`` is falsy or
    ``expiry_time`` is zero.

    Raises
    ------
    ValueError
        If ``cache_type`` does not identify a ``CacheType`` member, or if
        ``expiry_time`` cannot be converted to float.
    """
    if not cache_type:
        return None

    valid_names = ", ".join(ct.name.lower() for ct in CacheType)
    if isinstance(cache_type, str):
        try:
            cache_type = CacheType[cache_type.upper()]
        except KeyError as e:
            raise ValueError(f"Cache type must be one of {valid_names}") from e
    elif not isinstance(cache_type, CacheType):
        raise ValueError(f"Cache type must be one of {valid_names}")

    if expiry_time is not None:
        expiry_time = float(expiry_time)
        if expiry_time == 0.0:
            # A zero lifetime would expire every entry at once: skip caching.
            return None

    # Each member's value is the cache class itself, so no separate
    # member-to-class mapping is needed.
    return cache_type.value(expiry_time, **kwargs)
199+
200+
if __name__ == "__main__":
    # Manual smoke check: build an in-memory cache and print it.
    print(create_dircache(cache_type="memory"))
0 commit comments