Skip to content

Commit ca41084

Browse files
utotsubasainakam
andauthored
feat: Add InMemoryTarget to store data in a memory instead of dumping to files (#432)
* feat: add in-memory target * feat: add a data format to be stored in `Repository` feat: last_modification_time feature in `InMemoryTarget` style: add some type hints fix: fix typo in `InMemoryCacheRepository` test: add some tests for `InMemoryTarget` and `InMemoryCacheRepository` * fix: fix linting errors * fix: update type union shorthand to to make compatible with py39 * style: refactor some base classes to inherite from * fix: remove unnessesary optional type * fix: fix format error * chore: add an assertion error message style: update variable name from `id` to `key` * style: update the variable name to for code consistency * docs: add a document of how to create InMemoryTarget * chore: update name from `inmemory` to `in_memory` chore: add type hints style: remove `Protocol` * fix: fix lint errors * chore: raise Error when using InMemoryTarget with Redis * fix: fix a linting error --------- Co-authored-by: Ibuki Nakamura <[email protected]>
1 parent 7fa0123 commit ca41084

File tree

7 files changed

+246
-0
lines changed

7 files changed

+246
-0
lines changed

docs/task_on_kart.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,3 +286,26 @@ If you want to dump csv file with other encodings, you can use `encoding` parame
286286
def output(self):
287287
return self.make_target('file_name.csv', processor=CsvFileProcessor(encoding='cp932'))
288288
# This will dump csv as 'cp932' which is used in Windows.
289+
290+
Cache output in memory instead of dumping to files
291+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
292+
You can use :class:`~InMemoryTarget` to cache output in memory instead of dumping to files by calling :func:`~gokart.target.make_in_memory_target`.
293+
294+
Please note that :class:`~InMemoryTarget` is an experimental feature.
295+
296+
.. code:: python
297+
298+
from gokart.in_memory.target import make_in_memory_target
299+
300+
def output(self):
301+
unique_id = self.make_unique_id() if use_unique_id else None
302+
# TaskLock is not supported in InMemoryTarget, so it's dummy
303+
task_lock_params = make_task_lock_params(
304+
file_path='dummy_path',
305+
unique_id=unique_id,
306+
redis_host=None,
307+
redis_port=None,
308+
redis_timeout=self.redis_timeout,
309+
raise_task_lock_exception_on_collision=False,
310+
)
311+
return make_in_memory_target('dummy_path', task_lock_params, unique_id)

gokart/in_memory/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .repository import InMemoryCacheRepository # noqa:F401
2+
from .target import InMemoryTarget, make_in_memory_target # noqa:F401

gokart/in_memory/data.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from dataclasses import dataclass
2+
from datetime import datetime
3+
from typing import Any
4+
5+
6+
@dataclass
7+
class InMemoryData:
8+
value: Any
9+
last_modification_time: datetime
10+
11+
@classmethod
12+
def create_data(self, value: Any) -> 'InMemoryData':
13+
return InMemoryData(value=value, last_modification_time=datetime.now())

gokart/in_memory/repository.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from typing import Any, Iterator
2+
3+
from .data import InMemoryData
4+
5+
6+
class InMemoryCacheRepository:
7+
_cache: dict[str, InMemoryData] = {}
8+
9+
def __init__(self):
10+
pass
11+
12+
def get_value(self, key: str) -> Any:
13+
return self._get_data(key).value
14+
15+
def get_last_modification_time(self, key: str):
16+
return self._get_data(key).last_modification_time
17+
18+
def _get_data(self, key: str) -> InMemoryData:
19+
return self._cache[key]
20+
21+
def set_value(self, key: str, obj: Any) -> None:
22+
data = InMemoryData.create_data(obj)
23+
self._cache[key] = data
24+
25+
def has(self, key: str) -> bool:
26+
return key in self._cache
27+
28+
def remove(self, key: str) -> None:
29+
assert self.has(key), f'{key} does not exist.'
30+
del self._cache[key]
31+
32+
def empty(self) -> bool:
33+
return not self._cache
34+
35+
def clear(self) -> None:
36+
self._cache.clear()
37+
38+
def get_gen(self) -> Iterator[tuple[str, Any]]:
39+
for key, data in self._cache.items():
40+
yield key, data.value
41+
42+
@property
43+
def size(self) -> int:
44+
return len(self._cache)

gokart/in_memory/target.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from datetime import datetime
2+
from typing import Any
3+
4+
from gokart.in_memory.repository import InMemoryCacheRepository
5+
from gokart.target import TargetOnKart, TaskLockParams
6+
7+
_repository = InMemoryCacheRepository()
8+
9+
10+
class InMemoryTarget(TargetOnKart):
11+
def __init__(self, data_key: str, task_lock_param: TaskLockParams):
12+
if task_lock_param.should_task_lock:
13+
raise ValueError('Redis with `InMemoryTarget` is not currently supported.')
14+
15+
self._data_key = data_key
16+
self._task_lock_params = task_lock_param
17+
18+
def _exists(self) -> bool:
19+
return _repository.has(self._data_key)
20+
21+
def _get_task_lock_params(self) -> TaskLockParams:
22+
return self._task_lock_params
23+
24+
def _load(self) -> Any:
25+
return _repository.get_value(self._data_key)
26+
27+
def _dump(self, obj: Any) -> None:
28+
return _repository.set_value(self._data_key, obj)
29+
30+
def _remove(self) -> None:
31+
_repository.remove(self._data_key)
32+
33+
def _last_modification_time(self) -> datetime:
34+
if not _repository.has(self._data_key):
35+
raise ValueError(f'No object(s) which id is {self._data_key} are stored before.')
36+
time = _repository.get_last_modification_time(self._data_key)
37+
return time
38+
39+
def _path(self) -> str:
40+
# TODO: this module name `_path` migit not be appropriate
41+
return self._data_key
42+
43+
44+
def make_in_memory_target(target_key: str, task_lock_params: TaskLockParams) -> InMemoryTarget:
45+
return InMemoryTarget(target_key, task_lock_params)
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from datetime import datetime
2+
from time import sleep
3+
4+
import pytest
5+
6+
from gokart.conflict_prevention_lock.task_lock import TaskLockParams
7+
from gokart.in_memory import InMemoryCacheRepository, InMemoryTarget, make_in_memory_target
8+
9+
10+
class TestInMemoryTarget:
11+
@pytest.fixture
12+
def task_lock_params(self) -> TaskLockParams:
13+
return TaskLockParams(
14+
redis_host=None,
15+
redis_port=None,
16+
redis_timeout=None,
17+
redis_key='dummy',
18+
should_task_lock=False,
19+
raise_task_lock_exception_on_collision=False,
20+
lock_extend_seconds=0,
21+
)
22+
23+
@pytest.fixture
24+
def target(self, task_lock_params: TaskLockParams) -> InMemoryTarget:
25+
return make_in_memory_target(target_key='dummy_key', task_lock_params=task_lock_params)
26+
27+
@pytest.fixture(autouse=True)
28+
def clear_repo(self) -> None:
29+
InMemoryCacheRepository().clear()
30+
31+
def test_dump_and_load_data(self, target: InMemoryTarget):
32+
dumped = 'dummy_data'
33+
target.dump(dumped)
34+
loaded = target.load()
35+
assert loaded == dumped
36+
37+
def test_exist(self, target: InMemoryTarget):
38+
assert not target.exists()
39+
target.dump('dummy_data')
40+
assert target.exists()
41+
42+
def test_last_modified_time(self, target: InMemoryTarget):
43+
input = 'dummy_data'
44+
target.dump(input)
45+
time = target.last_modification_time()
46+
assert isinstance(time, datetime)
47+
48+
sleep(0.1)
49+
another_input = 'another_data'
50+
target.dump(another_input)
51+
another_time = target.last_modification_time()
52+
assert time < another_time
53+
54+
target.remove()
55+
with pytest.raises(ValueError):
56+
assert target.last_modification_time()

test/in_memory/test_repository.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import time
2+
3+
import pytest
4+
5+
from gokart.in_memory import InMemoryCacheRepository as Repo
6+
7+
dummy_num = 100
8+
9+
10+
class TestInMemoryCacheRepository:
11+
@pytest.fixture
12+
def repo(self) -> Repo:
13+
repo = Repo()
14+
repo.clear()
15+
return repo
16+
17+
def test_set(self, repo: Repo):
18+
repo.set_value('dummy_key', dummy_num)
19+
assert repo.size == 1
20+
for key, value in repo.get_gen():
21+
assert (key, value) == ('dummy_key', dummy_num)
22+
23+
repo.set_value('another_key', 'another_value')
24+
assert repo.size == 2
25+
26+
def test_get(self, repo: Repo):
27+
repo.set_value('dummy_key', dummy_num)
28+
repo.set_value('another_key', 'another_value')
29+
30+
"""Raise Error when key doesn't exist."""
31+
with pytest.raises(KeyError):
32+
repo.get_value('not_exist_key')
33+
34+
assert repo.get_value('dummy_key') == dummy_num
35+
assert repo.get_value('another_key') == 'another_value'
36+
37+
def test_empty(self, repo: Repo):
38+
assert repo.empty()
39+
repo.set_value('dummmy_key', dummy_num)
40+
assert not repo.empty()
41+
42+
def test_has(self, repo: Repo):
43+
assert not repo.has('dummy_key')
44+
repo.set_value('dummy_key', dummy_num)
45+
assert repo.has('dummy_key')
46+
assert not repo.has('not_exist_key')
47+
48+
def test_remove(self, repo: Repo):
49+
repo.set_value('dummy_key', dummy_num)
50+
51+
with pytest.raises(AssertionError):
52+
repo.remove('not_exist_key')
53+
54+
repo.remove('dummy_key')
55+
assert not repo.has('dummy_key')
56+
57+
def test_last_modification_time(self, repo: Repo):
58+
repo.set_value('dummy_key', dummy_num)
59+
date1 = repo.get_last_modification_time('dummy_key')
60+
time.sleep(0.1)
61+
repo.set_value('dummy_key', dummy_num)
62+
date2 = repo.get_last_modification_time('dummy_key')
63+
assert date1 < date2

0 commit comments

Comments
 (0)