|
1 | 1 | import csv |
2 | 2 | import io |
3 | 3 | import logging |
4 | | -from typing import IO, Any, Callable, Dict, Iterator, Optional |
| 4 | +from typing import IO, Any, Callable, Dict, Iterator, Optional, Union |
5 | 5 |
|
6 | 6 | import ijson # type: ignore |
7 | 7 |
|
| 8 | +from .files import OpenedFileRef |
| 9 | + |
8 | 10 | logger = logging.getLogger(__name__) |
9 | 11 |
|
10 | 12 |
|
def json_records(
    json_path: str = "item", multiple_values: Optional[bool] = False
) -> Callable[[Iterator[Union[IO[bytes], OpenedFileRef]]], Iterator[Dict[str, Any]]]:
    """Build a reader that streams records out of JSON files.

    Args:
        json_path: Prefix handed to ``ijson.items`` to select which values
            are yielded.
        multiple_values: Forwarded unchanged to ``ijson.items``.

    Returns:
        A function that takes an iterator of inputs and lazily yields every
        value ``ijson.items`` produces for each input, in order. Each input
        is either a binary stream or an ``OpenedFileRef``.
    """
    logger.info(f"Initializing json reader for {json_path}")

    def json_records_func(
        json_files: Iterator[Union[IO[bytes], OpenedFileRef]]
    ) -> Iterator[Dict[str, Any]]:
        for source in json_files:
            # An OpenedFileRef is parsed via its ``contents`` attribute and
            # logged by its ``name``; anything else is parsed and logged as-is.
            if not isinstance(source, OpenedFileRef):
                stream, label = source, source
            else:
                stream, label = source.contents, source.name
            logger.info(f"Reading json file {label}")
            parsed = ijson.items(stream, json_path, multiple_values=multiple_values)
            for record in parsed:
                yield record

    return json_records_func
23 | 32 |
|
24 | 33 |
|
def csv_records() -> (
    Callable[[Iterator[Union[IO[bytes], OpenedFileRef]]], Iterator[Dict[str, Any]]]
):
    """Build a reader that streams rows out of comma-delimited CSV files.

    Returns:
        A function that takes an iterator of inputs and lazily yields one
        dict per data row, as produced by ``csv.DictReader``, for each input
        in order. Each input is either a binary stream or an
        ``OpenedFileRef``; the bytes are decoded as UTF-8.
    """
    logger.info("Initializing csv reader")

    def csv_records_func(
        csv_files: Iterator[Union[IO[bytes], OpenedFileRef]]
    ) -> Iterator[Dict[str, Any]]:
        for csv_file in csv_files:
            # An OpenedFileRef is read via its ``contents`` attribute and
            # logged by its ``name``; anything else is read and logged as-is.
            if isinstance(csv_file, OpenedFileRef):
                contents = csv_file.contents
                logger.info(f"Reading csv file {csv_file.name}")
            else:
                contents = csv_file
                logger.info(f"Reading csv file {csv_file}")
            # newline="" turns off universal-newline translation so the csv
            # module does its own line-ending handling; otherwise "\r\n"
            # inside quoted fields would be rewritten to "\n" before parsing.
            text_stream = io.TextIOWrapper(contents, "utf-8", newline="")
            csv_reader = csv.DictReader(text_stream, delimiter=",")
            yield from csv_reader

    return csv_records_func
33 | 53 |
|
|
0 commit comments