add optional 'strict' parameter to decompression functions

valgur · valgur · commit c65888782fbc · 2022-01-21T11:30:56.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 See also [CHANGES.md](rnxcmp/docs/CHANGES.md) of the original RNXCMP software package.
 
+## [2.6.0] - 2022-01-21
+
+- Added an optional `strict` parameter to the decompression functions; it defaults to `False`.
+  A `ValueError` for non-RINEX files is now raised only when `strict=True`.
+
 ## [2.5.0] - 2022-01-10
 
 - Decompression now raises a `ValueError` if the decompressed file lacks a valid RINEX header record.
@@ -47,8 +52,9 @@ First release.
 - Provide Hatanaka decompression / compression support via `crx2rnx` and `rnx2crx` functions.
 - Install `crx2rnx` and `rnx2crx` as command line executables.
 
+[2.6.0]: https://github.com/valgur/hatanaka/compare/v2.5.0...v2.6.0
+[2.5.0]: https://github.com/valgur/hatanaka/compare/v2.4.0...v2.5.0
 [2.4.0]: https://github.com/valgur/hatanaka/compare/v2.3.0...v2.4.0
-
 [2.3.0]: https://github.com/valgur/hatanaka/compare/v2.2.0...v2.3.0
 [2.2.0]: https://github.com/valgur/hatanaka/compare/v2.1.0...v2.2.0
 [2.1.0]: https://github.com/valgur/hatanaka/compare/v2.0.0...v2.1.0
diff --git a/hatanaka/__init__.py b/hatanaka/__init__.py
@@ -1,5 +1,5 @@
 from .general_compression import *
 from .hatanaka import *
 
-__version__ = '2.5.0'
+__version__ = '2.6.0'
 rnxcmp_version = '4.0.8'
diff --git a/hatanaka/general_compression.py b/hatanaka/general_compression.py
@@ -19,7 +19,7 @@
 
 
 def decompress(content: Union[Path, str, bytes], *,
-               skip_strange_epochs: bool = False) -> bytes:
+               skip_strange_epochs: bool = False, strict: bool = False) -> bytes:
     """Decompress compressed RINEX files.
 
     Any RINEX files compressed with Hatanaka compression (.crx|.##d) and/or with a conventional
@@ -41,6 +41,8 @@ def decompress(content: Union[Path, str, bytes], *,
         Using this together with of reinit_every_nth option of rnx2crx may be effective.
         Caution: It is assumed that no change in the list of data types happens in the
         lost part of the data.
+    strict : bool, default False
+        If True, a ValueError is raised if the decoded file is not RINEX.
 
     Returns
     -------
@@ -55,14 +57,14 @@ def decompress(content: Union[Path, str, bytes], *,
         For invalid file contents.
     """
     if isinstance(content, (Path, str)):
-        content = _decompress(Path(content).read_bytes(), skip_strange_epochs)[1]
+        content = _decompress(Path(content).read_bytes(), skip_strange_epochs, strict)[1]
     elif not isinstance(content, bytes):
         raise ValueError('input must be either a path or a binary string')
-    return _decompress(content, skip_strange_epochs)[1]
+    return _decompress(content, skip_strange_epochs, strict)[1]
 
 
 def decompress_on_disk(path: Union[Path, str], *, delete: bool = False,
-                       skip_strange_epochs: bool = False) -> Path:
+                       skip_strange_epochs: bool = False, strict: bool = False) -> Path:
     """Decompress compressed RINEX files and write the resulting file to disk.
 
     Any RINEX files compressed with Hatanaka compression (.crx|.##d) and/or with a conventional
@@ -86,6 +88,8 @@ def decompress_on_disk(path: Union[Path, str], *, delete: bool = False,
         Using this together with of reinit_every_nth option of rnx2crx may be effective.
         Caution: It is assumed that no change in the list of data types happens in the
         lost part of the data.
+    strict : bool, default False
+        If True, a ValueError is raised if the decoded file is not RINEX.
 
     Returns
     -------
@@ -101,7 +105,7 @@ def decompress_on_disk(path: Union[Path, str], *, delete: bool = False,
     """
     path = Path(path)
     with _record_warnings() as warning_list:
-        is_obs, txt = _decompress(path.read_bytes(), skip_strange_epochs=skip_strange_epochs)
+        is_obs, txt = _decompress(path.read_bytes(), skip_strange_epochs, strict)
     out_path = get_decompressed_path(path)
     if out_path == path:
         # file does not need decompressing
@@ -307,15 +311,15 @@ def _is_bz2(magic_bytes: bytes) -> bool:
     return magic_bytes == b'\x42\x5A'
 
 
-def _decompress(txt: bytes, skip_strange_epochs: bool) -> (bool, bytes):
+def _decompress(txt: bytes, skip_strange_epochs: bool, strict: bool) -> (bool, bytes):
     if len(txt) < 2:
         raise ValueError('empty file')
     magic_bytes = txt[:2]
 
     if _is_gz(magic_bytes):
-        return _decompress_hatanaka(gzip.decompress(txt), skip_strange_epochs)
+        return _decompress_hatanaka(gzip.decompress(txt), skip_strange_epochs, strict)
     if _is_bz2(magic_bytes):
-        return _decompress_hatanaka(bz2.decompress(txt), skip_strange_epochs)
+        return _decompress_hatanaka(bz2.decompress(txt), skip_strange_epochs, strict)
     elif _is_zip(magic_bytes):
         with zipfile.ZipFile(BytesIO(txt), 'r') as z:
             flist = z.namelist()
@@ -324,21 +328,21 @@ def _decompress(txt: bytes, skip_strange_epochs: bool) -> (bool, bytes):
             elif len(flist) > 1:
                 raise ValueError('more than one file in zip archive')
             with z.open(flist[0], 'r') as f:
-                return _decompress_hatanaka(f.read(), skip_strange_epochs)
+                return _decompress_hatanaka(f.read(), skip_strange_epochs, strict)
     elif _is_lzw(magic_bytes):
-        return _decompress_hatanaka(lzw.decompress(txt), skip_strange_epochs)
+        return _decompress_hatanaka(lzw.decompress(txt), skip_strange_epochs, strict)
     else:
-        return _decompress_hatanaka(txt, skip_strange_epochs)
+        return _decompress_hatanaka(txt, skip_strange_epochs, strict)
 
 
-def _decompress_hatanaka(txt: bytes, skip_strange_epochs) -> (bool, bytes):
+def _decompress_hatanaka(txt: bytes, skip_strange_epochs, strict) -> (bool, bytes):
     if len(txt) < 80:
         raise ValueError('file is too short to be a valid RINEX file')
     header = txt[:80]
     is_crinex = b'COMPACT RINEX' in header
     if is_crinex:
         txt = crx2rnx(txt, skip_strange_epochs=skip_strange_epochs)
-    elif not header.endswith(b'RINEX VERSION / TYPE'):
+    elif strict and not header.endswith(b'RINEX VERSION / TYPE'):
         raise ValueError('not a valid RINEX file')
     is_obs = b'OBSERVATION DATA' in txt[:80]
     return is_obs, txt
diff --git a/hatanaka/test/test_on_disk_functions.py b/hatanaka/test/test_on_disk_functions.py
@@ -112,7 +112,7 @@ def test_on_disk_invalid_input(tmp_path):
     path = tmp_path / 'sample.crx'
     path.write_bytes(b'blah' * 100)
     with pytest.raises(ValueError) as excinfo:
-        decompress_on_disk(path)
+        decompress_on_disk(path, strict=True)
     msg = excinfo.value.args[0]
     assert 'not a valid RINEX file' in msg
     assert not get_compressed_path(path).exists()
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = hatanaka
-version = 2.5.0
+version = 2.6.0
 author = Martin Valgur
 author_email = martin.valgur@gmail.com
 url = https://github.com/valgur/hatanaka