Commit a26f459

Fix scan (#51)
1 parent 6a4ece4 commit a26f459

File tree

- README.md
- src/datacustomcode/deploy.py
- src/datacustomcode/scan.py
- tests/test_scan.py

4 files changed, +300 -8 lines

README.md

Lines changed: 6 additions & 5 deletions
@@ -77,6 +77,7 @@ After modifying the `entrypoint.py` as needed, using any dependencies you add in
 4. The SDK automatically packages all dependencies when you run `datacustomcode zip`
 
 ```zsh
+cd my_package
 datacustomcode scan ./payload/entrypoint.py
 datacustomcode deploy --path ./payload --name my_custom_script --cpu-size CPU_L
 ```
@@ -183,7 +184,7 @@ Options:
 
 
 #### `datacustomcode init`
-Initialize a new development environment with a template.
+Initialize a new development environment with a code package template.
 
 Argument:
 - `DIRECTORY`: Directory to create project in (default: ".")
@@ -213,19 +214,19 @@ Options:
 
 
 #### `datacustomcode zip`
-Zip a transformation job in preparation to upload to Data Cloud.
+Zip a transformation job in preparation to upload to Data Cloud. Make sure to change directory into your code package folder (e.g., `my_package`) before running this command.
 
 Options:
-- `--path TEXT`: Path to the code directory (default: ".")
+- `--path TEXT`: Path to the code directory, i.e. the payload folder (default: ".")
 - `--network TEXT`: docker network (default: "default")
 
 
 #### `datacustomcode deploy`
-Deploy a transformation job to Data Cloud.
+Deploy a transformation job to Data Cloud. Note that this command takes care of creating a zip file from the provided path before deployment. Make sure to change directory into your code package folder (e.g., `my_package`) before running this command.
 
 Options:
 - `--profile TEXT`: Credential profile name (default: "default")
-- `--path TEXT`: Path to the code directory (default: ".")
+- `--path TEXT`: Path to the code directory, i.e. the payload folder (default: ".")
 - `--name TEXT`: Name of the transformation job [required]
 - `--version TEXT`: Version of the transformation job (default: "0.0.1")
 - `--description TEXT`: Description of the transformation job (default: "")

src/datacustomcode/deploy.py

Lines changed: 22 additions & 2 deletions
@@ -91,9 +91,29 @@ def _make_api_call(
     logger.debug(f"Request params: {kwargs}")
 
     response = requests.request(method=method, url=url, headers=headers, **kwargs)
-    json_response = response.json()
     if response.status_code >= 400:
-        logger.debug(f"Error Response: {json_response}")
+        logger.debug(f"Error Response Status: {response.status_code}")
+        logger.debug(f"Error Response Headers: {response.headers}")
+        logger.debug(f"Error Response Text: {response.text[:500]}")
+
+    if not response.text or response.text.strip() == "":
+        response.raise_for_status()
+        raise ValueError(
+            f"Received empty response from {method} {url}. "
+            f"Status code: {response.status_code}"
+        )
+
+    try:
+        json_response = response.json()
+    except requests.exceptions.JSONDecodeError as e:
+        logger.error(f"Failed to parse JSON response. Status: {response.status_code}")
+        logger.error(f"Response text: {response.text[:500]}")
+        raise ValueError(
+            f"Invalid JSON response from {method} {url}. "
+            f"Status code: {response.status_code}, "
+            f"Response: {response.text[:200]}"
+        ) from e
+
     response.raise_for_status()
     assert isinstance(
         json_response, dict
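
The change above hardens `_make_api_call` against API responses that are empty or not JSON: the old code called `response.json()` unconditionally, so a blank body or an HTML error page surfaced as an opaque decode error. As a standalone illustration of the same pattern (not the SDK's code), the sketch below wraps the guard logic in a hypothetical `parse_json_response` helper; it assumes requests >= 2.27, where `requests.exceptions.JSONDecodeError` is available.

```python
# Illustrative sketch only: restates the guard pattern from the diff above.
# Assumes requests >= 2.27 (requests.exceptions.JSONDecodeError); the helper
# name parse_json_response is hypothetical, not part of the SDK.
import logging

import requests

logger = logging.getLogger(__name__)


def parse_json_response(response: requests.Response) -> dict:
    """Return the JSON object body of a response, failing loudly otherwise."""
    if response.status_code >= 400:
        # Log diagnostics before raising so HTTP failures are easier to debug.
        logger.debug("Error status: %s, body: %.500s", response.status_code, response.text)

    if not response.text or not response.text.strip():
        # raise_for_status() covers HTTP errors; an empty 2xx body is also an error here.
        response.raise_for_status()
        raise ValueError(f"Received empty response (status {response.status_code})")

    try:
        payload = response.json()
    except requests.exceptions.JSONDecodeError as exc:
        raise ValueError(
            f"Invalid JSON response (status {response.status_code}): {response.text[:200]}"
        ) from exc

    response.raise_for_status()
    if not isinstance(payload, dict):
        raise ValueError(f"Expected a JSON object, got {type(payload).__name__}")
    return payload
```

Parsing only after the empty-body and decode checks mirrors the commit's intent: failures now carry the status code and a snippet of the raw body instead of a bare `JSONDecodeError`.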

src/datacustomcode/scan.py

Lines changed: 40 additions & 1 deletion
@@ -15,6 +15,8 @@
 from __future__ import annotations
 
 import ast
+import json
+import logging
 import os
 import sys
 from typing import (
@@ -29,12 +31,14 @@
 
 from datacustomcode.version import get_version
 
+logger = logging.getLogger(__name__)
+
 DATA_ACCESS_METHODS = ["read_dlo", "read_dmo", "write_to_dlo", "write_to_dmo"]
 
 DATA_TRANSFORM_CONFIG_TEMPLATE = {
     "sdkVersion": get_version(),
     "entryPoint": "",
-    "dataspace": "default",
+    "dataspace": "",
     "permissions": {
         "read": {},
         "write": {},
@@ -232,6 +236,40 @@ def dc_config_json_from_file(file_path: str) -> dict[str, Any]:
     config = DATA_TRANSFORM_CONFIG_TEMPLATE.copy()
     config["entryPoint"] = file_path.rpartition("/")[-1]
 
+    file_dir = os.path.dirname(file_path)
+    config_json_path = os.path.join(file_dir, "config.json")
+
+    if os.path.exists(config_json_path) and os.path.isfile(config_json_path):
+        try:
+            with open(config_json_path, "r") as f:
+                existing_config = json.load(f)
+
+            if "dataspace" in existing_config:
+                dataspace_value = existing_config["dataspace"]
+                if not dataspace_value or (
+                    isinstance(dataspace_value, str) and dataspace_value.strip() == ""
+                ):
+                    logger.warning(
+                        f"dataspace in {config_json_path} is empty or None. "
+                        f"Updating config file to use dataspace 'default'. "
+                    )
+                    config["dataspace"] = "default"
+                else:
+                    config["dataspace"] = dataspace_value
+            else:
+                raise ValueError(
+                    f"dataspace must be defined in {config_json_path}. "
+                    f"Please add a 'dataspace' field to the config.json file. "
+                )
+        except json.JSONDecodeError as e:
+            raise ValueError(
+                f"Failed to parse JSON from {config_json_path}: {e}"
+            ) from e
+        except OSError as e:
+            raise OSError(f"Failed to read config file {config_json_path}: {e}") from e
+    else:
+        config["dataspace"] = "default"
+
     read: dict[str, list[str]] = {}
     if output.read_dlo:
         read["dlo"] = list(output.read_dlo)
@@ -244,4 +282,5 @@ def dc_config_json_from_file(file_path: str) -> dict[str, Any]:
         write["dmo"] = list(output.write_to_dmo)
 
     config["permissions"] = {"read": read, "write": write}
+
     return config
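
Taken together, the scan changes resolve `dataspace` from an optional `config.json` next to the entrypoint: an explicit value is preserved, an empty value falls back to `"default"` with a warning, a missing key is rejected, and a missing file falls back to `"default"`. The sketch below (not part of the commit) walks through three of those cases; it assumes the SDK is installed and that `dc_config_json_from_file` is importable from `datacustomcode.scan`, as the new tests imply, and the directory and DLO names are made up for illustration.

```python
# Illustrative walkthrough of the dataspace resolution rules added above.
# Assumption: dc_config_json_from_file is exposed by datacustomcode.scan
# (the new tests patch datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE).
import json
import os
import tempfile

from datacustomcode.scan import dc_config_json_from_file

payload_dir = tempfile.mkdtemp()
entrypoint = os.path.join(payload_dir, "entrypoint.py")
with open(entrypoint, "w") as f:
    f.write(
        "from datacustomcode.client import Client\n"
        "client = Client()\n"
        'df = client.read_dlo("input_dlo")\n'
        'client.write_to_dlo("output_dlo", df, "overwrite")\n'
    )

# 1. No config.json next to the entrypoint: the scan falls back to "default".
print(dc_config_json_from_file(entrypoint)["dataspace"])  # -> "default"

# 2. config.json declares a dataspace: that value is carried into the generated config.
with open(os.path.join(payload_dir, "config.json"), "w") as f:
    json.dump({"dataspace": "my_custom_dataspace"}, f)
print(dc_config_json_from_file(entrypoint)["dataspace"])  # -> "my_custom_dataspace"

# 3. config.json exists but omits the "dataspace" key: the scan raises ValueError.
with open(os.path.join(payload_dir, "config.json"), "w") as f:
    json.dump({"entryPoint": "entrypoint.py"}, f)
try:
    dc_config_json_from_file(entrypoint)
except ValueError as exc:
    print(exc)  # dataspace must be defined in .../config.json. ...
```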

tests/test_scan.py

Lines changed: 232 additions & 0 deletions
@@ -358,6 +358,238 @@ def test_dmo_to_dmo_config(self):
         finally:
             os.remove(temp_path)
 
+    @patch(
+        "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
+        {
+            "sdkVersion": "1.2.3",
+            "entryPoint": "",
+            "dataspace": "",
+            "permissions": {
+                "read": {},
+                "write": {},
+            },
+        },
+    )
+    def test_preserves_existing_dataspace(self):
+        """Test that existing dataspace value is preserved when config.json exists."""
+        import json
+
+        content = textwrap.dedent(
+            """
+            from datacustomcode.client import Client
+
+            client = Client()
+            df = client.read_dlo("input_dlo")
+            client.write_to_dlo("output_dlo", df, "overwrite")
+            """
+        )
+        temp_path = create_test_script(content)
+        file_dir = os.path.dirname(temp_path)
+        config_path = os.path.join(file_dir, "config.json")
+
+        try:
+            # Create an existing config.json with a custom dataspace
+            existing_config = {
+                "sdkVersion": "1.0.0",
+                "entryPoint": "test.py",
+                "dataspace": "my_custom_dataspace",
+                "permissions": {
+                    "read": {"dlo": ["old_dlo"]},
+                    "write": {"dlo": ["old_output"]},
+                },
+            }
+            with open(config_path, "w") as f:
+                json.dump(existing_config, f)
+
+            # Generate new config - should preserve dataspace
+            result = dc_config_json_from_file(temp_path)
+            assert result["dataspace"] == "my_custom_dataspace"
+            assert result["permissions"]["read"]["dlo"] == ["input_dlo"]
+            assert result["permissions"]["write"]["dlo"] == ["output_dlo"]
+        finally:
+            os.remove(temp_path)
+            if os.path.exists(config_path):
+                os.remove(config_path)
+
+    @patch(
+        "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
+        {
+            "sdkVersion": "1.2.3",
+            "entryPoint": "",
+            "dataspace": "",
+            "permissions": {
+                "read": {},
+                "write": {},
+            },
+        },
+    )
+    def test_uses_default_for_empty_dataspace(self, caplog):
+        """Test that empty dataspace value uses default and logs warning."""
+        import json
+        import logging
+
+        content = textwrap.dedent(
+            """
+            from datacustomcode.client import Client
+
+            client = Client()
+            df = client.read_dlo("input_dlo")
+            client.write_to_dlo("output_dlo", df, "overwrite")
+            """
+        )
+        temp_path = create_test_script(content)
+        file_dir = os.path.dirname(temp_path)
+        config_path = os.path.join(file_dir, "config.json")
+
+        try:
+            # Create an existing config.json with empty dataspace
+            existing_config = {
+                "sdkVersion": "1.0.0",
+                "entryPoint": "test.py",
+                "dataspace": "",
+                "permissions": {
+                    "read": {"dlo": ["old_dlo"]},
+                    "write": {"dlo": ["old_output"]},
+                },
+            }
+            with open(config_path, "w") as f:
+                json.dump(existing_config, f)
+
+            # Should use "default" for empty dataspace (not raise error)
+            with caplog.at_level(logging.WARNING):
+                result = dc_config_json_from_file(temp_path)
+
+            assert result["dataspace"] == "default"
+            assert result["permissions"]["read"]["dlo"] == ["input_dlo"]
+            assert result["permissions"]["write"]["dlo"] == ["output_dlo"]
+
+            # Verify that a warning was logged
+            assert len(caplog.records) > 0
+            assert any(
+                "dataspace" in record.message.lower()
+                and "empty" in record.message.lower()
+                for record in caplog.records
+            )
+        finally:
+            os.remove(temp_path)
+            if os.path.exists(config_path):
+                os.remove(config_path)
+
+    @patch(
+        "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
+        {
+            "sdkVersion": "1.2.3",
+            "entryPoint": "",
+            "dataspace": "",
+            "permissions": {
+                "read": {},
+                "write": {},
+            },
+        },
+    )
+    def test_uses_default_dataspace_when_no_config(self):
+        """Test missing config.json uses default dataspace."""
+        content = textwrap.dedent(
+            """
+            from datacustomcode.client import Client
+
+            client = Client()
+            df = client.read_dlo("input_dlo")
+            client.write_to_dlo("output_dlo", df, "overwrite")
+            """
+        )
+        temp_path = create_test_script(content)
+
+        try:
+            # No existing config.json - should use "default" dataspace
+            result = dc_config_json_from_file(temp_path)
+            assert result["dataspace"] == "default"
+            assert result["permissions"]["read"]["dlo"] == ["input_dlo"]
+            assert result["permissions"]["write"]["dlo"] == ["output_dlo"]
+        finally:
+            os.remove(temp_path)
+
+    @patch(
+        "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
+        {
+            "sdkVersion": "1.2.3",
+            "entryPoint": "",
+            "dataspace": "",
+            "permissions": {
+                "read": {},
+                "write": {},
+            },
+        },
+    )
+    def test_rejects_missing_dataspace(self):
+        """Test that config.json missing dataspace field raises ValueError."""
+        import json
+
+        content = textwrap.dedent(
+            """
+            from datacustomcode.client import Client
+
+            client = Client()
+            df = client.read_dlo("input_dlo")
+            client.write_to_dlo("output_dlo", df, "overwrite")
+            """
+        )
+        temp_path = create_test_script(content)
+        file_dir = os.path.dirname(temp_path)
+        config_path = os.path.join(file_dir, "config.json")
+
+        try:
+            # Create an existing config.json without dataspace field
+            existing_config = {
+                "sdkVersion": "1.0.0",
+                "entryPoint": "test.py",
+                "permissions": {
+                    "read": {"dlo": ["old_dlo"]},
+                    "write": {"dlo": ["old_output"]},
+                },
+            }
+            with open(config_path, "w") as f:
+                json.dump(existing_config, f)
+
+            # Should raise ValueError when dataspace field is missing
+            with pytest.raises(
+                ValueError, match="dataspace must be defined in.*config.json"
+            ):
+                dc_config_json_from_file(temp_path)
+        finally:
+            os.remove(temp_path)
+            if os.path.exists(config_path):
+                os.remove(config_path)
+
+    def test_raises_error_on_invalid_json(self):
+        """Test that invalid JSON in config.json raises an error."""
+
+        content = textwrap.dedent(
+            """
+            from datacustomcode.client import Client
+
+            client = Client()
+            df = client.read_dlo("input_dlo")
+            client.write_to_dlo("output_dlo", df, "overwrite")
+            """
+        )
+        temp_path = create_test_script(content)
+        file_dir = os.path.dirname(temp_path)
+        config_path = os.path.join(file_dir, "config.json")
+
+        try:
+            # Create an invalid JSON file
+            with open(config_path, "w") as f:
+                f.write("{ invalid json }")
+
+            # Should raise ValueError for invalid JSON
+            with pytest.raises(ValueError, match="Failed to parse JSON"):
+                dc_config_json_from_file(temp_path)
+        finally:
+            os.remove(temp_path)
+            if os.path.exists(config_path):
+                os.remove(config_path)
+
 
 class TestDataAccessLayerCalls:
     """Tests for the DataAccessLayerCalls class directly."""
