1+ import click
2+ import pathlib
3+ import os
4+ import runpy
5+ from typing import List
6+
7+ from fairscape_cli .tracking .io_capture import IOCapture
8+ from fairscape_cli .tracking .provenance_tracker import ProvenanceTracker
9+ from fairscape_cli .tracking .config import ProvenanceConfig , TrackerConfig
10+ from fairscape_cli .tracking .metadata_generator import create_metadata_generator
11+
12+
@click.command('track')
@click.argument('script-path', type=click.Path(exists=True, path_type=pathlib.Path))
@click.option('--rocrate-path', type=click.Path(path_type=pathlib.Path), default=None, help='Path to RO-Crate directory (default: current directory)')
@click.option('--author', type=str, default="Unknown", help='Author name (default: from RO-Crate or "Unknown")')
@click.option('--keywords', multiple=True, default=["computation"], help='Keywords for metadata (default: from RO-Crate or ["computation"])')
@click.option('--input', 'manual_inputs', multiple=True, help='Manual input files to track')
@click.option('--no-llm', is_flag=True, default=False, help='Disable LLM-based description generation')
@click.option('--execution-name', type=str, default=None, help='Name for this execution (default: script filename)')
@click.pass_context
def track(
    ctx,
    script_path: pathlib.Path,
    rocrate_path: pathlib.Path,
    author: str,
    keywords: List[str],
    manual_inputs: List[str],
    no_llm: bool,
    execution_name: str
):
    """Track execution of a Python script and generate provenance metadata.

    Executes SCRIPT_PATH while capturing file I/O operations, then generates
    RO-Crate metadata documenting the computation, software, input datasets,
    and output datasets.

    Examples:

    fairscape-cli track analysis.py

    fairscape-cli track analysis.py --author "Jane Doe" --keywords ml --keywords analysis

    fairscape-cli track analysis.py --rocrate-path ./my-crate --input config.json

    fairscape-cli track analysis.py --no-llm --author "John Smith"
    """
    # Developer notes (kept out of the docstring, which Click shows as --help):
    #   ctx            -- Click context; ctx.obj may carry a 'verbose' flag.
    #   script_path    -- script to execute under I/O capture.
    #   rocrate_path   -- RO-Crate directory; None means the current directory.
    #   keywords, manual_inputs -- repeated options; converted to lists below.
    #   no_llm         -- when set, no LLM metadata generator is created.
    #   execution_name -- label for the run; defaults to the script's stem.
    # Every error path prints to stderr and exits with status 1. When no file
    # I/O was captured and no manual inputs were given, the command returns
    # normally without generating metadata.

    rocrate_path = rocrate_path or pathlib.Path.cwd()

    if not script_path.exists():
        click.echo(f"ERROR: Script file not found: {script_path} ", err=True)
        ctx.exit(code=1)

    try:
        # Python source files are UTF-8 by default, so do not depend on the
        # platform's locale encoding when reading the script.
        code = script_path.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as exc:
        click.echo(f"ERROR: Could not read script file: {exc} ", err=True)
        ctx.exit(code=1)

    tracker_config = TrackerConfig()

    original_cwd = pathlib.Path.cwd()
    script_dir = script_path.parent.resolve()
    # Build an absolute path BEFORE changing directory: a relative
    # script_path (e.g. "sub/analysis.py") would no longer resolve once the
    # working directory is the script's own folder.
    script_file = script_dir / script_path.name

    try:
        # Run from the script's directory so its relative file paths behave
        # as they would when it is launched directly from there.
        os.chdir(script_dir)

        with IOCapture(config=tracker_config) as capture:
            try:
                runpy.run_path(str(script_file), run_name='__main__')
            except SystemExit as exit_signal:
                # sys.exit() with no argument leaves code as None, which is a
                # successful exit just like 0; only warn on real failures.
                if exit_signal.code not in (None, 0):
                    click.echo(f"WARNING: Script exited with code {exit_signal.code} ", err=True)
            except Exception as exc:
                click.echo(f"ERROR: Script execution failed: {exc} ", err=True)
                ctx.exit(code=1)
    finally:
        # Always restore the caller's working directory, even on failure.
        os.chdir(original_cwd)

    if not capture.inputs and not capture.outputs and not manual_inputs:
        click.echo("WARNING: No file I/O detected in script execution", err=True)
        click.echo("No metadata generated.", err=True)
        return

    # Coerce to a real bool: the bare `and` expression would otherwise yield
    # the API key string itself (or None) and pass it on as the use_llm flag.
    use_llm = not no_llm and bool(os.environ.get("GEMINI_API_KEY"))

    metadata_generator = None
    if use_llm:
        from datetime import datetime
        try:
            metadata_generator = create_metadata_generator(
                provider="gemini",
                timestamp=datetime.now().strftime("%Y%m%d_%H%M%S")
            )
        except Exception as exc:
            click.echo(f"WARNING: Could not initialize LLM metadata generator: {exc} ", err=True)
            click.echo("Falling back to simple descriptions", err=True)
            # Keep the flag consistent with the fallback announced above.
            use_llm = False

    provenance_config = ProvenanceConfig(
        rocrate_path=rocrate_path,
        author=author,
        keywords=list(keywords),
        manual_inputs=list(manual_inputs),
        use_llm=use_llm
    )

    # NOTE: ctx.exit() raises click's Exit exception, which derives from
    # RuntimeError, so it must only be called from the handlers below and
    # never from inside this try body.
    try:
        tracker = ProvenanceTracker(
            config=provenance_config,
            metadata_generator=metadata_generator
        )

        exec_name = execution_name or script_path.stem

        result = tracker.track_execution(code, capture, execution_name=exec_name)

        # The computation GUID is the only thing written to stdout; all
        # diagnostics go to stderr.
        click.echo(result.computation_guid)

        if ctx.obj and ctx.obj.get('verbose'):
            click.echo("\n Tracking Summary:", err=True)
            click.echo(f" Software: {result.software_guid} ", err=True)
            click.echo(f" Inputs: {result.input_count} datasets ({result.reused_count} reused)", err=True)
            click.echo(f" Outputs: {result.output_count} datasets", err=True)

    except (ValueError, RuntimeError) as exc:
        click.echo(f"ERROR: {exc} ", err=True)
        ctx.exit(code=1)
    except Exception as exc:
        click.echo(f"ERROR: Tracking failed: {exc} ", err=True)
        ctx.exit(code=1)
0 commit comments