-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Open
Labels
bug: Something isn't working
Description
Actions before raising this issue
- I searched the existing issues and did not find anything similar.
- I read/searched the docs
Steps to Reproduce
import argparse
import tempfile
import zipfile
import time
from pathlib import Path
from urllib.parse import urlparse
from clearml import Dataset, Task
from cvat_sdk.api_client import ApiClient, Configuration
from cvat_sdk.exceptions import ApiException
# The sync_dataset_to_clearml function is unchanged...
def sync_dataset_to_clearml(project_name: str, dataset_name: str, local_path: Path) -> str:
    """Upload *local_path* as a new ClearML dataset version if its content changed.

    Compares a hash of the local folder against the latest remote version and
    only creates a new (child) version when the two differ.

    Args:
        project_name: ClearML project that owns the dataset.
        dataset_name: Name of the dataset inside the project.
        local_path: Folder whose contents should be synchronized.

    Returns:
        The ID of the up-to-date dataset version (existing or newly created).
    """
    print(f"Checking dataset '{dataset_name}' in project '{project_name}'...")
    latest_remote_dataset = None
    try:
        latest_remote_dataset = Dataset.get(
            dataset_project=project_name, dataset_name=dataset_name, only_latest=True)
        # NOTE(review): `file_hash` and `hash_local_folder` are assumed to be
        # provided by the installed clearml version -- confirm against its API.
        remote_hash = latest_remote_dataset.file_hash
        local_hash = Dataset.hash_local_folder(local_path)
        if remote_hash == local_hash:
            print("✅ The dataset is already synchronized. No changes found.")
            return latest_remote_dataset.id
        print("🔄 Changes detected. Creating a new version...")
    except ValueError:
        # Dataset.get raises ValueError when no matching dataset exists yet.
        print("No existing dataset found. Creating first version.")
    # Chain the new version to the previous one (if any) so ClearML keeps lineage.
    new_dataset = Dataset.create(
        dataset_project=project_name, dataset_name=dataset_name,
        parent_datasets=[latest_remote_dataset] if latest_remote_dataset else None)
    new_dataset.add_files(path=local_path)
    new_dataset.upload()
    new_dataset.finalize()
    print(f"✅ New version '{new_dataset.version}' created with ID: {new_dataset.id}")
    return new_dataset.id
def main():
    """Export a CVAT project as an 'Ultralytics YOLO Detection' dataset and
    synchronize the extracted result into ClearML.

    Workflow: start a server-side export job, poll until it finishes, download
    the produced ZIP through the job's result URL, extract it, and hand the
    folder to ``sync_dataset_to_clearml``.
    """
    task = Task.init(
        project_name="Dataset Management",
        task_name="Sync CVAT Dataset",
        task_type=Task.TaskTypes.data_processing)
    parser = argparse.ArgumentParser(description="Synchronize a CVAT dataset to ClearML.")
    parser.add_argument("--cvat-url", required=True, help="URL to the CVAT server.")
    parser.add_argument("--cvat-user", required=True, help="Username for CVAT.")
    parser.add_argument("--cvat-password", required=True, help="Password for CVAT.")
    parser.add_argument("--cvat-project-id", type=int, required=True, help="The ID of the project in CVAT.")
    parser.add_argument("--clearml-project", default="My Datasets", help="ClearML project for the dataset.")
    parser.add_argument("--clearml-dataset-name", required=True, help="Name of the dataset in ClearML.")
    args = parser.parse_args()
    task.connect(args)  # record the CLI arguments on the ClearML task
    print("Connecting to CVAT server...")
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        dataset_zip_path = temp_path / "dataset.zip"
        config = Configuration(host=args.cvat_url, username=args.cvat_user, password=args.cvat_password)
        with ApiClient(config) as api_client:
            print(f"Step 1: Starting export job for project ID: {args.cvat_project_id}...")
            (request, _) = api_client.projects_api.create_dataset_export(
                id=args.cvat_project_id,
                format="Ultralytics YOLO Detection 1.0",
                save_images=True  # include the images, not just the annotations
            )
            rq_id = request.rq_id
            print(f"Export job started with Request ID: {rq_id}")
            print("Step 2: Waiting for the server to prepare the ZIP file...")
            final_status_request = None
            consecutive_errors = 0
            while True:
                try:
                    (status_request, _) = api_client.requests_api.retrieve(id=rq_id)
                    consecutive_errors = 0  # a successful poll resets the failure counter
                    status = status_request.status.value
                    if status == "finished":
                        print("✅ Export is complete!")
                        final_status_request = status_request
                        break
                    elif status == "failed":
                        raise Exception("The export job on the CVAT server failed.")
                    print(f"Status is '{status}'. Waiting 10 seconds...")
                    time.sleep(10)
                except ApiException as e:
                    # Transient API errors are retried, but give up after 30
                    # consecutive failures instead of polling forever.
                    consecutive_errors += 1
                    if consecutive_errors >= 30:
                        raise Exception("Giving up: repeated errors while polling the export job status.") from e
                    print(f"Error during status check: {e}. Retrying...")
                    time.sleep(10)
            print("Step 3: Retrieving download URL from the completed job...")
            download_url = final_status_request.result_url
            if not download_url:
                raise Exception("CVAT did not return a 'result_url'. Cannot download the dataset.")
            print(f"Found download URL: {download_url}")
            # Re-use the already-authenticated ApiClient for the download, so
            # only the path (plus query string) of the returned URL is needed.
            parsed_url = urlparse(download_url)
            resource_path = parsed_url.path
            if parsed_url.query:
                resource_path += "?" + parsed_url.query
            print(f"Step 4: Downloading ZIP file from resource path: {resource_path}...")
            (_, response) = api_client.call_api(
                resource_path=resource_path, method='GET',
                auth_settings=['basicAuth', 'cookieAuth', 'tokenAuth'],
                _parse_response=False, _request_timeout=600)
            status_code = response.status
            if status_code != 200:
                raise Exception(f"Download from result_url failed with status code {status_code}")
            print(f"Downloading ZIP file to {dataset_zip_path}...")
            with open(dataset_zip_path, 'wb') as f:
                f.write(response.data)
            # Sanity check: anything at or below 100 bytes cannot be a real dataset ZIP.
            if dataset_zip_path.exists() and dataset_zip_path.stat().st_size > 100:
                print(f"✅ ZIP file downloaded and has a size of {dataset_zip_path.stat().st_size} bytes.")
            else:
                raise FileNotFoundError("Downloading the ZIP file appears to have failed.")
            unzip_dir = temp_path / "unzipped_data"
            unzip_dir.mkdir()
            print(f"Extracting the dataset to {unzip_dir}...")
            with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
                zip_ref.extractall(unzip_dir)
            print("Contents of the extracted folder:")
            # sorted() consumes the iterator directly; no intermediate list() needed.
            for item in sorted(unzip_dir.iterdir()):
                print(f"- {item.name}")
            sync_dataset_to_clearml(
                project_name=args.clearml_project,
                dataset_name=args.clearml_dataset_name,
                local_path=unzip_dir)
            print("Job completed!")
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
Annotation values are then:
data/images/train/01-08-2022__01-54-35PM_jpg.rf.618b38f02c5697287d80f5362f19ecf7.jpg
not
images/train/01-08-2022__01-54-35PM_jpg.rf.618b38f02c5697287d80f5362f19ecf7.jpg
as described here:
https://docs.cvat.ai/docs/manual/advanced/formats/format-yolo-ultralytics/
Expected Behavior
No response
Possible Solution
No response
Context
No response
Environment
v2.43.0 kubernetes
Metadata
Metadata
Assignees
Labels
bug: Something isn't working