
Problem with YOLO export #9799

@torsteinelv

Description

Actions before raising this issue

  • I searched the existing issues and did not find anything similar.
  • I read/searched the docs

Steps to Reproduce

import argparse
import tempfile
import zipfile
import time
from pathlib import Path
from urllib.parse import urlparse

from clearml import Dataset, Task
from cvat_sdk.api_client import ApiClient, Configuration
from cvat_sdk.exceptions import ApiException

# The sync_dataset_to_clearml function is unchanged...
def sync_dataset_to_clearml(project_name: str, dataset_name: str, local_path: Path) -> str:
    print(f"Checking dataset '{dataset_name}' in project '{project_name}'...")
    latest_remote_dataset = None
    try:
        latest_remote_dataset = Dataset.get(
            dataset_project=project_name, dataset_name=dataset_name, only_latest=True)
        remote_hash = latest_remote_dataset.file_hash
        local_hash = Dataset.hash_local_folder(local_path)
        if remote_hash == local_hash:
            print("✅ The dataset is already synchronized. No changes found.")
            return latest_remote_dataset.id
        print("🔄 Changes detected. Creating a new version...")
    except ValueError:
        print(f"No existing dataset found. Creating first version.")
    new_dataset = Dataset.create(
        dataset_project=project_name, dataset_name=dataset_name,
        parent_datasets=[latest_remote_dataset] if latest_remote_dataset else None)
    new_dataset.add_files(path=local_path)
    new_dataset.upload()
    new_dataset.finalize()
    print(f"✅ New version '{new_dataset.version}' created with ID: {new_dataset.id}")
    return new_dataset.id

def main():
    task = Task.init(
        project_name="Dataset Management",
        task_name="Sync CVAT Dataset",
        task_type=Task.TaskTypes.data_processing)
    parser = argparse.ArgumentParser(description="Synchronize a CVAT dataset to ClearML.")
    parser.add_argument("--cvat-url", required=True, help="URL to the CVAT server.")
    parser.add_argument("--cvat-user", required=True, help="Username for CVAT.")
    parser.add_argument("--cvat-password", required=True, help="Password for CVAT.")
    parser.add_argument("--cvat-project-id", type=int, required=True, help="The ID of the project in CVAT.")
    parser.add_argument("--clearml-project", default="My Datasets", help="ClearML project for the dataset.")
    parser.add_argument("--clearml-dataset-name", required=True, help="Name of the dataset in ClearML.")
    args = parser.parse_args()
    task.connect(args)

    print("Connecting to CVAT server...")
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        dataset_zip_path = temp_path / "dataset.zip"
        config = Configuration(host=args.cvat_url, username=args.cvat_user, password=args.cvat_password)
        with ApiClient(config) as api_client:
            print(f"Step 1: Starting export job for project ID: {args.cvat_project_id}...")
            # --- CHANGE IS HERE ---
            (request, _) = api_client.projects_api.create_dataset_export(
                id=args.cvat_project_id,
                format="Ultralytics YOLO Detection 1.0",
                save_images=True  # Tell CVAT that we want to include the images
            )
            # --- END OF CHANGE ---
            rq_id = request.rq_id
            print(f"Export job started with Request ID: {rq_id}")

            print("Step 2: Waiting for the server to prepare the ZIP file...")
            final_status_request = None
            while True:
                try:
                    (status_request, _) = api_client.requests_api.retrieve(id=rq_id)
                    status = status_request.status.value
                    if status == "finished":
                        print("✅ Export is complete!")
                        final_status_request = status_request
                        break
                    elif status == "failed":
                        raise Exception("The export job on the CVAT server failed.")
                    print(f"Status is '{status}'. Waiting 10 seconds...")
                    time.sleep(10)
                except ApiException as e:
                    print(f"Error during status check: {e}. Retrying...")
                    time.sleep(10)
            
            print("Step 3: Retrieving download URL from the completed job...")
            download_url = final_status_request.result_url
            if not download_url:
                raise Exception("CVAT did not return a 'result_url'. Cannot download the dataset.")
            print(f"Found download URL: {download_url}")
            
            parsed_url = urlparse(download_url)
            resource_path = parsed_url.path
            if parsed_url.query:
                resource_path += "?" + parsed_url.query
            
            print(f"Step 4: Downloading ZIP file from resource path: {resource_path}...")
            (_, response) = api_client.call_api(
                resource_path=resource_path, method='GET',
                auth_settings=['basicAuth', 'cookieAuth', 'tokenAuth'],
                _parse_response=False, _request_timeout=600)
            status_code = response.status
            if status_code != 200:
                raise Exception(f"Download from result_url failed with status code {status_code}")

            print(f"Downloading ZIP file to {dataset_zip_path}...")
            with open(dataset_zip_path, 'wb') as f:
                f.write(response.data)

            if dataset_zip_path.exists() and dataset_zip_path.stat().st_size > 100:
                print(f"✅ ZIP file downloaded and has a size of {dataset_zip_path.stat().st_size} bytes.")
            else:
                raise FileNotFoundError("Downloading the ZIP file appears to have failed.")

        unzip_dir = temp_path / "unzipped_data"
        unzip_dir.mkdir()
        print(f"Extracting the dataset to {unzip_dir}...")
        with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
            zip_ref.extractall(unzip_dir)
            
        print("Contents of the extracted folder:")
        for item in sorted(unzip_dir.iterdir()):
            print(f"- {item.name}")
            
        sync_dataset_to_clearml(
            project_name=args.clearml_project,
            dataset_name=args.clearml_dataset_name,
            local_path=unzip_dir)
    
    print("Job completed!")

if __name__ == "__main__":
    main()
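
To see which image paths the export actually writes, the extracted annotation list can be dumped after the script above has run. This is a minimal sketch, assuming the archive contains a train.txt with one image path per line, as the Ultralytics YOLO format documentation describes; the file name and the extraction directory are placeholders:

from pathlib import Path

unzip_dir = Path("unzipped_data")      # wherever the ZIP was extracted
train_list = unzip_dir / "train.txt"   # assumed name per the format docs

for line in train_list.read_text().splitlines():
    line = line.strip()
    if not line:
        continue
    # Documented form:        images/train/<image>.jpg
    # Observed with v2.43.0:  data/images/train/<image>.jpg
    marker = "  <-- unexpected 'data/' prefix" if line.startswith("data/") else ""
    print(line + marker)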

The image paths written into the exported annotation file are then:

data/images/train/01-08-2022__01-54-35PM_jpg.rf.618b38f02c5697287d80f5362f19ecf7.jpg

not

images/train/01-08-2022__01-54-35PM_jpg.rf.618b38f02c5697287d80f5362f19ecf7.jpg

as described here:
https://docs.cvat.ai/docs/manual/advanced/formats/format-yolo-ultralytics/
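
As a temporary workaround, the extra prefix can be stripped after extraction. This is only a sketch, under the assumption that the images themselves are laid out under images/<subset>/ as documented and that only the annotation list carries the leading data/ component; it is not the CVAT-side fix:

from pathlib import Path

unzip_dir = Path("unzipped_data")      # hypothetical extraction directory
train_list = unzip_dir / "train.txt"   # assumed name per the format docs

lines = train_list.read_text().splitlines()
# Drop the unexpected "data/" prefix so paths match the documented
# images/<subset>/<image> layout expected by Ultralytics YOLO.
fixed = [line.removeprefix("data/") for line in lines]
train_list.write_text("\n".join(fixed) + "\n")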

Expected Behavior

No response

Possible Solution

No response

Context

No response

Environment

CVAT v2.43.0, deployed on Kubernetes
