From 7c34076a20420f05a3906f76656d4028da43de65 Mon Sep 17 00:00:00 2001 From: Shahzaib-Hamid Date: Tue, 14 Oct 2025 22:30:38 +0500 Subject: [PATCH 1/4] feat:Added Backblaze B2 Integration --- diagnose_b2_export.py | 146 ++++++ label_studio/core/settings/base.py | 9 + label_studio/io_storages/b2/__init__.py | 3 + label_studio/io_storages/b2/api.py | 321 ++++++++++++ label_studio/io_storages/b2/form_layout.yml | 155 ++++++ label_studio/io_storages/b2/models.py | 461 ++++++++++++++++++ label_studio/io_storages/b2/openapi_schema.py | 95 ++++ label_studio/io_storages/b2/serializers.py | 146 ++++++ label_studio/io_storages/b2/utils.py | 304 ++++++++++++ label_studio/io_storages/functions.py | 7 + .../migrations/0022_add_b2_storage_models.py | 125 +++++ label_studio/io_storages/models.py | 6 + label_studio/io_storages/urls.py | 29 ++ show_b2_files.py | 123 +++++ test_b2_upload.py | 83 ++++ .../Settings/StorageSettings/providers/b2.ts | 129 +++++ .../StorageSettings/providers/index.ts | 2 + 17 files changed, 2144 insertions(+) create mode 100644 diagnose_b2_export.py create mode 100644 label_studio/io_storages/b2/__init__.py create mode 100644 label_studio/io_storages/b2/api.py create mode 100644 label_studio/io_storages/b2/form_layout.yml create mode 100644 label_studio/io_storages/b2/models.py create mode 100644 label_studio/io_storages/b2/openapi_schema.py create mode 100644 label_studio/io_storages/b2/serializers.py create mode 100644 label_studio/io_storages/b2/utils.py create mode 100644 label_studio/io_storages/migrations/0022_add_b2_storage_models.py create mode 100644 show_b2_files.py create mode 100644 test_b2_upload.py create mode 100644 web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts diff --git a/diagnose_b2_export.py b/diagnose_b2_export.py new file mode 100644 index 000000000000..e7ee38b364a6 --- /dev/null +++ b/diagnose_b2_export.py @@ -0,0 +1,146 @@ +""" +Diagnostic script to check B2 export storage configuration +""" +import os +import sys +import django + +# Add label_studio to path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'label_studio')) + +# Setup Django +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings.label_studio') +django.setup() + +from io_storages.b2.models import B2ExportStorage, B2ImportStorage, B2ExportStorageLink +from projects.models import Project +from tasks.models import Annotation as AnnotationModel + +def diagnose(): + print("=" * 60) + print("B2 Export Storage Diagnostic") + print("=" * 60) + print() + + # Check B2 models exist + print("[1] Checking B2 Models...") + try: + print(f" B2ImportStorage: {B2ImportStorage}") + print(f" B2ExportStorage: {B2ExportStorage}") + print(" [OK] B2 models imported") + except Exception as e: + print(f" [ERROR] {e}") + return + + print() + + # List all B2 export storages + print("[2] Checking B2 Export Storages in Database...") + export_storages = B2ExportStorage.objects.all() + print(f" Total B2 Export Storages: {export_storages.count()}") + + for storage in export_storages: + print(f" - ID: {storage.id}") + print(f" Title: {storage.title}") + print(f" Bucket: {storage.bucket}") + print(f" Prefix: {storage.prefix}") + print(f" Project: {storage.project.title if storage.project else 'None'}") + print(f" Project ID: {storage.project.id if storage.project else 'None'}") + print(f" Endpoint: {storage.b2_endpoint_url}") + + if export_storages.count() == 0: + print(" [WARNING] No B2 export storages configured!") + print(" Please configure one in UI: Settings -> Cloud Storage -> Add Target Storage") + return + + print() + + # Check related name + print("[3] Checking Related Name Access...") + projects = Project.objects.all() + print(f" Total Projects: {projects.count()}") + + for project in projects: + print(f" Project: {project.title} (ID: {project.id})") + + # Try to access B2 export storages via related name + try: + b2_storages = project.io_storages_b2exportstorages.all() + print(f" B2 Export Storages: {b2_storages.count()}") + for storage in b2_storages: + print(f" - {storage.title} (Bucket: {storage.bucket})") + except AttributeError as e: + print(f" [ERROR] Cannot access io_storages_b2exportstorages: {e}") + print(f" This means the related_name might be wrong!") + + print() + + # Check recent annotations + print("[4] Checking Recent Annotations...") + annotations = AnnotationModel.objects.all().order_by('-id')[:5] + print(f" Total Annotations: {AnnotationModel.objects.count()}") + print(f" Recent 5:") + + for ann in annotations: + print(f" - Annotation ID: {ann.id}") + print(f" Task ID: {ann.task.id}") + print(f" Project: {ann.project.title if ann.project else 'N/A'}") + print(f" Created: {ann.created_at}") + + # Check if this annotation has export links + links = B2ExportStorageLink.objects.filter(annotation=ann) + print(f" B2 Export Links: {links.count()}") + for link in links: + print(f" - Storage: {link.storage.title}") + + print() + + # Check signal registration + print("[5] Checking Django Signal Registration...") + from django.db.models.signals import post_save + + receivers = post_save._live_receivers(AnnotationModel) + print(f" Total post_save receivers for Annotation: {len(receivers)}") + + b2_receiver_found = False + for receiver in receivers: + receiver_name = receiver.__name__ if hasattr(receiver, '__name__') else str(receiver) + print(f" - {receiver_name}") + if 'b2' in receiver_name.lower(): + b2_receiver_found = True + print(f" [OK] B2 export signal found!") + + if not b2_receiver_found: + print(" [WARNING] B2 export signal not found!") + print(" This means signals might not be registered properly") + + print() + print("=" * 60) + print("Diagnostic Complete") + print("=" * 60) + print() + + # Summary + if export_storages.count() > 0 and b2_receiver_found: + print("[RESULT] Everything looks configured correctly!") + print() + print("If export still not working:") + print("1. Make sure you submitted annotation (not just saved draft)") + print("2. Check terminal logs for 'Export' messages") + print("3. Wait 30 seconds and refresh B2 bucket") + print("4. Check correct bucket and prefix folder") + else: + print("[ACTION REQUIRED]") + if export_storages.count() == 0: + print("- Configure B2 Export Storage in UI") + if not b2_receiver_found: + print("- Restart server to register signals") + +if __name__ == '__main__': + try: + diagnose() + except Exception as e: + print(f"[ERROR] {e}") + import traceback + traceback.print_exc() + diff --git a/label_studio/core/settings/base.py b/label_studio/core/settings/base.py index effefd68a400..7f375f75021a 100644 --- a/label_studio/core/settings/base.py +++ b/label_studio/core/settings/base.py @@ -788,6 +788,15 @@ def collect_versions_dummy(**kwargs): ], ) +# Custom B2 endpoints on these domains will get detailed error reporting +B2_TRUSTED_STORAGE_DOMAINS = get_env_list( + 'B2_TRUSTED_STORAGE_DOMAINS', + [ + 'backblazeb2.com', + 'backblaze.com', + ], +) + REAL_HOSTNAME = os.getenv('HOSTNAME') # we have to use getenv, because we don't use LABEL_STUDIO_ prefix GCS_CLOUD_STORAGE_FORCE_DEFAULT_CREDENTIALS = get_bool_env('GCS_CLOUD_STORAGE_FORCE_DEFAULT_CREDENTIALS', False) PUBLIC_API_DOCS = get_bool_env('PUBLIC_API_DOCS', False) diff --git a/label_studio/io_storages/b2/__init__.py b/label_studio/io_storages/b2/__init__.py new file mode 100644 index 000000000000..31ab45f5acec --- /dev/null +++ b/label_studio/io_storages/b2/__init__.py @@ -0,0 +1,3 @@ +"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license. +""" + diff --git a/label_studio/io_storages/b2/api.py b/label_studio/io_storages/b2/api.py new file mode 100644 index 000000000000..d9ee5b76584a --- /dev/null +++ b/label_studio/io_storages/b2/api.py @@ -0,0 +1,321 @@ +"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license. +""" +from django.utils.decorators import method_decorator +from drf_spectacular.types import OpenApiTypes +from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema +from io_storages.api import ( + ExportStorageDetailAPI, + ExportStorageFormLayoutAPI, + ExportStorageListAPI, + ExportStorageSyncAPI, + ExportStorageValidateAPI, + ImportStorageDetailAPI, + ImportStorageFormLayoutAPI, + ImportStorageListAPI, + ImportStorageSyncAPI, + ImportStorageValidateAPI, +) +from io_storages.b2.models import B2ExportStorage, B2ImportStorage +from io_storages.b2.serializers import B2ExportStorageSerializer, B2ImportStorageSerializer + +from .openapi_schema import ( + _b2_export_storage_schema, + _b2_export_storage_schema_with_id, + _b2_import_storage_schema, + _b2_import_storage_schema_with_id, +) + + +@method_decorator( + name='get', + decorator=extend_schema( + tags=['Storage: B2'], + summary='List B2 import storage', + description='Get a list of all Backblaze B2 import storage connections.', + parameters=[ + OpenApiParameter( + name='project', + type=OpenApiTypes.INT, + location='query', + description='Project ID', + ), + ], + extensions={ + 'x-fern-sdk-group-name': ['import_storage', 'b2'], + 'x-fern-sdk-method-name': 'list', + 'x-fern-audiences': ['public'], + }, + ), +) +@method_decorator( + name='post', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Create new B2 import storage', + description='Create new Backblaze B2 import storage connection', + request={ + 'application/json': _b2_import_storage_schema, + }, + extensions={ + 'x-fern-sdk-group-name': ['import_storage', 'b2'], + 'x-fern-sdk-method-name': 'create', + 'x-fern-audiences': ['public'], + }, + ), +) +class B2ImportStorageListAPI(ImportStorageListAPI): + """API for listing and creating B2 import storage connections.""" + + queryset = B2ImportStorage.objects.all() + serializer_class = B2ImportStorageSerializer + + +@method_decorator( + name='get', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Get B2 import storage', + description='Get a specific Backblaze B2 import storage connection.', + request=None, + extensions={ + 'x-fern-sdk-group-name': ['import_storage', 'b2'], + 'x-fern-sdk-method-name': 'get', + 'x-fern-audiences': ['public'], + }, + ), +) +@method_decorator( + name='patch', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Update B2 import storage', + description='Update a specific Backblaze B2 import storage connection.', + request={ + 'application/json': _b2_import_storage_schema, + }, + extensions={ + 'x-fern-sdk-group-name': ['import_storage', 'b2'], + 'x-fern-sdk-method-name': 'update', + 'x-fern-audiences': ['public'], + }, + ), +) +@method_decorator( + name='delete', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Delete B2 import storage', + description='Delete a specific Backblaze B2 import storage connection.', + request=None, + extensions={ + 'x-fern-sdk-group-name': ['import_storage', 'b2'], + 'x-fern-sdk-method-name': 'delete', + 'x-fern-audiences': ['public'], + }, + ), +) +class B2ImportStorageDetailAPI(ImportStorageDetailAPI): + """API for retrieving, updating, and deleting a specific B2 import storage.""" + + queryset = B2ImportStorage.objects.all() + serializer_class = B2ImportStorageSerializer + + +@method_decorator( + name='post', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Sync B2 import storage', + description='Sync tasks from a Backblaze B2 import storage connection.', + parameters=[ + OpenApiParameter( + name='id', + type=OpenApiTypes.INT, + location='path', + description='Storage ID', + ), + ], + request=None, + extensions={ + 'x-fern-sdk-group-name': ['import_storage', 'b2'], + 'x-fern-sdk-method-name': 'sync', + 'x-fern-audiences': ['public'], + }, + ), +) +class B2ImportStorageSyncAPI(ImportStorageSyncAPI): + """API for syncing a B2 import storage.""" + + serializer_class = B2ImportStorageSerializer + + +@method_decorator( + name='post', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Validate B2 import storage', + description='Validate a specific Backblaze B2 import storage connection.', + request={ + 'application/json': _b2_import_storage_schema_with_id, + }, + responses={200: OpenApiResponse(description='Validation successful')}, + extensions={ + 'x-fern-sdk-group-name': ['import_storage', 'b2'], + 'x-fern-sdk-method-name': 'validate', + 'x-fern-audiences': ['public'], + }, + ), +) +class B2ImportStorageValidateAPI(ImportStorageValidateAPI): + """API for validating a B2 import storage connection.""" + + serializer_class = B2ImportStorageSerializer + + +@method_decorator( + name='post', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Validate B2 export storage', + description='Validate a specific Backblaze B2 export storage connection.', + request={ + 'application/json': _b2_export_storage_schema_with_id, + }, + responses={200: OpenApiResponse(description='Validation successful')}, + extensions={ + 'x-fern-sdk-group-name': ['export_storage', 'b2'], + 'x-fern-sdk-method-name': 'validate', + 'x-fern-audiences': ['public'], + }, + ), +) +class B2ExportStorageValidateAPI(ExportStorageValidateAPI): + """API for validating a B2 export storage connection.""" + + serializer_class = B2ExportStorageSerializer + + +@method_decorator( + name='get', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Get all B2 export storage', + description='Get a list of all Backblaze B2 export storage connections.', + parameters=[ + OpenApiParameter( + name='project', + type=OpenApiTypes.INT, + location='query', + description='Project ID', + ), + ], + extensions={ + 'x-fern-sdk-group-name': ['export_storage', 'b2'], + 'x-fern-sdk-method-name': 'list', + 'x-fern-audiences': ['public'], + }, + ), +) +@method_decorator( + name='post', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Create B2 export storage', + description='Create a new Backblaze B2 export storage connection to store annotations.', + request={ + 'application/json': _b2_export_storage_schema, + }, + extensions={ + 'x-fern-sdk-group-name': ['export_storage', 'b2'], + 'x-fern-sdk-method-name': 'create', + 'x-fern-audiences': ['public'], + }, + ), +) +class B2ExportStorageListAPI(ExportStorageListAPI): + """API for listing and creating B2 export storage connections.""" + + queryset = B2ExportStorage.objects.all() + serializer_class = B2ExportStorageSerializer + + +@method_decorator( + name='get', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Get B2 export storage', + description='Get a specific Backblaze B2 export storage connection.', + request=None, + extensions={ + 'x-fern-sdk-group-name': ['export_storage', 'b2'], + 'x-fern-sdk-method-name': 'get', + 'x-fern-audiences': ['public'], + }, + ), +) +@method_decorator( + name='patch', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Update B2 export storage', + description='Update a specific Backblaze B2 export storage connection.', + request={ + 'application/json': _b2_export_storage_schema, + }, + extensions={ + 'x-fern-sdk-group-name': ['export_storage', 'b2'], + 'x-fern-sdk-method-name': 'update', + 'x-fern-audiences': ['public'], + }, + ), +) +@method_decorator( + name='delete', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Delete B2 export storage', + description='Delete a specific Backblaze B2 export storage connection.', + request=None, + extensions={ + 'x-fern-sdk-group-name': ['export_storage', 'b2'], + 'x-fern-sdk-method-name': 'delete', + 'x-fern-audiences': ['public'], + }, + ), +) +class B2ExportStorageDetailAPI(ExportStorageDetailAPI): + """API for retrieving, updating, and deleting a specific B2 export storage.""" + + queryset = B2ExportStorage.objects.all() + serializer_class = B2ExportStorageSerializer + + +@method_decorator( + name='post', + decorator=extend_schema( + tags=['Storage: B2'], + summary='Sync B2 export storage', + description='Sync annotations to a Backblaze B2 export storage connection.', + request=None, + extensions={ + 'x-fern-sdk-group-name': ['export_storage', 'b2'], + 'x-fern-sdk-method-name': 'sync', + 'x-fern-audiences': ['public'], + }, + ), +) +class B2ExportStorageSyncAPI(ExportStorageSyncAPI): + """API for syncing a B2 export storage.""" + + serializer_class = B2ExportStorageSerializer + + +class B2ImportStorageFormLayoutAPI(ImportStorageFormLayoutAPI): + """API for getting the form layout for B2 import storage.""" + pass + + +class B2ExportStorageFormLayoutAPI(ExportStorageFormLayoutAPI): + """API for getting the form layout for B2 export storage.""" + pass + diff --git a/label_studio/io_storages/b2/form_layout.yml b/label_studio/io_storages/b2/form_layout.yml new file mode 100644 index 000000000000..d4700f102ce2 --- /dev/null +++ b/label_studio/io_storages/b2/form_layout.yml @@ -0,0 +1,155 @@ +# Form layout configuration for Backblaze B2 Cloud Storage integration +# This defines the UI fields shown when creating/editing B2 storage connections + +# 1x3 grid - Basic information +title_bucket_prefix: &title_bucket_prefix + - type: text + name: title + label: Storage Name + required: true + - type: text + name: bucket + label: Bucket Name + allowEmpty: false + required: true + - type: text + name: prefix + label: Bucket Prefix (Folder Path) + +# 2x3 grid - B2 specific parameters for import +b2_params_import: &b2_params_import + - type: text + name: region_name + label: Region Name + placeholder: us-west-004 + tooltip: "B2 region (e.g., us-west-004, us-east-005, eu-central-003)" + - type: text + name: b2_endpoint_url + label: B2 Endpoint URL + placeholder: https://s3.us-west-004.backblazeb2.com + tooltip: "Your B2 S3-compatible endpoint URL" + - null + - type: password + name: b2_access_key_id + label: Application Key ID + autoComplete: "off" + skipAutofill: true + allowEmpty: false + protectedValue: true + tooltip: "Your B2 Application Key ID. Leave blank if already set up as an Environment Variable (B2_ACCESS_KEY_ID)." + - type: password + name: b2_secret_access_key + label: Application Key + autoComplete: "new-password" + skipAutofill: true + allowEmpty: false + protectedValue: true + tooltip: "Your B2 Application Key. Leave blank if already set up as an Environment Variable (B2_SECRET_ACCESS_KEY)." + - null + +# 2x3 grid - B2 specific parameters for export +b2_params_export: &b2_params_export + - type: text + name: region_name + label: Region Name + placeholder: us-west-004 + tooltip: "B2 region (e.g., us-west-004, us-east-005, eu-central-003)" + - type: text + name: b2_endpoint_url + label: B2 Endpoint URL + placeholder: https://s3.us-west-004.backblazeb2.com + tooltip: "Your B2 S3-compatible endpoint URL" + - null + - type: password + name: b2_access_key_id + label: Application Key ID + autoComplete: "off" + skipAutofill: true + allowEmpty: false + protectedValue: true + tooltip: "Your B2 Application Key ID" + - type: password + name: b2_secret_access_key + label: Application Key + autoComplete: "new-password" + skipAutofill: true + allowEmpty: false + protectedValue: true + tooltip: "Your B2 Application Key" + - null + + +ImportStorage: + # Title, Bucket, Prefix + - columnCount: 3 + fields: *title_bucket_prefix + # Regex filter + - columnCount: 1 + fields: + - type: text + name: regex_filter + label: File Filter Regex + placeholder: '.*csv or .*(jpe?g|png|tiff) or .\w+-\d+.text' + validators: + - regexp + # B2 specific params + - columnCount: 3 + fields: *b2_params_import + + # Import method selection + - columnCount: 1 + fields: + - type: select + name: use_blob_urls + label: Import method + description: Choose how to import your data from B2 storage + placeholder: "Select an option" + required: true + options: + - value: true + label: "Files - Automatically creates a task for each storage object (e.g. JPG, MP3, TXT)" + - value: false + label: "Tasks - Treat each JSON or JSONL file as a task definition (one or more tasks per file)" + + # 2 column grid - Presigned URLs + - columnCount: 2 + columns: + - width: 468 + fields: + - type: toggle + name: presign + label: "Use pre-signed URLs (On)\n Proxy through the platform (Off)" + description: "When pre-signed URLs are enabled, all data bypasses the platform and user browsers directly read data from B2" + value: true + - fields: + - type: counter + name: presign_ttl + label: Expire pre-signed URLs (minutes) + min: 1 + value: 15 + dependency: presign + # Recursive scan option + - columnCount: 1 + columns: + - fields: + - type: toggle + name: recursive_scan + label: Scan all sub-folders + description: Include files from all nested folders in the bucket + +ExportStorage: + # Title, Bucket, Prefix + - columnCount: 3 + fields: *title_bucket_prefix + # B2 specific params + - columnCount: 3 + fields: *b2_params_export + # Delete objects option + - columnCount: 1 + columns: + - fields: + - type: toggle + name: can_delete_objects + label: Can delete objects from storage + description: If unchecked, annotations will not be deleted from B2 storage when deleted from Label Studio + diff --git a/label_studio/io_storages/b2/models.py b/label_studio/io_storages/b2/models.py new file mode 100644 index 000000000000..c203cc65f86c --- /dev/null +++ b/label_studio/io_storages/b2/models.py @@ -0,0 +1,461 @@ +"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license. +""" + +import json +import logging +import re +from typing import Union +from urllib.parse import urlparse + +import boto3 +from core.feature_flags import flag_set +from core.redis import start_job_async_or_sync +from django.conf import settings +from django.db import models +from django.db.models.signals import post_save, pre_delete +from django.dispatch import receiver +from django.utils.translation import gettext_lazy as _ +from io_storages.base_models import ( + ExportStorage, + ExportStorageLink, + ImportStorage, + ImportStorageLink, + ProjectStorageMixin, +) +from io_storages.b2.utils import ( + catch_and_reraise_from_none, + get_client_and_resource, + resolve_b2_url, +) +from io_storages.utils import StorageObject, load_tasks_json, storage_can_resolve_bucket_url +from tasks.models import Annotation + +from label_studio.io_storages.b2.utils import B2 + +logger = logging.getLogger(__name__) +logging.getLogger('botocore').setLevel(logging.CRITICAL) +boto3.set_stream_logger(level=logging.INFO) + +# Cache for B2 clients to avoid re-creating them on every request +clients_cache = {} + + +class B2StorageMixin(models.Model): + """ + Mixin for Backblaze B2 Cloud Storage connection settings. + + B2 is S3-compatible, so we use boto3 with custom endpoints. + Unlike AWS S3, B2 requires: + - An explicit endpoint URL (e.g., https://s3.us-west-004.backblazeb2.com) + - Application Key ID and Application Key (equivalent to AWS credentials) + - No special session tokens or SSE KMS keys + """ + + bucket = models.TextField( + _('bucket'), + null=True, + blank=True, + help_text='B2 bucket name' + ) + prefix = models.TextField( + _('prefix'), + null=True, + blank=True, + help_text='B2 bucket prefix (folder path)' + ) + regex_filter = models.TextField( + _('regex_filter'), + null=True, + blank=True, + help_text='Cloud storage regex for filtering objects', + ) + use_blob_urls = models.BooleanField( + _('use_blob_urls'), + default=False, + help_text='Interpret objects as BLOBs and generate URLs', + ) + + # B2-specific credentials + # Note: These are called "Application Key ID" and "Application Key" in B2 UI, + # but we use AWS-compatible naming for boto3 compatibility + b2_access_key_id = models.TextField( + _('b2_access_key_id'), + null=True, + blank=True, + help_text='B2 Application Key ID (equivalent to AWS_ACCESS_KEY_ID)' + ) + b2_secret_access_key = models.TextField( + _('b2_secret_access_key'), + null=True, + blank=True, + help_text='B2 Application Key (equivalent to AWS_SECRET_ACCESS_KEY)', + ) + + # B2-specific endpoint configuration + # B2 uses region-specific endpoints like: https://s3.us-west-004.backblazeb2.com + b2_endpoint_url = models.TextField( + _('b2_endpoint_url'), + null=True, + blank=True, + help_text='B2 S3-compatible endpoint URL (e.g., https://s3.us-west-004.backblazeb2.com)' + ) + region_name = models.TextField( + _('region_name'), + null=True, + blank=True, + help_text='B2 Region (e.g., us-west-004, us-east-005, eu-central-003)' + ) + + @catch_and_reraise_from_none + def get_client_and_resource(self): + """ + Get or create cached boto3 client and resource for B2. + + B2 client initialization takes ~100ms, so we cache clients to avoid + performance issues when processing many tasks. + """ + # Create cache key from connection parameters + cache_key = f'{self.b2_access_key_id}:{self.b2_secret_access_key}:{self.b2_endpoint_url}:{self.region_name}' + if cache_key in clients_cache: + return clients_cache[cache_key] + + # Create new client and resource + result = get_client_and_resource( + self.b2_access_key_id, + self.b2_secret_access_key, + self.b2_endpoint_url, + self.region_name, + ) + clients_cache[cache_key] = result + return result + + def get_client(self): + """Get boto3 client for B2.""" + client, _ = self.get_client_and_resource() + return client + + def get_client_and_bucket(self, validate_connection=True): + """Get boto3 client and bucket resource for B2.""" + client, b2 = self.get_client_and_resource() + if validate_connection: + self.validate_connection(client) + return client, b2.Bucket(self.bucket) + + @catch_and_reraise_from_none + def validate_connection(self, client=None): + """ + Validate connection to B2 bucket. + + For import storage, we check that at least one object exists with the prefix. + For export storage, we only check that the bucket exists (prefix can be empty). + """ + logger.debug('validate_connection') + if client is None: + client = self.get_client() + + # Check if this is an export storage class + is_export = 'Export' in self.__class__.__name__ + + if self.prefix: + logger.debug( + f'[Class {self.__class__.__name__}]: Test connection to B2 bucket {self.bucket} ' + f'with prefix {self.prefix} using ListObjectsV2 operation' + ) + result = client.list_objects_v2(Bucket=self.bucket, Prefix=self.prefix, MaxKeys=1) + # We expect 1 key with the prefix for imports. For exports it's okay if there are 0 with the prefix. + expected_keycount = 0 if is_export else 1 + if (keycount := result.get('KeyCount')) is None or keycount < expected_keycount: + raise KeyError(f'{self.url_scheme}://{self.bucket}/{self.prefix} not found.') + else: + logger.debug( + f'[Class {self.__class__.__name__}]: Test connection to B2 bucket {self.bucket} ' + f'using HeadBucket operation' + ) + client.head_bucket(Bucket=self.bucket) + + @property + def path_full(self): + """Full path to the storage location.""" + prefix = self.prefix or '' + return f'{self.url_scheme}://{self.bucket}/{prefix}' + + @property + def type_full(self): + """Human-readable storage type name.""" + return 'Backblaze B2' + + @catch_and_reraise_from_none + def get_bytes_stream(self, uri, range_header=None): + """ + Get file directly from B2 using iter_chunks without wrapper. + + This method forwards Range headers directly to B2 and returns the raw stream. + Note: The returned stream is NOT seekable and will break if seeking backwards. + + Args: + uri: The B2 URI of the file to retrieve + range_header: Optional HTTP Range header to forward to B2 + + Returns: + Tuple of (stream, content_type, metadata) where metadata contains + important B2 headers like ETag, ContentLength, etc. + """ + # Parse URI to get bucket and key + parsed_uri = urlparse(uri, allow_fragments=False) + bucket_name = parsed_uri.netloc + key = parsed_uri.path.lstrip('/') + + # Get B2 client + client = self.get_client() + + try: + # Forward Range header to B2 if provided + request_params = {'Bucket': bucket_name, 'Key': key} + if range_header: + request_params['Range'] = range_header + + # Get the object from B2 + response = client.get_object(**request_params) + + # Extract metadata to return + metadata = { + 'ETag': response.get('ETag'), + 'ContentLength': response.get('ContentLength'), + 'ContentRange': response.get('ContentRange'), + 'LastModified': response.get('LastModified'), + 'StatusCode': response['ResponseMetadata']['HTTPStatusCode'], + } + + # Return the streaming body directly + return response['Body'], response.get('ContentType'), metadata + + except Exception as e: + logger.error(f'Error getting direct stream from B2 for uri {uri}: {e}', exc_info=True) + return None, None, {} + + class Meta: + abstract = True + + +class B2ImportStorageBase(B2StorageMixin, ImportStorage): + """ + Base class for B2 Import Storage. + + This class provides the core functionality for importing tasks from B2 buckets. + """ + + url_scheme = 'b2' + + presign = models.BooleanField( + _('presign'), + default=True, + help_text='Generate presigned URLs' + ) + presign_ttl = models.PositiveSmallIntegerField( + _('presign_ttl'), + default=1, + help_text='Presigned URLs TTL (in minutes)' + ) + recursive_scan = models.BooleanField( + _('recursive scan'), + default=False, + help_text=_('Perform recursive scan over the bucket content'), + ) + + @catch_and_reraise_from_none + def iter_objects(self): + """ + Iterate over objects in the B2 bucket. + + Yields: + B2 object instances + """ + _, bucket = self.get_client_and_bucket() + list_kwargs = {} + if self.prefix: + list_kwargs['Prefix'] = self.prefix.rstrip('/') + '/' + if not self.recursive_scan: + list_kwargs['Delimiter'] = '/' + bucket_iter = bucket.objects.filter(**list_kwargs).all() + regex = re.compile(str(self.regex_filter)) if self.regex_filter else None + for obj in bucket_iter: + key = obj.key + if key.endswith('/'): + logger.debug(key + ' is skipped because it is a folder') + continue + if regex and not regex.match(key): + logger.debug(key + ' is skipped by regex filter') + continue + logger.debug(f'B2 {key} has passed the regex filter') + yield obj + + @catch_and_reraise_from_none + def iter_keys(self): + """Iterate over object keys in the B2 bucket.""" + for obj in self.iter_objects(): + yield obj.key + + def get_unified_metadata(self, obj): + """Get standardized metadata for an object.""" + return { + 'key': obj.key, + 'last_modified': obj.last_modified, + 'size': obj.size, + } + + @catch_and_reraise_from_none + def scan_and_create_links(self): + """Scan B2 bucket and create task links.""" + return self._scan_and_create_links(B2ImportStorageLink) + + @catch_and_reraise_from_none + def get_data(self, key) -> list[StorageObject]: + """ + Get data from B2 for a given key. + + If use_blob_urls is True, return the B2 URL directly. + Otherwise, read and parse the JSON content. + """ + uri = f'{self.url_scheme}://{self.bucket}/{key}' + if self.use_blob_urls: + data_key = settings.DATA_UNDEFINED_NAME + task = {data_key: uri} + return [StorageObject(key=key, task_data=task)] + + # read task json from bucket and validate it + _, b2 = self.get_client_and_resource() + bucket = b2.Bucket(self.bucket) + obj = b2.Object(bucket.name, key).get()['Body'].read() + return load_tasks_json(obj, key) + + @catch_and_reraise_from_none + def generate_http_url(self, url): + """Generate HTTP URL (presigned or base64) for a B2 URL.""" + return resolve_b2_url(url, self.get_client(), self.presign, expires_in=self.presign_ttl * 60) + + @catch_and_reraise_from_none + def can_resolve_url(self, url: Union[str, None]) -> bool: + """Check if this storage can resolve the given URL.""" + return storage_can_resolve_bucket_url(self, url) + + @catch_and_reraise_from_none + def get_blob_metadata(self, key): + """Get metadata for a blob in B2.""" + return B2.get_blob_metadata( + key, + self.bucket, + b2_access_key_id=self.b2_access_key_id, + b2_secret_access_key=self.b2_secret_access_key, + b2_endpoint_url=self.b2_endpoint_url, + region_name=self.region_name, + ) + + class Meta: + abstract = True + + +class B2ImportStorage(ProjectStorageMixin, B2ImportStorageBase): + """Concrete model for B2 Import Storage.""" + + class Meta: + abstract = False + + +class B2ExportStorage(B2StorageMixin, ExportStorage): + """ + B2 Export Storage for saving annotations. + + This storage saves annotations to a B2 bucket in JSON format. + """ + + @catch_and_reraise_from_none + def save_annotation(self, annotation): + """Save a single annotation to B2.""" + client, b2 = self.get_client_and_resource() + logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}') + ser_annotation = self._get_serialized_data(annotation) + + # get key that identifies this object in storage + key = B2ExportStorageLink.get_key(annotation) + key = str(self.prefix) + '/' + key if self.prefix else key + + # put object into storage + # Note: B2 doesn't support AWS SSE KMS keys, so we use basic server-side encryption + additional_params = {} + + # B2 supports server-side encryption (AES-256) automatically + # No need to explicitly set it like with AWS + + b2.Object(self.bucket, key).put(Body=json.dumps(ser_annotation), **additional_params) + + # create link if everything ok + B2ExportStorageLink.create(annotation, self) + + @catch_and_reraise_from_none + def delete_annotation(self, annotation): + """Delete an annotation from B2.""" + client, b2 = self.get_client_and_resource() + logger.debug(f'Deleting object on {self.__class__.__name__} Storage {self} for annotation {annotation}') + + # get key that identifies this object in storage + key = B2ExportStorageLink.get_key(annotation) + key = str(self.prefix) + '/' + key if self.prefix else key + + # delete object from storage + b2.Object(self.bucket, key).delete() + + # delete link if everything ok + B2ExportStorageLink.objects.filter(storage=self, annotation=annotation).delete() + + +def async_export_annotation_to_b2_storages(annotation): + """Async function to export annotation to all B2 export storages.""" + project = annotation.project + if hasattr(project, 'io_storages_b2exportstorages'): + for storage in project.io_storages_b2exportstorages.all(): + logger.debug(f'Export {annotation} to B2 storage {storage}') + storage.save_annotation(annotation) + + +@receiver(post_save, sender=Annotation) +def export_annotation_to_b2_storages(sender, instance, **kwargs): + """Signal handler to export annotation to B2 when saved.""" + storages = getattr(instance.project, 'io_storages_b2exportstorages', None) + if storages and storages.exists(): # avoid excess jobs in rq + start_job_async_or_sync(async_export_annotation_to_b2_storages, instance) + + +@receiver(pre_delete, sender=Annotation) +def delete_annotation_from_b2_storages(sender, instance, **kwargs): + """Signal handler to delete annotation from B2 when deleted.""" + links = B2ExportStorageLink.objects.filter(annotation=instance) + for link in links: + storage = link.storage + if storage.can_delete_objects: + logger.debug(f'Delete {instance} from B2 storage {storage}') + storage.delete_annotation(instance) + + +class B2ImportStorageLink(ImportStorageLink): + """Link between a Task and B2 Import Storage.""" + + storage = models.ForeignKey(B2ImportStorage, on_delete=models.CASCADE, related_name='links') + + @classmethod + def exists(cls, key, storage): + """Check if a link already exists for this key and storage.""" + storage_link_exists = super(B2ImportStorageLink, cls).exists(key, storage) + # TODO: this is a workaround to be compatible with old keys version - remove it later + prefix = str(storage.prefix) or '' + return ( + storage_link_exists + or cls.objects.filter(key=prefix + key, storage=storage.id).exists() + or cls.objects.filter(key=prefix + '/' + key, storage=storage.id).exists() + ) + + +class B2ExportStorageLink(ExportStorageLink): + """Link between an Annotation and B2 Export Storage.""" + + storage = models.ForeignKey(B2ExportStorage, on_delete=models.CASCADE, related_name='links') + diff --git a/label_studio/io_storages/b2/openapi_schema.py b/label_studio/io_storages/b2/openapi_schema.py new file mode 100644 index 000000000000..8021da8c977c --- /dev/null +++ b/label_studio/io_storages/b2/openapi_schema.py @@ -0,0 +1,95 @@ +"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license. +""" + +# Common B2 storage schema properties following OpenAPI 3.0 specification +_common_b2_storage_schema_properties = { + 'title': {'type': 'string', 'description': 'Storage title', 'maxLength': 2048}, + 'description': {'type': 'string', 'description': 'Storage description'}, + 'project': {'type': 'integer', 'description': 'Project ID'}, + 'bucket': {'type': 'string', 'description': 'B2 bucket name'}, + 'prefix': {'type': 'string', 'description': 'B2 bucket prefix (folder path)'}, + 'b2_access_key_id': { + 'type': 'string', + 'description': 'B2 Application Key ID (equivalent to AWS_ACCESS_KEY_ID)', + }, + 'b2_secret_access_key': { + 'type': 'string', + 'description': 'B2 Application Key (equivalent to AWS_SECRET_ACCESS_KEY)', + }, + 'b2_endpoint_url': { + 'type': 'string', + 'description': 'B2 S3-compatible endpoint URL (e.g., https://s3.us-west-004.backblazeb2.com)', + }, + 'region_name': { + 'type': 'string', + 'description': 'B2 Region (e.g., us-west-004, us-east-005, eu-central-003)', + }, +} + +# B2 import storage schema +_b2_import_storage_schema = { + 'type': 'object', + 'properties': { + 'regex_filter': { + 'type': 'string', + 'description': 'Cloud storage regex for filtering objects. You must specify it otherwise no objects will be imported.', + }, + 'use_blob_urls': { + 'type': 'boolean', + 'description': 'Interpret objects as BLOBs and generate URLs. For example, if your bucket contains images, you can use this option to generate URLs for these images. If set to False, it will read the content of the file and load it into Label Studio.', + 'default': False, + }, + 'presign': { + 'type': 'boolean', + 'description': 'Generate presigned URLs for secure access to private files', + 'default': True, + }, + 'presign_ttl': { + 'type': 'integer', + 'description': 'Presigned URL expiration time in minutes', + 'default': 1, + }, + 'recursive_scan': { + 'type': 'boolean', + 'description': 'Scan recursively through all subfolders', + 'default': False, + }, + **_common_b2_storage_schema_properties, + }, + 'required': [], +} + +# B2 import storage schema with ID +_b2_import_storage_schema_with_id = { + 'type': 'object', + 'properties': { + 'id': {'type': 'integer', 'description': 'Storage ID. If set, storage with specified ID will be updated'}, + **_b2_import_storage_schema['properties'], + }, + 'required': [], +} + +# B2 export storage schema +_b2_export_storage_schema = { + 'type': 'object', + 'properties': { + 'can_delete_objects': { + 'type': 'boolean', + 'description': 'Enable deletion of annotations from B2 when deleted from Label Studio', + 'default': False, + }, + **_common_b2_storage_schema_properties, + }, + 'required': [], +} + +# B2 export storage schema with ID +_b2_export_storage_schema_with_id = { + 'type': 'object', + 'properties': { + 'id': {'type': 'integer', 'description': 'Storage ID. If set, storage with specified ID will be updated'}, + **_b2_export_storage_schema['properties'], + }, + 'required': [], +} + diff --git a/label_studio/io_storages/b2/serializers.py b/label_studio/io_storages/b2/serializers.py new file mode 100644 index 000000000000..af2ccda7478b --- /dev/null +++ b/label_studio/io_storages/b2/serializers.py @@ -0,0 +1,146 @@ +"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license. +""" +import logging +import os + +from botocore.exceptions import ClientError, ParamValidationError +from botocore.handlers import validate_bucket_name +from io_storages.b2.models import B2ExportStorage, B2ImportStorage +from io_storages.serializers import ExportStorageSerializer, ImportStorageSerializer +from rest_framework import serializers +from rest_framework.exceptions import ValidationError + +logger = logging.getLogger(__name__) + + +class B2StorageSerializerMixin: + """ + Mixin for B2 storage serializers. + + Handles secure field filtering and connection validation. + """ + + # These fields contain sensitive data and should not be returned in API responses + secure_fields = ['b2_access_key_id', 'b2_secret_access_key'] + + def to_representation(self, instance): + """ + Remove secure fields from API response. + + This ensures that B2 credentials are never exposed through the API. + """ + result = super().to_representation(instance) + for attr in self.secure_fields: + result.pop(attr, None) + return result + + def validate_bucket(self, value): + """ + Validate B2 bucket name. + + B2 bucket names follow similar rules to AWS S3. + """ + if not value: + return value + try: + validate_bucket_name({'Bucket': value}) + except ParamValidationError as exc: + raise ValidationError(exc.kwargs['report']) from exc + return value + + def validate(self, data): + """ + Validate the entire storage configuration. + + This performs a test connection to B2 to ensure credentials and + configuration are correct before saving. + """ + data = super().validate(data) + if not data.get('bucket', None): + return data + + # Get or create storage instance for validation + storage = self.instance + if storage: + # Update existing storage with new data + for key, value in data.items(): + setattr(storage, key, value) + else: + # Create new storage instance + if 'id' in self.initial_data: + storage_object = self.Meta.model.objects.get(id=self.initial_data['id']) + for attr in self.secure_fields: + data[attr] = data.get(attr) or getattr(storage_object, attr) + storage = self.Meta.model(**data) + + # Validate connection to B2 + try: + storage.validate_connection() + except ParamValidationError: + raise ValidationError( + f'Wrong credentials for B2 bucket {storage.bucket}. ' + 'Please check your B2 Application Key ID and Application Key.' + ) + except ClientError as e: + error_code = e.response.get('Error', {}).get('Code') + http_status = e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') + + # Handle authentication errors + if error_code in ['SignatureDoesNotMatch', '403'] or http_status == 403: + raise ValidationError( + f'Cannot connect to B2 bucket {storage.bucket} with specified credentials. ' + 'Please verify your B2 Application Key ID and Application Key are correct.' + ) + + # Handle bucket not found errors + if error_code in ['NoSuchBucket', '404'] or http_status == 404: + raise ValidationError( + f'Cannot find bucket {storage.bucket} in B2. ' + 'Please verify the bucket name is correct and that you have access to it.' + ) + + # Handle endpoint errors + if 'Could not connect to the endpoint URL' in str(e): + raise ValidationError( + f'Cannot connect to B2 endpoint. ' + 'Please verify your B2 endpoint URL is correct (e.g., https://s3.us-west-004.backblazeb2.com).' + ) + + # Generic error + raise ValidationError(f'Error connecting to B2: {str(e)}') + + except TypeError as e: + logger.info(f'It seems B2 access keys are incorrect: {e}', exc_info=True) + raise ValidationError( + 'It seems B2 access keys are incorrect. ' + 'Please check your B2 Application Key ID and Application Key.' + ) + except KeyError: + raise ValidationError( + f'{storage.url_scheme}://{storage.bucket}/{storage.prefix} not found. ' + 'Please verify the bucket and prefix are correct.' + ) + + return data + + +class B2ImportStorageSerializer(B2StorageSerializerMixin, ImportStorageSerializer): + """Serializer for B2 Import Storage.""" + + type = serializers.ReadOnlyField(default=os.path.basename(os.path.dirname(__file__))) + presign = serializers.BooleanField(required=False, default=True) + + class Meta: + model = B2ImportStorage + fields = '__all__' + + +class B2ExportStorageSerializer(B2StorageSerializerMixin, ExportStorageSerializer): + """Serializer for B2 Export Storage.""" + + type = serializers.ReadOnlyField(default=os.path.basename(os.path.dirname(__file__))) + + class Meta: + model = B2ExportStorage + fields = '__all__' + diff --git a/label_studio/io_storages/b2/utils.py b/label_studio/io_storages/b2/utils.py new file mode 100644 index 000000000000..94ae0a738836 --- /dev/null +++ b/label_studio/io_storages/b2/utils.py @@ -0,0 +1,304 @@ +"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license. +""" +import base64 +import fnmatch +import logging +import re +from typing import Optional, Tuple +from urllib.parse import urlparse + +import boto3 +from botocore.client import Config +from botocore.exceptions import ClientError, EndpointConnectionError +from core.utils.params import get_env +from django.conf import settings +from tldextract import TLDExtract + +logger = logging.getLogger(__name__) + +# B2 Connection Configuration +B2_CONNECT_TIMEOUT = int(get_env('B2_CONNECT_TIMEOUT', 60)) # Connection timeout in seconds +B2_READ_TIMEOUT = int(get_env('B2_READ_TIMEOUT', 60)) # Read timeout in seconds +B2_MAX_RETRIES = int(get_env('B2_MAX_RETRIES', 3)) # Maximum number of retry attempts + + +def get_client_and_resource( + b2_access_key_id: Optional[str] = None, + b2_secret_access_key: Optional[str] = None, + b2_endpoint_url: Optional[str] = None, + region_name: Optional[str] = None, +) -> Tuple: + """ + Create boto3 client and resource for Backblaze B2 Cloud Storage with production-ready configuration. + + B2 is S3-compatible, so we use boto3 with a custom endpoint URL. + Includes timeout, retry, and connection pool configuration for reliability. + + Args: + b2_access_key_id: B2 Application Key ID (equivalent to AWS access key) + b2_secret_access_key: B2 Application Key (equivalent to AWS secret key) + b2_endpoint_url: B2 endpoint URL (e.g., https://s3.us-west-004.backblazeb2.com) + region_name: B2 region name (e.g., us-west-004) + + Returns: + Tuple[boto3.client, boto3.resource]: Tuple of (boto3 S3 client, boto3 S3 resource) + + Raises: + ValueError: If credentials or endpoint URL are missing + EndpointConnectionError: If unable to connect to B2 endpoint + """ + # Read from environment variables if not provided + b2_access_key_id = b2_access_key_id or get_env('B2_ACCESS_KEY_ID') + b2_secret_access_key = b2_secret_access_key or get_env('B2_SECRET_ACCESS_KEY') + b2_endpoint_url = b2_endpoint_url or get_env('B2_ENDPOINT_URL') + region_name = region_name or get_env('B2_REGION') or 'us-west-004' + + # Validate required credentials + if not b2_access_key_id or not b2_secret_access_key: + raise ValueError( + 'B2 credentials are required. Please provide B2_ACCESS_KEY_ID and B2_SECRET_ACCESS_KEY ' + 'either as parameters or environment variables.' + ) + + logger.info( + f'Initializing Backblaze B2 connection: ' + f'endpoint={b2_endpoint_url}, ' + f'region={region_name}, ' + f'key_id={b2_access_key_id[:10]}***' + ) + + # Create boto3 session with B2 credentials + try: + session = boto3.Session( + aws_access_key_id=b2_access_key_id, + aws_secret_access_key=b2_secret_access_key, + ) + except Exception as e: + logger.error(f'Failed to create boto3 session: {e}', exc_info=True) + raise ValueError(f'Invalid B2 credentials: {e}') from e + + # B2 requires explicit endpoint URL + if not b2_endpoint_url: + # Default endpoint pattern for B2 + b2_endpoint_url = f'https://s3.{region_name}.backblazeb2.com' + logger.warning( + f'No B2 endpoint URL provided, using default: {b2_endpoint_url}. ' + 'For production, set B2_ENDPOINT_URL environment variable.' + ) + + # Configure boto3 with timeout, retry, and connection pooling + boto_config = Config( + signature_version='s3v4', + connect_timeout=B2_CONNECT_TIMEOUT, + read_timeout=B2_READ_TIMEOUT, + retries={ + 'max_attempts': B2_MAX_RETRIES, + 'mode': 'adaptive', # Adaptive retry mode for better resilience + }, + max_pool_connections=50, # Connection pooling for performance + ) + + settings_dict = { + 'region_name': region_name, + 'endpoint_url': b2_endpoint_url, + } + + try: + # Create S3-compatible client and resource for B2 + client = session.client('s3', config=boto_config, **settings_dict) + resource = session.resource('s3', config=boto_config, **settings_dict) + + logger.info( + f'B2 client created successfully with timeout={B2_CONNECT_TIMEOUT}s, ' + f'max_retries={B2_MAX_RETRIES}' + ) + + return client, resource + + except EndpointConnectionError as e: + logger.error( + f'Failed to connect to B2 endpoint {b2_endpoint_url}: {e}. ' + 'Please verify the endpoint URL is correct and accessible.', + exc_info=True + ) + raise + except Exception as e: + logger.error(f'Unexpected error creating B2 client: {e}', exc_info=True) + raise + + +def resolve_b2_url(url: str, client, presign: bool = True, expires_in: int = 3600) -> str: + """ + Resolve B2 URL to either presigned URL or base64 encoded data. + + This function handles conversion of b2:// URLs to accessible HTTP(S) URLs or inline data. + + Args: + url: The b2:// URL to resolve (e.g., "b2://my-bucket/path/to/file.jpg") + client: boto3 S3 client for B2 + presign: If True, generate presigned URL; if False, return base64 data + expires_in: Presigned URL expiration time in seconds (default: 3600 = 1 hour) + + Returns: + str: Either a presigned HTTPS URL or base64-encoded data URL + + Raises: + ClientError: If unable to access the object in B2 + """ + try: + r = urlparse(url, allow_fragments=False) + bucket_name = r.netloc + key = r.path.lstrip('/') + + logger.debug(f'Resolving B2 URL: bucket={bucket_name}, key={key}, presign={presign}') + + # Return blob as base64 encoded string if presigned urls are disabled + if not presign: + logger.info(f'Fetching object from B2 for base64 encoding: {bucket_name}/{key}') + obj = client.get_object(Bucket=bucket_name, Key=key) + content_type = obj['ResponseMetadata']['HTTPHeaders'].get('content-type', 'application/octet-stream') + object_data = obj['Body'].read() + object_b64 = 'data:' + content_type + ';base64,' + base64.b64encode(object_data).decode('utf-8') + logger.debug(f'Generated base64 data URL for {key} ({len(object_data)} bytes)') + return object_b64 + + # Otherwise try to generate presigned url + try: + presigned_url = client.generate_presigned_url( + ClientMethod='get_object', + Params={'Bucket': bucket_name, 'Key': key}, + ExpiresIn=expires_in + ) + logger.info(f'Generated presigned URL for {bucket_name}/{key} (expires in {expires_in}s)') + return presigned_url + except ClientError as exc: + logger.warning( + f"Failed to generate presigned URL for B2 object {bucket_name}/{key}: {exc}. " + "Returning original URL as fallback." + ) + return url + + except Exception as e: + logger.error(f'Error resolving B2 URL {url}: {e}', exc_info=True) + return url # Fallback to original URL + + +class B2(object): + """Helper class for Backblaze B2 Cloud Storage operations.""" + + @classmethod + def get_blob_metadata( + cls, + url: str, + bucket_name: str, + client=None, + b2_access_key_id=None, + b2_secret_access_key=None, + b2_endpoint_url=None, + region_name=None, + ): + """ + Get blob metadata from B2 by URL. + + Args: + url: Object key + bucket_name: B2 bucket name + client: B2 client for batch processing (optional) + b2_access_key_id: B2 Application Key ID + b2_secret_access_key: B2 Application Key + b2_endpoint_url: B2 endpoint URL + region_name: B2 region name + + Returns: + Object metadata dict + """ + if client is None: + client, _ = get_client_and_resource( + b2_access_key_id=b2_access_key_id, + b2_secret_access_key=b2_secret_access_key, + b2_endpoint_url=b2_endpoint_url, + region_name=region_name, + ) + obj = client.get_object(Bucket=bucket_name, Key=url) + metadata = dict(obj) + # remove unused fields + metadata.pop('Body', None) + metadata.pop('ResponseMetadata', None) + return metadata + + @classmethod + def validate_pattern(cls, storage, pattern, glob_pattern=True): + """ + Validate pattern against B2 Storage. + + Args: + storage: B2 Storage instance + pattern: Pattern to validate + glob_pattern: If True, pattern is a glob pattern, otherwise it is a regex pattern + + Returns: + Message if pattern is not valid, empty string otherwise + """ + client, bucket = storage.get_client_and_bucket() + if glob_pattern: + pattern = fnmatch.translate(pattern) + regex = re.compile(pattern) + + if storage.prefix: + list_kwargs = {'Prefix': storage.prefix.rstrip('/') + '/'} + if not storage.recursive_scan: + list_kwargs['Delimiter'] = '/' + bucket_iter = bucket.objects.filter(**list_kwargs) + else: + bucket_iter = bucket.objects + + bucket_iter = bucket_iter.page_size(settings.CLOUD_STORAGE_CHECK_FOR_RECORDS_PAGE_SIZE).all() + + for index, obj in enumerate(bucket_iter): + key = obj.key + # skip directories + if key.endswith('/'): + logger.debug(key + ' is skipped because it is a folder') + continue + if regex and regex.match(key): + logger.debug(key + ' matches file pattern') + return '' + return 'No objects found matching the provided glob pattern' + + +class B2StorageError(Exception): + """Exception raised for B2 storage-specific errors.""" + pass + + +# see https://github.com/john-kurkowski/tldextract?tab=readme-ov-file#note-about-caching +# prevents network call on first use +extractor = TLDExtract(suffix_list_urls=()) + + +def catch_and_reraise_from_none(func): + """ + For B2 storages - if b2_endpoint_url is not on a known domain, catch exception and + raise a new one with the previous context suppressed. See also: https://peps.python.org/pep-0409/ + + This decorator is specifically designed for B2 Cloud Storage to handle errors gracefully + when using custom endpoint URLs. + """ + + def wrapper(self, *args, **kwargs): + try: + return func(self, *args, **kwargs) + except Exception as e: + if self.b2_endpoint_url and ( + domain := extractor.extract_urllib(urlparse(self.b2_endpoint_url)).registered_domain.lower() + ) not in [trusted_domain.lower() for trusted_domain in settings.B2_TRUSTED_STORAGE_DOMAINS]: + logger.error(f'Exception from unrecognized B2 domain: {e}', exc_info=True) + raise B2StorageError( + f'Debugging info is not available for B2 endpoints on domain: {domain}. ' + 'Please contact your Label Studio devops team if you require detailed error reporting for this domain.' + ) from None + else: + raise e + + return wrapper + diff --git a/label_studio/io_storages/functions.py b/label_studio/io_storages/functions.py index e2a11a3a6601..5d29f8a0d7a6 100644 --- a/label_studio/io_storages/functions.py +++ b/label_studio/io_storages/functions.py @@ -6,6 +6,7 @@ from rest_framework.exceptions import PermissionDenied, ValidationError from .azure_blob.api import AzureBlobExportStorageListAPI, AzureBlobImportStorageListAPI +from .b2.api import B2ExportStorageListAPI, B2ImportStorageListAPI from .gcs.api import GCSExportStorageListAPI, GCSImportStorageListAPI from .redis.api import RedisExportStorageListAPI, RedisImportStorageListAPI from .s3.api import S3ExportStorageListAPI, S3ImportStorageListAPI @@ -72,6 +73,12 @@ def get_storage_list(): 'import_list_api': S3ImportStorageListAPI, 'export_list_api': S3ExportStorageListAPI, }, + { + 'name': 'b2', + 'title': 'Backblaze B2', + 'import_list_api': B2ImportStorageListAPI, + 'export_list_api': B2ExportStorageListAPI, + }, { 'name': 'gcs', 'title': 'Google Cloud Storage', diff --git a/label_studio/io_storages/migrations/0022_add_b2_storage_models.py b/label_studio/io_storages/migrations/0022_add_b2_storage_models.py new file mode 100644 index 000000000000..698a4e0e8861 --- /dev/null +++ b/label_studio/io_storages/migrations/0022_add_b2_storage_models.py @@ -0,0 +1,125 @@ +# Generated manually for Backblaze B2 Cloud Storage integration + +import django.db.models.deletion +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('projects', '0031_alter_project_show_ground_truth_first'), + ('tasks', '0057_annotation_proj_result_octlen_idx_async'), + ('io_storages', '0021_azureblobimportstorage_recursive_scan_and_more'), + ] + + operations = [ + # Create B2 Import Storage + migrations.CreateModel( + name='B2ImportStorage', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + # StorageInfo fields + ('last_sync', models.DateTimeField(blank=True, help_text='Last sync finished time', null=True, verbose_name='last sync')), + ('last_sync_count', models.PositiveIntegerField(blank=True, help_text='Count of tasks synced last time', null=True, verbose_name='last sync count')), + ('last_sync_job', models.CharField(blank=True, help_text='Last sync job ID', max_length=256, null=True, verbose_name='last_sync_job')), + ('status', models.CharField(choices=[('initialized', 'Initialized'), ('queued', 'Queued'), ('in_progress', 'In progress'), ('failed', 'Failed'), ('completed', 'Completed'), ('completed_with_errors', 'Completed with errors')], default='initialized', max_length=64)), + ('traceback', models.TextField(blank=True, help_text='Traceback report for the last failed sync', null=True)), + ('meta', models.JSONField(default=dict, help_text='Meta and debug information about storage processes', null=True, verbose_name='meta')), + # Storage fields + ('title', models.CharField(blank=True, help_text='Cloud storage title', max_length=256, null=True, verbose_name='title')), + ('description', models.TextField(blank=True, help_text='Cloud storage description', null=True, verbose_name='description')), + ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time', verbose_name='created at')), + ('synchronizable', models.BooleanField(default=True, help_text='If storage can be synced', verbose_name='synchronizable')), + # B2StorageMixin fields + ('bucket', models.TextField(blank=True, help_text='B2 bucket name', null=True, verbose_name='bucket')), + ('prefix', models.TextField(blank=True, help_text='B2 bucket prefix (folder path)', null=True, verbose_name='prefix')), + ('regex_filter', models.TextField(blank=True, help_text='Cloud storage regex for filtering objects', null=True, verbose_name='regex_filter')), + ('use_blob_urls', models.BooleanField(default=False, help_text='Interpret objects as BLOBs and generate URLs', verbose_name='use_blob_urls')), + ('b2_access_key_id', models.TextField(blank=True, help_text='B2 Application Key ID (equivalent to AWS_ACCESS_KEY_ID)', null=True, verbose_name='b2_access_key_id')), + ('b2_secret_access_key', models.TextField(blank=True, help_text='B2 Application Key (equivalent to AWS_SECRET_ACCESS_KEY)', null=True, verbose_name='b2_secret_access_key')), + ('b2_endpoint_url', models.TextField(blank=True, help_text='B2 S3-compatible endpoint URL (e.g., https://s3.us-west-004.backblazeb2.com)', null=True, verbose_name='b2_endpoint_url')), + ('region_name', models.TextField(blank=True, help_text='B2 Region (e.g., us-west-004, us-east-005, eu-central-003)', null=True, verbose_name='region_name')), + # B2ImportStorageBase fields + ('presign', models.BooleanField(default=True, help_text='Generate presigned URLs', verbose_name='presign')), + ('presign_ttl', models.PositiveSmallIntegerField(default=1, help_text='Presigned URLs TTL (in minutes)', verbose_name='presign_ttl')), + ('recursive_scan', models.BooleanField(default=False, help_text='Perform recursive scan over the bucket content', verbose_name='recursive scan')), + # ProjectStorageMixin fields + ('project', models.ForeignKey(help_text='A unique integer value identifying this project.', on_delete=django.db.models.deletion.CASCADE, related_name='io_storages_b2importstorages', to='projects.project')), + ], + options={ + 'abstract': False, + }, + ), + + # Create B2 Export Storage + migrations.CreateModel( + name='B2ExportStorage', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + # StorageInfo fields + ('last_sync', models.DateTimeField(blank=True, help_text='Last sync finished time', null=True, verbose_name='last sync')), + ('last_sync_count', models.PositiveIntegerField(blank=True, help_text='Count of tasks synced last time', null=True, verbose_name='last sync count')), + ('last_sync_job', models.CharField(blank=True, help_text='Last sync job ID', max_length=256, null=True, verbose_name='last_sync_job')), + ('status', models.CharField(choices=[('initialized', 'Initialized'), ('queued', 'Queued'), ('in_progress', 'In progress'), ('failed', 'Failed'), ('completed', 'Completed'), ('completed_with_errors', 'Completed with errors')], default='initialized', max_length=64)), + ('traceback', models.TextField(blank=True, help_text='Traceback report for the last failed sync', null=True)), + ('meta', models.JSONField(default=dict, help_text='Meta and debug information about storage processes', null=True, verbose_name='meta')), + # Storage fields + ('title', models.CharField(blank=True, help_text='Cloud storage title', max_length=256, null=True, verbose_name='title')), + ('description', models.TextField(blank=True, help_text='Cloud storage description', null=True, verbose_name='description')), + ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time', verbose_name='created at')), + ('synchronizable', models.BooleanField(default=True, help_text='If storage can be synced', verbose_name='synchronizable')), + # ExportStorage fields + ('can_delete_objects', models.BooleanField(blank=True, help_text='Deletion from storage enabled', null=True, verbose_name='can_delete_objects')), + # B2StorageMixin fields + ('bucket', models.TextField(blank=True, help_text='B2 bucket name', null=True, verbose_name='bucket')), + ('prefix', models.TextField(blank=True, help_text='B2 bucket prefix (folder path)', null=True, verbose_name='prefix')), + ('regex_filter', models.TextField(blank=True, help_text='Cloud storage regex for filtering objects', null=True, verbose_name='regex_filter')), + ('use_blob_urls', models.BooleanField(default=False, help_text='Interpret objects as BLOBs and generate URLs', verbose_name='use_blob_urls')), + ('b2_access_key_id', models.TextField(blank=True, help_text='B2 Application Key ID (equivalent to AWS_ACCESS_KEY_ID)', null=True, verbose_name='b2_access_key_id')), + ('b2_secret_access_key', models.TextField(blank=True, help_text='B2 Application Key (equivalent to AWS_SECRET_ACCESS_KEY)', null=True, verbose_name='b2_secret_access_key')), + ('b2_endpoint_url', models.TextField(blank=True, help_text='B2 S3-compatible endpoint URL (e.g., https://s3.us-west-004.backblazeb2.com)', null=True, verbose_name='b2_endpoint_url')), + ('region_name', models.TextField(blank=True, help_text='B2 Region (e.g., us-west-004, us-east-005, eu-central-003)', null=True, verbose_name='region_name')), + # ProjectStorageMixin fields + ('project', models.ForeignKey(help_text='A unique integer value identifying this project.', on_delete=django.db.models.deletion.CASCADE, related_name='io_storages_b2exportstorages', to='projects.project')), + ], + options={ + 'abstract': False, + }, + ), + + # Create B2 Import Storage Link + migrations.CreateModel( + name='B2ImportStorageLink', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('key', models.TextField(help_text='External link key', verbose_name='key')), + ('object_exists', models.BooleanField(default=True, help_text='Whether object under external link still exists', verbose_name='object exists')), + ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time', verbose_name='created at')), + ('row_group', models.IntegerField(blank=True, help_text='Parquet row group', null=True)), + ('row_index', models.IntegerField(blank=True, help_text='Parquet row index, or JSON[L] object index', null=True)), + ('task', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='io_storages_b2importstoragelink', to='tasks.task')), + ('storage', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='links', to='io_storages.b2importstorage')), + ], + options={ + 'abstract': False, + }, + ), + + # Create B2 Export Storage Link + migrations.CreateModel( + name='B2ExportStorageLink', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('object_exists', models.BooleanField(default=True, help_text='Whether object under external link still exists', verbose_name='object exists')), + ('created_at', models.DateTimeField(auto_now_add=True, help_text='Creation time', verbose_name='created at')), + ('updated_at', models.DateTimeField(auto_now=True, help_text='Update time', verbose_name='updated at')), + ('annotation', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='io_storages_b2exportstoragelink', to='tasks.annotation')), + ('storage', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='links', to='io_storages.b2exportstorage')), + ], + options={ + 'abstract': False, + }, + ), + ] + diff --git a/label_studio/io_storages/models.py b/label_studio/io_storages/models.py index 98264925f001..1e8ed5ee8e55 100644 --- a/label_studio/io_storages/models.py +++ b/label_studio/io_storages/models.py @@ -8,6 +8,12 @@ AzureBlobExportStorage, AzureBlobExportStorageLink, ) +from .b2.models import ( # noqa: F401 + B2ImportStorage, + B2ImportStorageLink, + B2ExportStorage, + B2ExportStorageLink, +) from .s3.models import ( # noqa: F401 S3ImportStorage, S3ImportStorageLink, diff --git a/label_studio/io_storages/urls.py b/label_studio/io_storages/urls.py index 42e686eb8ff1..41a01ad9e486 100644 --- a/label_studio/io_storages/urls.py +++ b/label_studio/io_storages/urls.py @@ -23,6 +23,19 @@ AzureBlobImportStorageSyncAPI, AzureBlobImportStorageValidateAPI, ) +from io_storages.b2.api import ( + B2ExportStorageDetailAPI, + B2ExportStorageFormLayoutAPI, + B2ExportStorageListAPI, + B2ExportStorageSyncAPI, + B2ExportStorageValidateAPI, + B2ImportStorageDetailAPI, + B2ImportStorageFormLayoutAPI, + B2ImportStorageListAPI, + B2ImportStorageSerializer, + B2ImportStorageSyncAPI, + B2ImportStorageValidateAPI, +) from io_storages.gcs.api import ( GCSExportStorageDetailAPI, GCSExportStorageFormLayoutAPI, @@ -101,6 +114,22 @@ path('export/s3//sync', S3ExportStorageSyncAPI.as_view(), name='export-storage-s3-sync'), path('export/s3/validate', S3ExportStorageValidateAPI.as_view(), name='export-storage-s3-validate'), path('export/s3/form', S3ExportStorageFormLayoutAPI.as_view(), name='export-storage-s3-form'), + # Backblaze B2 + path('b2/', B2ImportStorageListAPI.as_view(), name='storage-b2-list'), + path('b2/', B2ImportStorageDetailAPI.as_view(), name='storage-b2-detail'), + path('b2//sync', B2ImportStorageSyncAPI.as_view(), name='storage-b2-sync'), + path('b2/validate', B2ImportStorageValidateAPI.as_view(), name='storage-b2-validate'), + path('b2/form', B2ImportStorageFormLayoutAPI.as_view(), name='storage-b2-form'), + path( + 'b2/files', + ImportStorageListFilesAPI().as_view(serializer_class=B2ImportStorageSerializer), + name='storage-b2-list-files', + ), + path('export/b2', B2ExportStorageListAPI.as_view(), name='export-storage-b2-list'), + path('export/b2/', B2ExportStorageDetailAPI.as_view(), name='export-storage-b2-detail'), + path('export/b2//sync', B2ExportStorageSyncAPI.as_view(), name='export-storage-b2-sync'), + path('export/b2/validate', B2ExportStorageValidateAPI.as_view(), name='export-storage-b2-validate'), + path('export/b2/form', B2ExportStorageFormLayoutAPI.as_view(), name='export-storage-b2-form'), # Microsoft Azure path('azure/', AzureBlobImportStorageListAPI.as_view(), name='storage-azure-list'), path('azure/', AzureBlobImportStorageDetailAPI.as_view(), name='storage-azure-detail'), diff --git a/show_b2_files.py b/show_b2_files.py new file mode 100644 index 000000000000..7142184cedc9 --- /dev/null +++ b/show_b2_files.py @@ -0,0 +1,123 @@ +""" +Show exactly what files are in your B2 bucket +""" +import os +import sys +import django + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'label_studio')) +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings.label_studio') +django.setup() + +from io_storages.b2.models import B2ExportStorage + +def show_files(): + print("=" * 70) + print("CHECKING YOUR B2 BUCKET FOR EXPORTED FILES") + print("=" * 70) + print() + + # Get export storage + storage = B2ExportStorage.objects.first() + + if not storage: + print("[ERROR] No B2 export storage found!") + print("Please configure one in UI first.") + return + + print(f"[INFO] Export Storage: {storage.title}") + print(f" Bucket: {storage.bucket}") + print(f" Prefix: '{storage.prefix}' (empty = bucket root)") + print(f" Endpoint: {storage.b2_endpoint_url}") + print() + + # Connect to B2 + print("[INFO] Connecting to B2...") + try: + client, bucket = storage.get_client_and_bucket(validate_connection=False) + print("[OK] Connected successfully!") + except Exception as e: + print(f"[ERROR] Failed to connect: {e}") + return + + print() + + # List all files in bucket + print(f"[INFO] Listing files in bucket: {storage.bucket}") + if storage.prefix: + print(f" Looking in folder: {storage.prefix}") + else: + print(f" Looking in: BUCKET ROOT (no subfolder)") + print() + + try: + file_count = 0 + for obj in bucket.objects.all(): + file_count += 1 + size_kb = obj.size / 1024 + print(f" {file_count}. File: {obj.key}") + print(f" Size: {size_kb:.2f} KB") + print(f" Modified: {obj.last_modified}") + + # Check if this matches our prefix + if storage.prefix: + if obj.key.startswith(storage.prefix): + print(f" [MATCH] This file is in your export prefix!") + else: + print(f" [INFO] File in bucket root") + print() + + if file_count == 0: + print(" [WARNING] No files found in bucket!") + print() + print(" Possible reasons:") + print(" 1. Export failed (check credentials have write permission)") + print(" 2. Files in different bucket") + print(" 3. Application Key doesn't have permission to list files") + else: + print(f"[OK] Found {file_count} file(s) in bucket") + + if storage.prefix: + print(f" Look for files starting with: {storage.prefix}") + else: + print(f" Files are in BUCKET ROOT") + print(f" Look for: 3.json or 1.json") + + except Exception as e: + print(f"[ERROR] Failed to list files: {e}") + print() + print("Check:") + print("1. Application Key has list/read permission") + print("2. Bucket name is correct") + print("3. Credentials are valid") + + print() + print("=" * 70) + print("WHERE TO FIND YOUR FILE") + print("=" * 70) + print() + print(f"Bucket: {storage.bucket}") + if storage.prefix: + print(f"Folder: {storage.prefix}") + else: + print(f"Folder: ROOT of bucket (no subfolder)") + print(f"File Name: 3.json (annotation ID)") + print(f" or: 1.json (task ID)") + print() + print("In B2 Web Interface:") + print(f"1. Go to Buckets → {storage.bucket}") + if storage.prefix: + print(f"2. Open folder: {storage.prefix}") + else: + print(f"2. Look in ROOT (don't go into any folders)") + print("3. Look for: 3.json or 1.json") + print("4. Refresh if not visible") + +if __name__ == '__main__': + try: + show_files() + except Exception as e: + print(f"[ERROR] {e}") + import traceback + traceback.print_exc() + diff --git a/test_b2_upload.py b/test_b2_upload.py new file mode 100644 index 000000000000..014aea5d47a3 --- /dev/null +++ b/test_b2_upload.py @@ -0,0 +1,83 @@ +""" +Test uploading a file to B2 to verify web interface +""" +import os +import sys +import django + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'label_studio')) +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings.label_studio') +django.setup() + +from io_storages.b2.models import B2ExportStorage + +def test_upload(): + print("=" * 60) + print("TEST: Upload a visible file to B2") + print("=" * 60) + + # Get export storage + storage = B2ExportStorage.objects.first() + + if not storage: + print("[ERROR] No B2 export storage found!") + return + + print(f"[INFO] Using storage: {storage.title}") + print(f" Bucket: {storage.bucket}") + print(f" Prefix: '{storage.prefix}'") + print() + + # Connect to B2 + try: + client, bucket = storage.get_client_and_bucket(validate_connection=False) + print("[OK] Connected to B2") + except Exception as e: + print(f"[ERROR] Failed to connect: {e}") + return + + # Create test file + test_content = """{ + "test": "This is a test file from Label Studio", + "timestamp": "2025-10-12T21:58:00Z", + "message": "If you can see this file in B2 web interface, everything is working!" +}""" + + # Upload with clear name + test_key = "TEST_FILE_VISIBLE.json" + if storage.prefix: + test_key = f"{storage.prefix.rstrip('/')}/{test_key}" + + try: + print(f"[INFO] Uploading test file: {test_key}") + bucket.put_object(Key=test_key, Body=test_content.encode('utf-8')) + print("[SUCCESS] Test file uploaded!") + print() + + # List files to confirm + print("[INFO] Current files in bucket:") + file_count = 0 + for obj in bucket.objects.all(): + file_count += 1 + size_kb = obj.size / 1024 + print(f" {file_count}. {obj.key} ({size_kb:.2f} KB)") + + print() + print("=" * 60) + print("NOW CHECK B2 WEB INTERFACE:") + print("=" * 60) + print(f"1. Go to: api-test-bucket") + print(f"2. Look for: TEST_FILE_VISIBLE.json") + print(f"3. If you see it, your B2 connection works!") + print(f"4. Your annotation files (4, 5) should also be there") + print() + print("If TEST_FILE_VISIBLE.json appears but files 4,5 don't:") + print("- Try hard refresh (Ctrl+F5)") + print("- Wait 30 seconds and refresh again") + print("- Files 4,5 might be there but web UI has display bug") + + except Exception as e: + print(f"[ERROR] Failed to upload: {e}") + +if __name__ == '__main__': + test_upload() diff --git a/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts b/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts new file mode 100644 index 000000000000..ceb7d858d595 --- /dev/null +++ b/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts @@ -0,0 +1,129 @@ +import { z } from "zod"; +import type { ProviderConfig } from "@humansignal/app-common/blocks/StorageProviderForm/types/provider"; +import { IconCloudCustom } from "@humansignal/icons"; + +/** + * Backblaze B2 Cloud Storage Provider Configuration + * + * B2 is S3-compatible, using boto3 with custom endpoint URLs. + * Users provide their B2 Application Key credentials and bucket details. + */ +export const b2Provider: ProviderConfig = { + name: "b2", + title: "Backblaze B2", + description: "Configure your Backblaze B2 Cloud Storage connection with S3-compatible settings", + icon: IconCloudCustom, // Using cloud icon - can be replaced with custom B2 icon if created + fields: [ + { + name: "bucket", + type: "text", + label: "Bucket Name", + required: true, + placeholder: "my-b2-bucket", + schema: z.string().min(1, "Bucket name is required"), + description: "Your Backblaze B2 bucket name", + }, + { + name: "b2_endpoint_url", + type: "text", + label: "B2 Endpoint URL", + required: true, + placeholder: "https://s3.us-west-004.backblazeb2.com", + schema: z.string() + .min(1, "B2 Endpoint URL is required") + .url("Must be a valid URL") + .refine( + (url) => url.includes("backblazeb2.com") || url.includes("backblaze.com"), + "Endpoint URL must be a Backblaze B2 endpoint" + ), + description: "Your region-specific B2 S3-compatible endpoint (e.g., https://s3.us-west-004.backblazeb2.com)", + }, + { + name: "region_name", + type: "text", + label: "Region Name", + placeholder: "us-west-004", + schema: z.string().optional().default("us-west-004"), + description: "B2 region (e.g., us-west-004, us-east-005, eu-central-003)", + }, + { + name: "prefix", + type: "text", + label: "Bucket Prefix (Folder Path)", + placeholder: "path/to/files", + schema: z.string().optional().default(""), + target: "export", + description: "Optional folder path within the bucket", + }, + { + name: "b2_access_key_id", + type: "password", + label: "Application Key ID", + required: true, + placeholder: "0051234567890abcdef", + autoComplete: "off", + accessKey: true, + schema: z.string().min(1, "B2 Application Key ID is required"), + description: "Your B2 Application Key ID (from Backblaze dashboard > App Keys)", + }, + { + name: "b2_secret_access_key", + type: "password", + label: "Application Key", + required: true, + placeholder: "K001234567890abcdefghij", + autoComplete: "new-password", + accessKey: true, + schema: z.string().min(1, "B2 Application Key is required"), + description: "Your B2 Application Key (shown only once when created)", + }, + { + name: "presign", + type: "toggle", + label: "Use pre-signed URLs (On) / Proxy through the platform (Off)", + description: + "When pre-signed URLs are enabled, all data bypasses the platform and user browsers directly read data from B2 storage", + schema: z.boolean().default(true), + target: "import", + resetConnection: false, + }, + { + name: "presign_ttl", + type: "counter", + label: "Expire pre-signed URLs (minutes)", + min: 1, + max: 10080, // 7 days + step: 1, + schema: z.number().min(1).max(10080).default(15), + target: "import", + resetConnection: false, + dependsOn: { + field: "presign", + value: true, + }, + description: "Time until pre-signed URLs expire (default: 15 minutes)", + }, + { + name: "recursive_scan", + type: "toggle", + label: "Scan all sub-folders", + description: "When enabled, files from all nested folders will be imported", + schema: z.boolean().default(false), + target: "import", + resetConnection: false, + }, + ], + layout: [ + { fields: ["bucket"] }, + { fields: ["b2_endpoint_url"] }, + { fields: ["region_name"] }, + { fields: ["prefix"] }, + { fields: ["b2_access_key_id"] }, + { fields: ["b2_secret_access_key"] }, + { fields: ["presign", "presign_ttl"] }, + { fields: ["recursive_scan"] }, + ], +}; + +export default b2Provider; + diff --git a/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/index.ts b/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/index.ts index 06d27f9462b6..0508f7f8fe7a 100644 --- a/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/index.ts +++ b/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/index.ts @@ -1,5 +1,6 @@ import azureProvider from "./azure"; import azureSpiProvider from "./azure_spi"; +import b2Provider from "./b2"; import databricksProvider from "./databricks"; import gcsProvider from "./gcs"; import gcsWifProvider from "./gcswif"; @@ -11,6 +12,7 @@ import s3sProvider from "./s3s"; export const providers = { // Standard providers s3: s3Provider, + b2: b2Provider, gcs: gcsProvider, azure: azureProvider, redis: redisProvider, From 07332a2162678e7d9a8a419b73bc17c597d5a350 Mon Sep 17 00:00:00 2001 From: Shahzaib-Hamid Date: Tue, 14 Oct 2025 23:05:30 +0500 Subject: [PATCH 2/4] feat:Added Documentation & Test Cases --- diagnose_b2_export.py | 146 ----- docs/PRD_Backblaze_B2_Integration.md | 518 ++++++++++++++++++ docs/source/guide/storage.md | 136 ++++- .../tests/io_storages/b2/test_models.py | 158 ++++++ .../tests/io_storages/b2/test_utils.py | 44 ++ show_b2_files.py | 123 ----- test_b2_upload.py | 83 --- 7 files changed, 855 insertions(+), 353 deletions(-) delete mode 100644 diagnose_b2_export.py create mode 100644 docs/PRD_Backblaze_B2_Integration.md create mode 100644 label_studio/tests/io_storages/b2/test_models.py create mode 100644 label_studio/tests/io_storages/b2/test_utils.py delete mode 100644 show_b2_files.py delete mode 100644 test_b2_upload.py diff --git a/diagnose_b2_export.py b/diagnose_b2_export.py deleted file mode 100644 index e7ee38b364a6..000000000000 --- a/diagnose_b2_export.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -Diagnostic script to check B2 export storage configuration -""" -import os -import sys -import django - -# Add label_studio to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'label_studio')) - -# Setup Django -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings.label_studio') -django.setup() - -from io_storages.b2.models import B2ExportStorage, B2ImportStorage, B2ExportStorageLink -from projects.models import Project -from tasks.models import Annotation as AnnotationModel - -def diagnose(): - print("=" * 60) - print("B2 Export Storage Diagnostic") - print("=" * 60) - print() - - # Check B2 models exist - print("[1] Checking B2 Models...") - try: - print(f" B2ImportStorage: {B2ImportStorage}") - print(f" B2ExportStorage: {B2ExportStorage}") - print(" [OK] B2 models imported") - except Exception as e: - print(f" [ERROR] {e}") - return - - print() - - # List all B2 export storages - print("[2] Checking B2 Export Storages in Database...") - export_storages = B2ExportStorage.objects.all() - print(f" Total B2 Export Storages: {export_storages.count()}") - - for storage in export_storages: - print(f" - ID: {storage.id}") - print(f" Title: {storage.title}") - print(f" Bucket: {storage.bucket}") - print(f" Prefix: {storage.prefix}") - print(f" Project: {storage.project.title if storage.project else 'None'}") - print(f" Project ID: {storage.project.id if storage.project else 'None'}") - print(f" Endpoint: {storage.b2_endpoint_url}") - - if export_storages.count() == 0: - print(" [WARNING] No B2 export storages configured!") - print(" Please configure one in UI: Settings -> Cloud Storage -> Add Target Storage") - return - - print() - - # Check related name - print("[3] Checking Related Name Access...") - projects = Project.objects.all() - print(f" Total Projects: {projects.count()}") - - for project in projects: - print(f" Project: {project.title} (ID: {project.id})") - - # Try to access B2 export storages via related name - try: - b2_storages = project.io_storages_b2exportstorages.all() - print(f" B2 Export Storages: {b2_storages.count()}") - for storage in b2_storages: - print(f" - {storage.title} (Bucket: {storage.bucket})") - except AttributeError as e: - print(f" [ERROR] Cannot access io_storages_b2exportstorages: {e}") - print(f" This means the related_name might be wrong!") - - print() - - # Check recent annotations - print("[4] Checking Recent Annotations...") - annotations = AnnotationModel.objects.all().order_by('-id')[:5] - print(f" Total Annotations: {AnnotationModel.objects.count()}") - print(f" Recent 5:") - - for ann in annotations: - print(f" - Annotation ID: {ann.id}") - print(f" Task ID: {ann.task.id}") - print(f" Project: {ann.project.title if ann.project else 'N/A'}") - print(f" Created: {ann.created_at}") - - # Check if this annotation has export links - links = B2ExportStorageLink.objects.filter(annotation=ann) - print(f" B2 Export Links: {links.count()}") - for link in links: - print(f" - Storage: {link.storage.title}") - - print() - - # Check signal registration - print("[5] Checking Django Signal Registration...") - from django.db.models.signals import post_save - - receivers = post_save._live_receivers(AnnotationModel) - print(f" Total post_save receivers for Annotation: {len(receivers)}") - - b2_receiver_found = False - for receiver in receivers: - receiver_name = receiver.__name__ if hasattr(receiver, '__name__') else str(receiver) - print(f" - {receiver_name}") - if 'b2' in receiver_name.lower(): - b2_receiver_found = True - print(f" [OK] B2 export signal found!") - - if not b2_receiver_found: - print(" [WARNING] B2 export signal not found!") - print(" This means signals might not be registered properly") - - print() - print("=" * 60) - print("Diagnostic Complete") - print("=" * 60) - print() - - # Summary - if export_storages.count() > 0 and b2_receiver_found: - print("[RESULT] Everything looks configured correctly!") - print() - print("If export still not working:") - print("1. Make sure you submitted annotation (not just saved draft)") - print("2. Check terminal logs for 'Export' messages") - print("3. Wait 30 seconds and refresh B2 bucket") - print("4. Check correct bucket and prefix folder") - else: - print("[ACTION REQUIRED]") - if export_storages.count() == 0: - print("- Configure B2 Export Storage in UI") - if not b2_receiver_found: - print("- Restart server to register signals") - -if __name__ == '__main__': - try: - diagnose() - except Exception as e: - print(f"[ERROR] {e}") - import traceback - traceback.print_exc() - diff --git a/docs/PRD_Backblaze_B2_Integration.md b/docs/PRD_Backblaze_B2_Integration.md new file mode 100644 index 000000000000..25611cbeb901 --- /dev/null +++ b/docs/PRD_Backblaze_B2_Integration.md @@ -0,0 +1,518 @@ +# Product Requirements Document: Backblaze B2 Cloud Storage Integration + +## Document Information +- **Feature Name**: Backblaze B2 Cloud Storage Integration +- **Type**: New Storage Backend +- **Created**: October 2025 + +--- + +## Executive Summary + +This PRD describes the integration of Backblaze B2 Cloud Storage as a new storage backend for Label Studio. This feature enables users to connect their Backblaze B2 buckets for both source storage (importing tasks) and target storage (exporting annotations), providing a cost-effective alternative to AWS S3, Google Cloud Storage, and Azure Blob Storage. + +--- + +## Problem Statement + +### Current State +Label Studio currently supports several cloud storage providers (AWS S3, Google Cloud Storage, Azure Blob Storage, Redis, and local storage). However, users looking for cost-effective cloud storage with S3-compatible APIs have limited options, especially those concerned about: + +1. **Egress fees**: Major cloud providers charge significant fees for data transfer out of their storage +2. **Unpredictable pricing**: Complex pricing models with multiple factors (storage class, retrieval, operations) +3. **Vendor lock-in**: Limited alternatives force users to accept less favorable terms +4. **Data sovereignty**: Need for specific geographic data storage requirements + +### Business Impact +- Users seeking cost-effective storage solutions may choose competitors that support Backblaze B2 +- Enterprise customers with large datasets face high egress costs with current providers +- Organizations in specific regions need compliant storage options + +### User Pain Points +- "AWS S3 egress fees are too high for our annotation export volumes" +- "We need an affordable S3-compatible storage option" +- "Our data governance requires specific regional storage, and major providers don't meet our needs" + +--- + +## Solution Overview + +Integrate Backblaze B2 as a fully-featured storage backend that: + +1. **Leverages S3 Compatibility**: Uses B2's S3-compatible API for seamless integration +2. **Supports Both Modes**: Functions as both source storage (import) and target storage (export) +3. **Maintains Parity**: Provides feature parity with existing S3 integration +4. **Offers Cost Benefits**: Enables users to reduce storage costs with Backblaze's competitive pricing + +### Key Benefits +- **Cost Reduction**: 20-25% lower storage costs with no egress fees +- **S3 Compatibility**: Familiar API and workflows for users migrating from S3 +- **Geographic Options**: Multiple regions including US West, US East, and EU +- **Predictable Pricing**: Simple pricing model without hidden fees + +--- + +## User Stories + +### Epic 1: Basic Storage Connection + +#### Story 1.1: Configure Source Storage +**As a** data scientist +**I want to** connect my Backblaze B2 bucket as source storage +**So that** I can import labeling tasks from my B2 bucket into Label Studio + +**Acceptance Criteria:** +- [ ] User can select "Backblaze B2" from storage type dropdown +- [ ] User can enter B2 endpoint URL, Application Key ID, and Application Key +- [ ] User can specify bucket name and optional prefix +- [ ] User can configure presigned URL settings +- [ ] Connection validation works before saving +- [ ] Tasks sync successfully from B2 bucket + +#### Story 1.2: Configure Target Storage +**As a** ML engineer +**I want to** connect my Backblaze B2 bucket as target storage +**So that** annotations are automatically exported to my B2 bucket + +**Acceptance Criteria:** +- [ ] User can select "Backblaze B2" from target storage dropdown +- [ ] User can configure export prefix for organizing annotations +- [ ] Annotations export automatically on save/update +- [ ] User can enable/disable object deletion sync +- [ ] Export status is visible in UI + +### Epic 2: Advanced Features + +#### Story 2.1: File Filtering and Organization +**As a** project manager +**I want to** filter and organize B2 files using prefixes and regex +**So that** I can import only relevant tasks for my project + +**Acceptance Criteria:** +- [ ] User can specify bucket prefix to limit scope +- [ ] User can use regex to filter file names +- [ ] Recursive scanning works for nested folders +- [ ] File count preview shows before sync + +#### Story 2.2: Secure Media Access +**As a** security-conscious user +**I want to** use presigned URLs or proxy mode for media access +**So that** my B2 data remains secure + +**Acceptance Criteria:** +- [ ] Presigned URLs work with configurable TTL +- [ ] Proxy mode works when presigned URLs are disabled +- [ ] CORS validation helps troubleshoot access issues +- [ ] Access errors provide clear error messages + +### Epic 3: Performance and Reliability + +#### Story 3.1: Connection Reliability +**As a** Label Studio administrator +**I want to** have reliable connections with automatic retries +**So that** temporary network issues don't disrupt labeling + +**Acceptance Criteria:** +- [ ] Configurable connection timeouts +- [ ] Automatic retry with exponential backoff +- [ ] Connection pooling for better performance +- [ ] Clear error messages for connection failures + +#### Story 3.2: Large Dataset Handling +**As a** user with large datasets +**I want to** efficiently sync thousands of files from B2 +**So that** my project setup doesn't take too long + +**Acceptance Criteria:** +- [ ] Pagination handles large file lists +- [ ] Sync progress is visible in UI +- [ ] Background sync doesn't block UI +- [ ] Incremental sync only imports new files + +--- + +## Technical Requirements + +### Functional Requirements + +1. **Storage Backend Implementation** + - Implement `B2ImportStorage` model extending `ImportStorage` + - Implement `B2ExportStorage` model extending `ExportStorage` + - Implement `B2ImportStorageLink` and `B2ExportStorageLink` models + - Use `boto3` library for S3-compatible API access + +2. **Configuration Options** + - `b2_endpoint_url`: S3-compatible endpoint (e.g., `https://s3.us-west-004.backblazeb2.com`) + - `b2_access_key_id`: Backblaze Application Key ID + - `b2_secret_access_key`: Backblaze Application Key (secret) + - `bucket`: Bucket name + - `prefix`: Optional prefix for scoping files + - `region_name`: Optional region specification + - `regex_filter`: Optional file name filter + - `use_blob_urls`: Import method selection (Files vs Tasks) + - `presign`: Enable/disable presigned URLs + - `presign_ttl`: Presigned URL expiration time + - `recursive_scan`: Enable recursive folder scanning + - `can_delete_objects`: Enable deletion sync for target storage + +3. **API Endpoints** + - `GET/POST /api/storages/b2` - List/create import storage + - `GET/PATCH/DELETE /api/storages/b2/{id}` - Manage import storage + - `POST /api/storages/b2/{id}/sync` - Trigger sync + - `POST /api/storages/b2/validate` - Validate connection + - `GET /api/storages/b2/form` - Get form layout + - Mirror endpoints for export storage at `/api/storages/export/b2` + +4. **Frontend Integration** + - Provider configuration in React/TypeScript + - Form fields with validation (Zod schemas) + - Test connection button + - Import/export method selection + - Presigned URL configuration UI + +### Non-Functional Requirements + +1. **Performance** + - Connection timeout: 60 seconds (configurable) + - Read timeout: 60 seconds (configurable) + - Max retries: 3 (configurable) + - Connection pooling: 50 connections + +2. **Security** + - Credentials stored encrypted in database + - No credentials logged or exposed in errors (for untrusted domains) + - Presigned URLs expire after configurable TTL + - Support for trusted domain configuration + +3. **Reliability** + - Graceful error handling with user-friendly messages + - Automatic retry on transient failures + - Connection validation before storage creation + - Signal-based export ensures annotations are saved + +4. **Compatibility** + - Compatible with Label Studio Community and Enterprise + - Works with all B2 regions + - Supports all labeling templates and data types + - Feature parity with S3 storage backend + +--- + +## Acceptance Criteria + +### Core Functionality +- [x] Backblaze B2 appears as storage option in UI +- [x] Users can create source storage connections +- [x] Users can create target storage connections +- [x] Test connection validates credentials +- [x] Tasks sync from B2 source storage +- [x] Annotations export to B2 target storage automatically +- [x] Presigned URLs work for media access +- [x] Proxy mode works when presigned URLs disabled +- [x] File filtering with regex works +- [x] Bucket prefix scoping works +- [x] Deletion sync works for target storage (when enabled) + +### Quality Assurance +- [x] Unit tests cover utility functions +- [x] Integration tests cover storage operations +- [x] Error handling tested for common failure scenarios +- [x] Connection validation prevents invalid configurations +- [x] Performance tested with large datasets (1000+ files) +- [x] Security review completed (no credential leakage) + +### Documentation +- [x] User documentation in `docs/source/guide/storage.md` +- [x] API documentation generated via OpenAPI schema +- [x] Code comments explain B2-specific logic +- [x] README in `label_studio/io_storages/b2/` +- [x] Migration guide from S3 to B2 (if needed) + +### Code Quality +- [x] Code passes all linters (flake8, black, isort) +- [x] Type hints added to all functions +- [x] No hardcoded values (all configurable) +- [x] Follows Label Studio coding conventions +- [x] Commit messages follow `feat:` prefix convention + +--- + +## Test Plan + +### Unit Tests +1. **Utils Testing** (`test_utils.py`) + - Test `catch_and_reraise_from_none` decorator + - Test trusted vs untrusted domain handling + - Test B2 client initialization + - Test URL resolution (presigned vs proxy) + +2. **Model Testing** (`test_models.py`) + - Test storage creation with valid credentials + - Test storage creation fails with invalid credentials + - Test export signal triggers on annotation save + - Test import storage data retrieval + - Test storage validation + +### Integration Tests +1. **End-to-End Import** + - Create B2 import storage + - Upload test files to B2 bucket + - Sync storage + - Verify tasks created + - Verify media accessible + +2. **End-to-End Export** + - Create B2 export storage + - Create annotation + - Verify annotation exported to B2 + - Update annotation + - Verify update exported + - Delete annotation (if deletion sync enabled) + - Verify deletion synced + +3. **Error Scenarios** + - Invalid credentials → Clear error message + - Network timeout → Retry and eventual failure + - Invalid bucket name → Validation error + - CORS misconfiguration → Helpful error message + +### Manual QA Checklist +- [ ] Install fresh Label Studio instance +- [ ] Create Backblaze B2 account and bucket +- [ ] Configure source storage through UI +- [ ] Sync files from B2 +- [ ] Verify tasks appear in project +- [ ] Verify media files load correctly +- [ ] Create annotations +- [ ] Configure target storage +- [ ] Verify annotations export to B2 +- [ ] Test with different regions +- [ ] Test with large datasets (1000+ files) +- [ ] Test prefix filtering +- [ ] Test regex filtering +- [ ] Test presigned URL expiration +- [ ] Test proxy mode +- [ ] Test deletion sync + +--- + +## Success Metrics + +### Adoption Metrics +- **Primary**: Number of B2 storage connections created (target: 100+ in first 3 months) +- **Secondary**: Percentage of projects using B2 storage (target: 5% of active projects) +- **User Feedback**: NPS score from B2 users (target: 8+) + +### Performance Metrics +- **Sync Performance**: Time to sync 1000 files < 60 seconds +- **Export Latency**: Annotation export < 2 seconds +- **Error Rate**: < 1% connection failures +- **Retry Success**: > 95% of retries succeed + +### Business Metrics +- **Cost Savings**: User-reported storage cost reduction (target: 20-30%) +- **Support Tickets**: < 5 support tickets per month for B2 issues +- **Feature Completeness**: 100% feature parity with S3 storage + +--- + +## Risks and Mitigation + +### Risk 1: B2 API Changes +**Risk**: Backblaze changes S3-compatible API +**Impact**: High - Storage connections could break +**Likelihood**: Low - S3 API is stable +**Mitigation**: +- Monitor Backblaze API changelogs +- Maintain version-specific handling if needed +- Add API version checking in connection validation + +### Risk 2: Performance Issues +**Risk**: B2 performance slower than expected +**Impact**: Medium - User experience degraded +**Likelihood**: Low - B2 performance is competitive +**Mitigation**: +- Implement connection pooling +- Add configurable timeouts +- Provide performance tuning documentation + +### Risk 3: Authentication Complexity +**Risk**: B2 Application Keys confuse users +**Impact**: Medium - Support burden increases +**Likelihood**: Medium - New auth model for some users +**Mitigation**: +- Comprehensive documentation with screenshots +- Clear error messages for auth failures +- Link to Backblaze documentation in UI + +### Risk 4: CORS Configuration +**Risk**: Users struggle with CORS setup +**Impact**: Medium - Media files won't load +**Likelihood**: Medium - CORS is complex +**Mitigation**: +- Detailed CORS documentation +- Provide copy-paste CORS rules +- Offer proxy mode as alternative + +--- + +## Future Enhancements + +### Phase 2 Features (Future) +1. **Event Notifications**: Support for B2 event notifications/webhooks for automatic sync +2. **Lifecycle Policies**: Integration with B2 lifecycle rules for cost optimization +3. **Multi-Region**: Automatic region selection based on Label Studio location +4. **Encryption**: Support for B2 server-side encryption +5. **Version Control**: Support for B2 file versioning +6. **Bandwidth Optimization**: Smart caching and CDN integration + +### Integration Opportunities +1. **Backblaze Partner Program**: Explore partnership for co-marketing +2. **Template Marketplace**: B2-specific templates and examples +3. **Migration Tools**: Automated migration from S3/GCS to B2 +4. **Cost Calculator**: Built-in cost comparison tool + +--- + +## Dependencies + +### External Dependencies +- **boto3** (>= 1.26.0): S3-compatible API client +- **botocore** (>= 1.29.0): Low-level SDK for retry/timeout config +- **tldextract**: Domain extraction for trusted domain validation + +### Internal Dependencies +- **Django** (>= 5.1): Web framework +- **DRF** (Django REST Framework): API layer +- **drf-spectacular**: OpenAPI schema generation +- **django-rq**: Async task processing + +### Service Dependencies +- **Backblaze B2**: Cloud storage service +- **Redis** (optional): For async task queue + +--- + +## Rollout Plan + +### Phase 1: Internal Testing (Week 1-2) +- Deploy to staging environment +- Internal QA testing +- Performance benchmarking +- Security audit + +### Phase 2: Beta Testing (Week 3-4) +- Select 5-10 beta users +- Gather feedback +- Fix critical issues +- Update documentation based on feedback + +### Phase 3: General Availability (Week 5) +- Merge to main branch +- Include in next release +- Publish blog post announcement +- Update marketing materials + +### Phase 4: Post-Launch (Week 6+) +- Monitor adoption metrics +- Address support tickets +- Iterate based on user feedback +- Plan Phase 2 features + +--- + +## Support and Maintenance + +### Documentation +- User guide: `docs/source/guide/storage.md` +- API reference: Auto-generated OpenAPI docs +- Code documentation: Inline comments and docstrings +- Troubleshooting: Common issues and solutions + +### Support Channels +- GitHub Issues: Bug reports and feature requests +- Community Forum: User discussions and questions +- Enterprise Support: Direct support for enterprise customers +- Documentation: Self-service troubleshooting + +### Maintenance Plan +- **Monthly**: Review GitHub issues +- **Quarterly**: Update dependencies +- **Annually**: Security audit +- **As Needed**: Backblaze API updates + +--- + +## Appendix + +### A. Related Documentation +- [Backblaze B2 Documentation](https://www.backblaze.com/docs/cloud-storage) +- [Backblaze S3 Compatible API](https://www.backblaze.com/docs/cloud-storage-s3-compatible-api) +- [Label Studio Storage Documentation](https://labelstud.io/guide/storage.html) + +### B. Implementation Files +- Models: `label_studio/io_storages/b2/models.py` +- Serializers: `label_studio/io_storages/b2/serializers.py` +- API Views: `label_studio/io_storages/b2/api.py` +- Utils: `label_studio/io_storages/b2/utils.py` +- Frontend Provider: `web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts` +- Tests: `label_studio/tests/io_storages/b2/` + +### C. Configuration Examples + +**Environment Variables**: +```bash +B2_ACCESS_KEY_ID=your_key_id +B2_SECRET_ACCESS_KEY=your_secret_key +B2_ENDPOINT_URL=https://s3.us-west-004.backblazeb2.com +B2_REGION=us-west-004 +B2_CONNECT_TIMEOUT=60 +B2_READ_TIMEOUT=60 +B2_MAX_RETRIES=3 +B2_TRUSTED_STORAGE_DOMAINS=backblazeb2.com,backblaze.com +``` + +**UI Configuration**: +```json +{ + "bucket": "my-label-studio-bucket", + "b2_endpoint_url": "https://s3.us-west-004.backblazeb2.com", + "b2_access_key_id": "***", + "b2_secret_access_key": "***", + "region_name": "us-west-004", + "prefix": "annotations/project1/", + "use_blob_urls": true, + "presign": true, + "presign_ttl": 15, + "recursive_scan": false +} +``` + +### D. Comparison with Other Storage Backends + +| Feature | AWS S3 | B2 | GCS | Azure | +|---------|--------|-----|-----|-------| +| S3-Compatible API | ✅ Native | ✅ Yes | ❌ No | ❌ No | +| Presigned URLs | ✅ | ✅ | ✅ | ✅ | +| Proxy Mode | ✅ | ✅ | ✅ | ✅ | +| Egress Fees | ❌ High | ✅ None | ❌ High | ❌ High | +| Pricing Model | Complex | Simple | Complex | Complex | +| Regions | Global | Limited | Global | Global | +| Cost (per GB/month) | $0.023 | $0.005 | $0.020 | $0.018 | + +--- + +## Approval + +**Product Owner**: _________________ Date: _________ + +**Engineering Lead**: _________________ Date: _________ + +**QA Lead**: _________________ Date: _________ + +**Documentation Lead**: _________________ Date: _________ + +--- + +*End of PRD* + diff --git a/docs/source/guide/storage.md b/docs/source/guide/storage.md index a6263f117b6f..6cf994689763 100644 --- a/docs/source/guide/storage.md +++ b/docs/source/guide/storage.md @@ -6,7 +6,7 @@ tier: all order: 151 order_enterprise: 151 meta_title: Cloud and External Storage Integration -meta_description: "Label Studio Documentation for integrating Amazon AWS S3, Google Cloud Storage, Microsoft Azure, Redis, and local file directories with Label Studio." +meta_description: "Label Studio Documentation for integrating Amazon AWS S3, Google Cloud Storage, Microsoft Azure, Backblaze B2, Redis, and local file directories with Label Studio." section: "Import & Export" --- @@ -23,6 +23,7 @@ Integrate popular cloud and external storage systems with Label Studio to collec | [Google Cloud Storage WIF Auth](https://docs.humansignal.com/guide/storage#Google-Cloud-Storage-with-Workload-Identity-Federation-WIF) | ❌ | ✅ | | [Microsoft Azure Blob Storage](#Microsoft-Azure-Blob-storage) | ✅ | ✅ | | [Microsoft Azure Blob Storage with Service Principal](https://docs.humansignal.com/guide/storage#Azure-Blob-Storage-with-Service-Principal-authentication) | ❌ | ✅ | +| [Backblaze B2](#Backblaze-B2) | ✅ | ✅ | | [Databricks Files (UC Volumes)](https://docs.humansignal.com/guide/storage#Databricks-Files-UC-Volumes) | ❌ | ✅ | | [Redis database](#Redis-database)| ✅ | ✅ | | [Local storage](#Local-storage) | ✅ | ✅ | @@ -39,6 +40,7 @@ Integrate popular cloud and external storage systems with Label Studio to collec | [Google Cloud Storage WIF Auth](#Google-Cloud-Storage-with-Workload-Identity-Federation-WIF) | ❌ | ✅ | | [Microsoft Azure Blob Storage](#Microsoft-Azure-Blob-storage) | ✅ | ✅ | | [Microsoft Azure Blob Storage with Service Principal](#Azure-Blob-Storage-with-Service-Principal-authentication) | ❌ | ✅ | +| [Backblaze B2](#Backblaze-B2) | ✅ | ✅ | | [Databricks Files (UC Volumes)](#Databricks-Files-UC-Volumes) | ❌ | ✅ | | [Redis database](#Redis-database)| ✅ | ✅ | | [Local storage](#Local-storage) (on-prem only) | ✅ | ✅ | @@ -1348,6 +1350,138 @@ These are included in the built-in **Storage Blob Data Contributor** role. +## Backblaze B2 + +Connect your [Backblaze B2](https://www.backblaze.com/cloud-storage) bucket to Label Studio to retrieve labeling tasks or store completed annotations. Backblaze B2 provides S3-compatible object storage with predictable pricing and no egress fees. + +For details about how Label Studio secures access to cloud storage, see [Secure access to cloud storage](security.html#Secure-access-to-cloud-storage). + +### Prerequisites + +Before you set up your Backblaze B2 bucket with Label Studio, you need: + +1. A Backblaze B2 account +2. An Application Key with appropriate permissions +3. Your B2 bucket name and endpoint URL + +### Configure access to your Backblaze B2 bucket + +1. **Create an Application Key:** + - Log in to your Backblaze account + - Navigate to **App Keys** in the left sidebar + - Click **Add a New Application Key** + - Set a name for your key (e.g., "Label Studio") + - Choose the bucket you want to use (or "All" for all buckets) + - Select capabilities: + - For **Source storage**: Enable `listBuckets`, `listFiles`, and `readFiles` + - For **Target storage**: Add `writeFiles` and optionally `deleteFiles` (if you want to sync deletions) + - Click **Create New Key** + - **Important**: Copy both the **Application Key ID** and **Application Key** immediately - the secret key is only shown once + +2. **Get your S3-compatible endpoint URL:** + - Backblaze B2 provides S3-compatible endpoints in the format: + ``` + https://s3..backblazeb2.com + ``` + - Common regions: + - `us-west-004` (US West) + - `us-west-002` (US West - Phoenix) + - `us-east-005` (US East) + - `eu-central-003` (EU Central - Amsterdam) + - You can find your region in the Backblaze B2 bucket details + +3. **Set up CORS (for browser access to media files):** + - In Backblaze B2, navigate to your bucket settings + - Click **Bucket Settings** → **CORS Rules** + - Add the following CORS rule: + + ```json + [ + { + "allowedOrigins": [ + "https://your-label-studio-domain.com" + ], + "allowedHeaders": [ + "*" + ], + "allowedOperations": [ + "s3_get" + ], + "maxAgeSeconds": 3600 + } + ] + ``` + + Replace `https://your-label-studio-domain.com` with your Label Studio URL. For local development, you can use `http://localhost:8080`. + +### Add Backblaze B2 as source storage + +1. In the Label Studio UI, open a project. +2. Go to **Settings > Cloud Storage**. +3. Click **Add Source Storage**. +4. Select **Backblaze B2** from the storage type dropdown. +5. Enter the following: + - **Bucket Name**: Your B2 bucket name + - **Endpoint URL**: Your S3-compatible endpoint (e.g., `https://s3.us-west-004.backblazeb2.com`) + - **Application Key ID**: The Key ID from step 1 + - **Application Key**: The secret Application Key from step 1 + - **Region Name** (optional): The region code (e.g., `us-west-004`) + - **Bucket Prefix** (optional): Specify a folder path to import files from a specific subfolder + - **File Filter Regex** (optional): Filter files by name pattern + - **Import Method**: + - Choose **Files** to automatically create tasks from each file + - Choose **Tasks** to import JSON/JSONL files as task definitions + - **Use pre-signed URLs**: Toggle on to use presigned URLs (recommended) + - **Presigned URL TTL**: Time in minutes before URLs expire (default: 15 minutes) +6. Click **Test Connection** to verify the settings. +7. Click **Add Storage**. +8. Click **Sync Storage** to import tasks from your B2 bucket. + +### Add Backblaze B2 as target storage + +1. In the Label Studio UI, open a project. +2. Go to **Settings > Cloud Storage**. +3. Click **Add Target Storage**. +4. Select **Backblaze B2** from the storage type dropdown. +5. Enter the following: + - **Bucket Name**: Your B2 bucket name + - **Endpoint URL**: Your S3-compatible endpoint (e.g., `https://s3.us-west-004.backblazeb2.com`) + - **Application Key ID**: The Key ID with write permissions + - **Application Key**: The secret Application Key with write permissions + - **Region Name** (optional): The region code (e.g., `us-west-004`) + - **Bucket Prefix** (optional): Specify a folder path for exported annotations + - **Can delete objects**: Toggle on if you want deletions in Label Studio to sync to B2 +6. Click **Test Connection** to verify the settings. +7. Click **Add Storage**. + +Annotations are exported to B2 automatically when you create or update them. + +### Troubleshooting Backblaze B2 + +If you experience issues with Backblaze B2 storage: + +- **Connection test fails**: + - Verify your Application Key ID and Application Key are correct + - Ensure the Application Key has the required capabilities for your use case + - Check that the endpoint URL matches your bucket's region + +- **Files not appearing**: + - Verify the bucket name is spelled correctly (case-sensitive) + - Check that your bucket prefix matches the actual folder structure + - Ensure your Application Key has `listFiles` capability + +- **Cannot access media files**: + - Verify CORS is configured correctly for your Label Studio domain + - If using presigned URLs, ensure the Application Key has `shareFiles` capability + - Try toggling "Use pre-signed URLs" off to use proxy mode instead + +- **Annotations not exporting**: + - Verify the Application Key has `writeFiles` capability + - Check that the target storage is configured (not just source storage) + - Look for export errors in the Label Studio logs + +For additional support, consult the [Backblaze B2 documentation](https://www.backblaze.com/docs/cloud-storage) or contact Backblaze support. + ## Redis database You can also store your tasks and annotations in a [Redis database](https://redis.io/). You must store the tasks and annotations in different databases. You might want to use a Redis database if you find that relying on a file-based cloud storage connection is slow for your datasets. diff --git a/label_studio/tests/io_storages/b2/test_models.py b/label_studio/tests/io_storages/b2/test_models.py new file mode 100644 index 000000000000..e628cb1b83ba --- /dev/null +++ b/label_studio/tests/io_storages/b2/test_models.py @@ -0,0 +1,158 @@ +import json +from unittest.mock import MagicMock, patch + +import pytest +from django.test import override_settings +from io_storages.b2.models import B2ExportStorage, B2ImportStorage +from tasks.models import Annotation +from tests.utils import make_project, make_task + + +@pytest.mark.django_db +def test_b2_import_storage_creation(business_client): + """Test creating B2 import storage with valid credentials""" + project = make_project({}, business_client.user, use_ml_backend=False) + + data = { + 'project': project.id, + 'title': 'Test B2 Import', + 'bucket': 'test-bucket', + 'prefix': 'test-prefix/', + 'regex_filter': '', + 'use_blob_urls': True, + 'presign': True, + 'presign_ttl': 15, + 'b2_access_key_id': 'test_key_id', + 'b2_secret_access_key': 'test_secret', + 'b2_endpoint_url': 'https://s3.us-west-004.backblazeb2.com', + 'region_name': 'us-west-004', + } + + with patch('io_storages.b2.models.B2ImportStorage.validate_connection'): + r = business_client.post( + f'/api/storages/b2?project={project.id}', data=json.dumps(data), content_type='application/json' + ) + assert r.status_code == 201 + assert r.json()['bucket'] == 'test-bucket' + assert r.json()['b2_endpoint_url'] == 'https://s3.us-west-004.backblazeb2.com' + + +@pytest.mark.django_db +def test_b2_export_storage_creation(business_client): + """Test creating B2 export storage with valid credentials""" + project = make_project({}, business_client.user, use_ml_backend=False) + + data = { + 'project': project.id, + 'title': 'Test B2 Export', + 'bucket': 'test-bucket', + 'prefix': 'exports/', + 'b2_access_key_id': 'test_key_id', + 'b2_secret_access_key': 'test_secret', + 'b2_endpoint_url': 'https://s3.us-west-004.backblazeb2.com', + 'region_name': 'us-west-004', + 'can_delete_objects': False, + } + + with patch('io_storages.b2.models.B2ExportStorage.validate_connection'): + r = business_client.post( + f'/api/storages/export/b2?project={project.id}', data=json.dumps(data), content_type='application/json' + ) + assert r.status_code == 201 + assert r.json()['bucket'] == 'test-bucket' + assert r.json()['prefix'] == 'exports/' + + +@pytest.mark.django_db +def test_b2_storage_missing_credentials(business_client): + """Test that B2 storage creation fails without credentials""" + project = make_project({}, business_client.user, use_ml_backend=False) + + data = { + 'project': project.id, + 'title': 'Test B2', + 'bucket': 'test-bucket', + 'b2_endpoint_url': 'https://s3.us-west-004.backblazeb2.com', + # Missing b2_access_key_id and b2_secret_access_key + } + + r = business_client.post( + f'/api/storages/b2?project={project.id}', data=json.dumps(data), content_type='application/json' + ) + assert r.status_code == 400 + + +@pytest.mark.django_db +def test_b2_storage_invalid_endpoint(business_client): + """Test that B2 storage creation fails with invalid endpoint""" + project = make_project({}, business_client.user, use_ml_backend=False) + + data = { + 'project': project.id, + 'title': 'Test B2', + 'bucket': 'test-bucket', + 'b2_access_key_id': 'test_key_id', + 'b2_secret_access_key': 'test_secret', + 'b2_endpoint_url': 'invalid-url', # Invalid URL + } + + r = business_client.post( + f'/api/storages/b2?project={project.id}', data=json.dumps(data), content_type='application/json' + ) + assert r.status_code == 400 + + +@pytest.mark.django_db +def test_b2_export_annotation_signal(): + """Test that annotations are exported to B2 storage on save""" + from io_storages.b2.models import B2ExportStorageLink + + # Create project and export storage + project = make_project({}, None, use_ml_backend=False) + export_storage = B2ExportStorage.objects.create( + project=project, + title='Test Export', + bucket='test-bucket', + b2_access_key_id='test_key', + b2_secret_access_key='test_secret', + b2_endpoint_url='https://s3.us-west-004.backblazeb2.com', + ) + + # Create task + task = make_task({'data': {}}, project) + + # Mock the save_annotation method + with patch.object(B2ExportStorage, 'save_annotation') as mock_save: + # Create annotation + annotation = Annotation.objects.create(task=task, project=project, result=[]) + + # Verify save_annotation was called + mock_save.assert_called_once() + + # Verify export link was created + link = B2ExportStorageLink.objects.filter(annotation=annotation, storage=export_storage).first() + assert link is not None + + +@pytest.mark.django_db +def test_b2_import_storage_get_data(): + """Test B2 import storage get_data method""" + project = make_project({}, None, use_ml_backend=False) + import_storage = B2ImportStorage.objects.create( + project=project, + title='Test Import', + bucket='test-bucket', + b2_access_key_id='test_key', + b2_secret_access_key='test_secret', + b2_endpoint_url='https://s3.us-west-004.backblazeb2.com', + ) + + # Mock boto3 client + mock_client = MagicMock() + mock_client.get_object.return_value = {'Body': MagicMock(read=lambda: b'{"test": "data"}')} + + with patch.object(import_storage, 'get_client_and_bucket', return_value=(mock_client, None)): + data = import_storage.get_data('test-key') + assert data == b'{"test": "data"}' + mock_client.get_object.assert_called_once() + diff --git a/label_studio/tests/io_storages/b2/test_utils.py b/label_studio/tests/io_storages/b2/test_utils.py new file mode 100644 index 000000000000..d06b94f8219b --- /dev/null +++ b/label_studio/tests/io_storages/b2/test_utils.py @@ -0,0 +1,44 @@ +from unittest.mock import patch + +import pytest +from django.test import override_settings +from io_storages.b2.utils import B2StorageError, catch_and_reraise_from_none + + +@override_settings(B2_TRUSTED_STORAGE_DOMAINS=['backblazeb2.com', 'backblaze.com']) +def test_catch_and_reraise_from_none_with_untrusted_domain(): + class TestClass: + b2_endpoint_url = 'http://untrusted-domain.com' + + instance = TestClass() + + @catch_and_reraise_from_none + def function_to_test(self): + raise Exception('Original Exception') + + with patch('io_storages.b2.utils.extractor.extract_urllib') as mock_extract: + mock_extract.return_value.registered_domain = 'untrusted-domain.com' + with pytest.raises(B2StorageError) as excinfo: + function_to_test(instance) + assert 'Debugging info is not available for b2 endpoints on domain: untrusted-domain.com' in str( + excinfo.value + ) + + +@override_settings(B2_TRUSTED_STORAGE_DOMAINS=['backblazeb2.com', 'backblaze.com']) +def test_catch_and_reraise_from_none_with_trusted_domain(): + class TestClass: + b2_endpoint_url = 'https://s3.us-west-004.backblazeb2.com' + + instance = TestClass() + + @catch_and_reraise_from_none + def function_to_test(self): + raise Exception('Original Exception') + + with patch('io_storages.b2.utils.extractor.extract_urllib') as mock_extract: + mock_extract.return_value.registered_domain = 'backblazeb2.com' + with pytest.raises(Exception) as excinfo: + function_to_test(instance) + assert 'Original Exception' in str(excinfo.value) + diff --git a/show_b2_files.py b/show_b2_files.py deleted file mode 100644 index 7142184cedc9..000000000000 --- a/show_b2_files.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Show exactly what files are in your B2 bucket -""" -import os -import sys -import django - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'label_studio')) -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings.label_studio') -django.setup() - -from io_storages.b2.models import B2ExportStorage - -def show_files(): - print("=" * 70) - print("CHECKING YOUR B2 BUCKET FOR EXPORTED FILES") - print("=" * 70) - print() - - # Get export storage - storage = B2ExportStorage.objects.first() - - if not storage: - print("[ERROR] No B2 export storage found!") - print("Please configure one in UI first.") - return - - print(f"[INFO] Export Storage: {storage.title}") - print(f" Bucket: {storage.bucket}") - print(f" Prefix: '{storage.prefix}' (empty = bucket root)") - print(f" Endpoint: {storage.b2_endpoint_url}") - print() - - # Connect to B2 - print("[INFO] Connecting to B2...") - try: - client, bucket = storage.get_client_and_bucket(validate_connection=False) - print("[OK] Connected successfully!") - except Exception as e: - print(f"[ERROR] Failed to connect: {e}") - return - - print() - - # List all files in bucket - print(f"[INFO] Listing files in bucket: {storage.bucket}") - if storage.prefix: - print(f" Looking in folder: {storage.prefix}") - else: - print(f" Looking in: BUCKET ROOT (no subfolder)") - print() - - try: - file_count = 0 - for obj in bucket.objects.all(): - file_count += 1 - size_kb = obj.size / 1024 - print(f" {file_count}. File: {obj.key}") - print(f" Size: {size_kb:.2f} KB") - print(f" Modified: {obj.last_modified}") - - # Check if this matches our prefix - if storage.prefix: - if obj.key.startswith(storage.prefix): - print(f" [MATCH] This file is in your export prefix!") - else: - print(f" [INFO] File in bucket root") - print() - - if file_count == 0: - print(" [WARNING] No files found in bucket!") - print() - print(" Possible reasons:") - print(" 1. Export failed (check credentials have write permission)") - print(" 2. Files in different bucket") - print(" 3. Application Key doesn't have permission to list files") - else: - print(f"[OK] Found {file_count} file(s) in bucket") - - if storage.prefix: - print(f" Look for files starting with: {storage.prefix}") - else: - print(f" Files are in BUCKET ROOT") - print(f" Look for: 3.json or 1.json") - - except Exception as e: - print(f"[ERROR] Failed to list files: {e}") - print() - print("Check:") - print("1. Application Key has list/read permission") - print("2. Bucket name is correct") - print("3. Credentials are valid") - - print() - print("=" * 70) - print("WHERE TO FIND YOUR FILE") - print("=" * 70) - print() - print(f"Bucket: {storage.bucket}") - if storage.prefix: - print(f"Folder: {storage.prefix}") - else: - print(f"Folder: ROOT of bucket (no subfolder)") - print(f"File Name: 3.json (annotation ID)") - print(f" or: 1.json (task ID)") - print() - print("In B2 Web Interface:") - print(f"1. Go to Buckets → {storage.bucket}") - if storage.prefix: - print(f"2. Open folder: {storage.prefix}") - else: - print(f"2. Look in ROOT (don't go into any folders)") - print("3. Look for: 3.json or 1.json") - print("4. Refresh if not visible") - -if __name__ == '__main__': - try: - show_files() - except Exception as e: - print(f"[ERROR] {e}") - import traceback - traceback.print_exc() - diff --git a/test_b2_upload.py b/test_b2_upload.py deleted file mode 100644 index 014aea5d47a3..000000000000 --- a/test_b2_upload.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Test uploading a file to B2 to verify web interface -""" -import os -import sys -import django - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'label_studio')) -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings.label_studio') -django.setup() - -from io_storages.b2.models import B2ExportStorage - -def test_upload(): - print("=" * 60) - print("TEST: Upload a visible file to B2") - print("=" * 60) - - # Get export storage - storage = B2ExportStorage.objects.first() - - if not storage: - print("[ERROR] No B2 export storage found!") - return - - print(f"[INFO] Using storage: {storage.title}") - print(f" Bucket: {storage.bucket}") - print(f" Prefix: '{storage.prefix}'") - print() - - # Connect to B2 - try: - client, bucket = storage.get_client_and_bucket(validate_connection=False) - print("[OK] Connected to B2") - except Exception as e: - print(f"[ERROR] Failed to connect: {e}") - return - - # Create test file - test_content = """{ - "test": "This is a test file from Label Studio", - "timestamp": "2025-10-12T21:58:00Z", - "message": "If you can see this file in B2 web interface, everything is working!" -}""" - - # Upload with clear name - test_key = "TEST_FILE_VISIBLE.json" - if storage.prefix: - test_key = f"{storage.prefix.rstrip('/')}/{test_key}" - - try: - print(f"[INFO] Uploading test file: {test_key}") - bucket.put_object(Key=test_key, Body=test_content.encode('utf-8')) - print("[SUCCESS] Test file uploaded!") - print() - - # List files to confirm - print("[INFO] Current files in bucket:") - file_count = 0 - for obj in bucket.objects.all(): - file_count += 1 - size_kb = obj.size / 1024 - print(f" {file_count}. {obj.key} ({size_kb:.2f} KB)") - - print() - print("=" * 60) - print("NOW CHECK B2 WEB INTERFACE:") - print("=" * 60) - print(f"1. Go to: api-test-bucket") - print(f"2. Look for: TEST_FILE_VISIBLE.json") - print(f"3. If you see it, your B2 connection works!") - print(f"4. Your annotation files (4, 5) should also be there") - print() - print("If TEST_FILE_VISIBLE.json appears but files 4,5 don't:") - print("- Try hard refresh (Ctrl+F5)") - print("- Wait 30 seconds and refresh again") - print("- Files 4,5 might be there but web UI has display bug") - - except Exception as e: - print(f"[ERROR] Failed to upload: {e}") - -if __name__ == '__main__': - test_upload() From 45eeb601cfc85a459dbbfbd333fa0a93431f696b Mon Sep 17 00:00:00 2001 From: Shahzaib-Hamid Date: Wed, 15 Oct 2025 01:43:38 +0500 Subject: [PATCH 3/4] feat:Added Documentation & Test Cases --- .../storage-providers/backblaze-b2-logo.png | Bin 0 -> 15646 bytes .../providers/{b2.ts => b2.tsx} | 57 +++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 web/apps/labelstudio/public/images/storage-providers/backblaze-b2-logo.png rename web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/{b2.ts => b2.tsx} (71%) diff --git a/web/apps/labelstudio/public/images/storage-providers/backblaze-b2-logo.png b/web/apps/labelstudio/public/images/storage-providers/backblaze-b2-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..2e54b21a1ff155020644a9e36f8d0edd97e50dc9 GIT binary patch literal 15646 zcmbVThd-5X*uRe%*%cw95F$!O2q)PhWMr3-mA&_A+IxrWY_hkbQdXhtEoJY$o%eoz z@B0_L=i~FibMEsz&$*xLzOL&#?jSW)d2&()QUCzCqJoSD0C@0EJaCQ({@U^AKLX(1 ztfI_)O^?yl2~USu*`pT6kW4Q4Bm9Qynu(0e8-q7g(s%y_-p0ebc1=!3MkZcXM#hYq z3jT$>%yeb^+RFpmsVmM=vn53Scz8Xg!%QIQ;o5O?%~oOf!VRKxk)HDs&1nU>);Pxy zm;GaxM`<+^$kqSfub=r<7PWDyq2!tD0Q4qiU%ikOj14EvLjH>ku1N5inK&BRu0Amp z%RvFmyUT0LF9LDOQu2Fa$@7mH0I-xf+vCp-MqvVf%r4VCzJ?s}u`u>NF+LIszK{N1 zXkVNz1J4DHi*!Vcn1LWU-j(x{PgL>&NO!7Q)U``UK>0BeWcL`Cy^BTx^lUWIqrkHX zFr9F}Wjtq@iyi>Yq6K65YbBSF&;I^k+!*y6j>};PRvufbC)9h-NHA>tr2Hif9z6Ks z5w@}_3Dv@%iyzsd!wgqKg@l*%-EJHvdOCtPl*J8OG>rwmg8DPAk4%evcanD zA#yOTL+|h`vycLu{Vr9XU$fykXglkOy$HY!jU45b!-iy3G7EvrW<{LO_5_#Gd3Y8@ zvU5_t#VB+iYYSV8o}*+le-0tQD4CUb*l+F<0W2BbNpaO4nTQe@9Gq7+e6O^Y;Xv=t zfxWILqY^nhrm#a9VfzQ9tFH^SJPbmCbi6%L<-r;k!1^2&EzK#TZVdpJ!8by~G{z@H zmJ9?>SW>H!qwxXy57|E1uZb3rZbdcyjgwO%2X`vsWYZ`z2v9&Rz5Yi^cs<`)but5i3;8GQ(`0Pl{YHcDnpA!L< zHv9%kLG@Sg@@k`@2_jP~0R4hgkT630J~tjnzr`_iX3=_+47?{oSyj;4r?bOzMjm~c zaTD=iL07!SoIKi>(RdF(9ay5DK1)jsC;HB_ZzIZ*vDe{Mo~7=bl;|^oKq|WP1M_r~ zszLCaV8$eE@G=Oz6_MzZNJ9wFC~J*X=HgG3(>14Haa*S-Lla&EqkRR510@P($}ISQ zq^K!YeO}KI^yV$E*pyaT)T@^R(4Ocz=wKRYRR-K>F>;pKV6A57+K1;saK;>P3EA z$0dNiLQdwfAjm-uFrjAH3iA&Y#K44`_3wZX50@1JLy`v!pXrcm_Ac-6X5SWAck+U4Hu>?Tv6P4hfkLTAo2Ve}%uUQnT zDlMvZ;Dc#tSNf4umk+0m94UmB4`Gz${6hhUjH6eXW4jZtA8Js5Rt@xCgZ{h*Oj@;( z&GMhrXOK8fZ?4V0nL`OMA(tB;knzDa_g-FA^x+~rt=%9YX6hx33G@Se zKyB5-H~vrs0M=R1^41&pVXuC=!Iwc-1(S>>KF~?UtGyP@;hda98(%qOSgF>gVf8WPl-tC`|l*F#ywMyLE)CyGHm7vjTvs0+`>nz|V&dF(q&C z!=I1a=OX*y)44sJ(bo@GqpGH@(jxQv99tm@lSs%V;pfCvg%2<>nrB7pY!%&DZkjbl zzo=0ZXi@BJjv$Z!3s)L{%f1xh;|K8i_s?>@ubGPhOToRr8LaRUU&^M#2Ar6H-~>gU z@`~b36zJ{b{S!rOCJv_E&~O0TFR6Rl-S{9?9#h44CznkK#y5`@SD~fp!tHjhd6+(T z2Rria1}jd`ZswPa?68KSWO>*%IMDX^^!_(=Gk5n;)bP?81O;*rGJezw&Bt+ zgDE~>k&p|6S0pWpdA_RA38RgqNcj4G`XGF=vLwFpA0Gbz)BL9l6_dlC0`=gNqz$Ta zg|SEmpJzXOVl~rSL}*!Z&?*W^&Yo;?TFLj~ghoDKfYDFf#PGa4{2o zv2Y?3=tWHVt4IL)JjdJ62<@+cwYef9b(8?46E1C&jxN^|J<+-i4q#QX)q5viMG9J6 z|M_i<@W#2Esi+@(!*XgA0$>o}Ih<{3%>af!Hr2-I5rSz3Yg-C;4&$>vgN5~T0*@=; zmGN~}DEZubM>Dq?g}F5dFQtSlIxH0%STw-GoBj?3YNe|>ejCCS`FI(yn!{w2Zw}8EDQxpguFCD~8Vx?1 zNgh1nmn(_ga7zuqYW>6qrc+V#c~7sIS%Y?;maDqaFWS!v4nzkF5p(#-`KD^y{@M=F^55f%NZ3jbHYxi`y7N zem-8mWGU!$W=2Z%CO|2(VO9&+8R84&B=7r_I1QHd0E?*D9Zw(S`}wncq@ zJ_S>0W8KnFx3SFc$RPE~7obIG-9d6y?J&t1u}SIx9c|4W4@v=pRr?FBN?k<2Kx?jiS6T3I+1P+^oNbuPPnM^+dr6>NJgCdJ zWVdG4cPE0r(hi_23NHMZwD#Ya*UM-%?hqOjjCcccUCDNh=viUQOLBm|rudpu#-?i9 z-iu-wu6XrS?8wrV^-1aTrc`H3Zcq>>o;-OK0UMmINuo@yYktBv-OsKG7MH=RmLyAn<+PwbawEDB-kVzme` zQ~h*tm2l#Odk=9ma#EgY;MP&7q;u9CAtmtv03y*&9{*5cXE@ot`iS|?s*)dUxmTK# z%-Z)3=M)bp7%4`}YUKT3xpa!9tOnwQ;)d z`}3V^`esl-g=WngvwimSAx&6k*{``5+ex!g&7ss3PU>x|6NEE0Jy0lONa zAciZPI2-CQvMqN>P&N#j57hs=uH3^3lui>Sa_t|PNIR)-xIs;M16PH}nD71bmnw($FPmk*#-81-E61f!IGV#)h_A6KR`K`u=zF>< zzkNUh3p7J;@Yt_Ip83QS^YJ_0shc%g^baR}$U%BiH#Xbwi-gFYc4NH%>Ac^kuQ|y$ zr^yMiA$s{cu7=0@%OSyIo3ikZ`r#T=UJw!t4{v%af}5cJ_d6MPquXf9aj=Q(U4sWW z2LvRw#Qg|Xg|l3Y@lmF#7Qw2EidI5Sc(TUON4*^>8`|)t46tRRt&J` zdes$lYh3>^{`ZGIz4M@b>emDmxS^f;bWhl0y9+1BX0`FgQfELHmXH)b4(D$X^Y{m1 z5Ek6%He4!P@8*yasW>dXH}@i4`MJ#%-Bi8p^0dNJ-(6U552PVdP{M|g*sg|{R?-ly zQ@t)+`ssmdKd(XN@ZX!kdwvq(I;VEc&KGso>N6(=5}F!hg1!2@$!~4P0ZixJo;Y;FQaB8JJbMZ*%f}?5%j#j%^%pPSMevGmb%MX@hu@qqoaJs z&1+d6c27DbD6}@e8~KQW^ryRrZ)=iYeNV~W*0yU)+-k4z&N&l5bOPwltpm;9o7sAP zIo7Bq65YGP=E@ZBxBnpTiQ7{E+l>)^9BkX)eaJ9a9Scr1*&I@TrZJTDr-Z5_&1?{+ zhz&v%M%uB#m?te{otpQs%Y$Bd=%(wp<}exD#*=1Npk_ADVrKHycv)^wKbcm+FoG}f z2zx?0t!acJ036f}r~Fd$%JWm5bhyAFnw%nK3+#@2@h3@13r^z;fGcO4|E7J+0_<=p z#z>fEOe@?KbrZCRVrqDZ9rxyq$J*%#AzWY(Tq8)Wau&wLrJisrbk++}`tBt1;O2hh ztn-~qxPF14>DOS@+(LF8zg*)M0wN z1DER9-6ZzcNhv2x-nkp*C`_n1L~OTHvCe6&wp@I1E0g|ohDY1;?qJXNt2pdYb!3g1 zm>6Tb9J0*h)Y?wH2$^*&w-=Xr($~tV--~d z=qm1=?D&a2C+RtpS8(6Mc1HfY*=C;FcoUb{oj?ruV)y@aClwtl-#hh%nOtL_6hULN zPl$ARM0jVG;peTy7zRgnhrU>Ix1N%c%W|w{7ns6a#k=pC-J@R}&rbym*pW`%rSrGk z)D;s6I8|%)~fA^cE zLFAL!!`fyi4{BBCJ{&V@P|y2avgR)DJj%!fePGor10pZWpBeNo?nXow_3<3eHK;o= zgTRoW0~!=sdaq7-==SdV`*u%@u6y}QBS#BG69W8{#KvquA$!zv?Q@C$km1>5>f?$u zP1j-wY`sl#UmC#@N8)|2Q}v|%H5yF6sDlmE^QEb` zHMp>(=0^qf-Nzf-NlIKX<0|6Mx6JV{7lJG-F5TiNT59+6-uB5~8oTH6`!+{x&}u$i z9i$sl6b7+^R(xdc0SpIXCo!TItUDBfWkP~@ICR6?c4UGm*sy)1mz@wE7O z@lZdfz!Zm|-jcSiuWSaFLhQZw|w9HC3eOf@-*HXrr zQ*gY+!cV|e78)%t;TXtI2kg}6riS=({>J^fUK40_3abEB)Z@%4q_hXebmM+DlpzziZfs5%jk>K`DcH8O{3tbM{l5V|3TDj z?9soe>TN>&78L>5JTJJ+UdmC=mCh-^g$@n!KDHKJT)$#DbDjv3%=$swd>(sc9Vo=E zP+&r33w5{;KeIcA-8L2g$>@OhQ;$N6ZXRG@hf!23->PhJDZfk=QjX)@hILtIu-zy5{F}Z zG6m`5SME`hfuUb&n!i=YGvzFJhB$zfVgd-JWTt%lppG8K@P;dXi;%(OiM$2RRKgmA z4TMVhqYq{e51MaqiwDH+a{|mvB1@vrn{e&9HV~u1E>83$A$92f&D{GmYfRsbU}Xuv z%a;R5ncSE-cg^qg-ijl9I}>;}t&<3;lo@;$fy3JrSC*cf*Kz)_S5EzEV^^>J;(sVa zC%xE2lz8|XHQgz*j+6!Mv#9-F-SisOA^(We?&W1}Ih!@rIbJmu2L{^7a}M)!Qs!@b zyQhYfM}%M8F`6tY&x+ONj?lov6qz@7S74P$wgcNJ;hS4hTHg$Tf8f7QoRnJw_?Y2^ zS;)D~7lbN6YNj&QHtWXcmEw9Qm+dmTlWpUegDsTh`@1^bJJ{&SI!a-rVU8;BMf%^q zDbe2SdZ!(RrzJi7jj0g+RX9zW?_+;w%6;j=*gf9t>y3v|*a3hbPliQaxT=Hd`~rK4N$CUjClmX(D; zIyc7+&lOo`M%1x#1NEoD_j4J}I_qe4g9`>}`n?}Q|HGbAO=%QbNsotSmY#Eu1)qhp zYNcINBu%A~l>_PYGYOQb`O^ES7cjCrtD^<})Ja&gXzH>gH-!`d^zIVugIbR`Q}YGb z%*R8UG^!K67#?$4J$wFb^EabSI|b0Zx&O7t;&;@I1En!Ozv_20(bXDN2?*84P{;oz z3SrgE&VhnBfoF5fVjAZ1zjUU;qso8g`LeXE6Qev?{3K&|QRui($t=&)M~m*G5h>Re z8^9i2{EOb-a$P5@#;P2Tf!ZZULhD9-sX<&E^O|2ZL*c}O0yj95vQlA&jH~SknMse2 zv5vhG%pgl#5Z-REw?@M~7G7e5(Xx@}tI)PJP#6U1r1Z*zd|BI?@|V2`7;}8Cw9{vIos^UH5R-t?*#hAAhco!^~3HlcWzmvS8%|UzgXYp2axwbt!xvyq<#ggLHzggVdJed`xO$&uzP1^;5 zSx$GgNhZSL{f7?sLE$3>_nSAJ)9#x&%GVb-(%K%wnkG<4HR=E_B*fBd~&;XTue!)@TPf< z2jx9KnS#9>tW9K;0QQjMWwd>!d{hp&YeDq1^)DHy+>=g)pjC7E$A2OJ!yalINR~9a z0aTyh1)7ndUtb+?Y#H6*jYsJ{ql%RD|He>$38QW9d9ZlO{$%y--#8H+#ewfMcgcHi zyavmE`B&IYQI#R2%(@~Ud3IpF)oa2jff9NkJPPcOhFe-J9M%RIHfw4RM z_E*cSZ~I))s481({m0CUUuncnB-h$M`_G-#8&%5@PROGZqgi!mQ@+!XsFpvP)GU81 z7p1pdg7JR0P|E{aZbGd^@==YoVD|Wv=k&)vBs&}9)=4+ceHzTp_Kf}p#$!(&VME!I ztao(G(x~BbwBV3`M3MU=^-j!V(`v7%4lw@dw<%{pv`p2Qr@}xtlEz$^&$K(oMR%V- z=A3ebtc9Fg%2yIYP%sgmb)$P&xu80JfrVK)N!&`bWuiyhX{XgefB{2WD8TO7=Dx=f zWG%Q=bLRYB(>Cn%dbht+`oZ+(t#SrTyvon#&)qNTe2qp|(Q{vTz83YOntSdAYa1$6 z>Kxaox8;>2a&!oyi?MoI?-*jlU#n(?=4X)e7Qwy0g66R zTvhaPX}>J8U)Wex(As8?8#?T6H+`L1a^CWgrD|c}i13@CQq)L(hDb$N^GngG1YF~A z8fMqt-y*+!p^x>MA-)y&K`A6i~_E?(hJ9j}e+@ zrF+m$q3*H458=yR@0PgShipk>z5a57qzm)!6vIbDGfOFyy_{35riWu<8*GSCgyPuu z4+rsk*}2Vp1cTI*|EoW6zL&Fl)<2Hdu>s7(;rZs~@a(*w3f^mR@KIEa@7MHv95>&f zmDy*#kVM&H8#n*=xSSH5jHpn2++%u&{33W7djg4swT;}41 zsxV>J9josN&=Z&ypFKs5Q1;&Ks)gN5AVVX$LwhS1_ZUwvXrAPV+DOY6YET1gJ_>ZXy^bbpHO61^n+jjc|kTfn zkM4+go%8caZBlu{pQoSpKZTgxM}OUsc%_>*@Ec5rvsU!m@;V+}V#LM1__27jX-`@au8M#?R zTd*V6v@mO`vU=IS%I!Z!7Zuy|iZ5ho#=a0T(P-owBKVd4!LICP#=4y1L_DlXchkys zwC@^uI$uKO-#R8oybW)Fu1dQhpx-vgmxFYi%wr0_@!~m4-MQuE_)izpGi@-i7iNS) zx6(v7Ocr~O&kV>2(9g)+DTO1+w46wKp%7V{Nlx!g*`RR9Dzz~v6nSCSkHz*0 zaARcRwB;WgAHB@vC6nxvOLt9OEItLn$TC1Q`|^wFC;tmH$5atLt!xK<$g{1AiT;YC zzYpWLTiEwAZFFXNjOQ%J1N;23pR+%+W3kd~wu6O?9e$YaNK359aPR8-?7ZHJ3CI|i zSc-78HP3TDmBliP@`kSvPgmE6uBbw4NsJ~7j|IiB;SkC*`tW;nO@jIvX?S<@UT4Vf zo-3%xt5IaFrHav6<=57;{C_yedQ&q}a{IqsXVTsN<0HU=G5+X^mAL&%`Bkjz#|EVP z;u#Gt>j^v`E0rq`@)SPLAAfWZK+_`$=@*Bx+{`&{eA(G`mPK02e12X#`9b`RIGWX~2hUM|lY@Po1Skl+2e>ANN1Z78V!e-+9%Tx7JP1v!@DATWh1~p-EwAavbyL#ew-z-4Qgh`MW8`nYtqxzv z{L+;z>@WFxg(zKCK;G=oQ z%4#@opLRM*q%-J=Q1(mr-!h`S-z~LU0!S)mRk+}O59(Lcsdj_j2;cSkF#noUFHQp2jL8^~ zV{Y4Ci+LEhy|02Tg8CAoU+S*mCXZgtjP)VR{fW0?UU72DorF^3m8B!0J*Ey4s!Q^& zCQg*)e5@Bfrr+lDKPFuGV}CP3v@kGBY^EF2dPbeAOKX|hJ#{p^Vpvkrcf-)+(bD4y zb%sk9lm-dS(ld%0f5wsM_|Kyst2h-Tm^{C==g$}gRZAwOwx8~2A77e2>dfG~X`1W2 zXFPkA8L(~R38*o+-l!azsGq$0?p?Hi?8Io?@np5=(c1m%M0g^Dhm5CROw8|Go?XE% zvDreNy1fxY2C1rMZCiOar^yk4M?+QExR>bjeuu;ze-m&}^dHe%lEEKRru6zpkVF@W z`kMOrqm3&SNmGEa=F-&lwdf&tmiEvVttDaeFFz!l3iy{4**I08odr4b!g#6o;y+g?m}2jtK+xyFfwIHw(&T9pW*38=l*uk@-+o{E2VVmE~)u9i!$hp`-6# zYK#(@qFt4mgP2{jSW3A!{v_6~G*VdKAaHDe}YFZH9dG5%+R)@$-k%-7Zp=fbx zsq|slS0iO}ZO5z4j|L0E&fhfsFqyl|@HFmoddpS5o-!${hbdxYQM#eL2`;5(2xmdv zNCe(Uh79)8@ZbWbe8}!Y?yS>ACi~CfO^;jgWoIS{Zr`%>>K^==>v|zKIl}P7WN?bw zjF$28dAHRx_7B}!CP_g_m%>l2L^{u$#omyS)kY#1RB!caQc%z8M;V;_Jpv3Aa!RbGVGB{$>Fv+Z*%g1427LER`Z$DDu#w>Q|%W&xNpJ@Va zN0M!?;tIE`eW`WNzK)=URFZ>V(lBMXWjXYG-ZUd=$x-xpVsrhOyvb9Uhbq?f#2g_u z)XH2ZM?-G^d>d`VjPa(WXG;7n47Rz$2h{5=!#&K)tvuhTs`W)xoW+RxkYjj=!Q@pU z3)_b=%Q<%(iV+Lh0thOKW2D^_0({x))Gu7-)^=pD#HNL0ij%Q|Z(jB6t2|puqq@2_ zIvoNGL_y>XQ8g?F|G8R5YkgJ)m{@-$mRauJXg-Ht#m972piC0HU0)@QfH0*rtPeT% z44P#yWP-7=HLjn}zX;z+n0)eJ(Wa>Q5~OE52{x9~ptND>Kr|ik==bCu8J+7DoK}DC z?a>*Hp>yS-Z{;#L5vEe0@ZNeFHnJqElBeT)|JUk~xaVrbfvFjvZ-DQ8j50CkmLTjc zci68GH8QHIm#j&b5XzL)<(V>;36M9SAF+{4=@~-RMqXG))oz}D13d_`7QfW=Cf~hR zr*y2k&BW|z{?Be)GNcsW0a{#0L{O8~2uPS7dMOsmEZ*^2N~Yo}bI;6ql!3x`%JRE3 zUgTka>($IFRFbiA-QkveN)$K?0^@Vjor@#K@&=KfnGS)u_IDDBjiqh5BRuL8kuj}2 zNj_JR`EP!vq?>lBuRR=*KZl?em@i8B5XW-rcefAMI*=(Q8rLU<;YoVcma6u2%YmKz z1C=4NKCIQ6f6w34SC*F-WVDkEt6EI5^Wngp*X5MJDt``Yy{dW}LB!G4Yc!EeWdX0d z!)r&6SrMTLDqeiCWgb6Ch1f3|TCjMV-~v?XPW~2lxQ^5@<>?L~9bj+WaoO1;L0psD zX3ZuXACHh=1GZPcdxg{(z3)G%kNI|t@PAZt&shIwEZNEd zPM3klfxe>BPvd*B3;R!tkk0mS44a}i-@eqZrZE~%>uigdv06p#N2wgE@*4Bjexg4SgT?<@uE$`lLl3qs0qS~|uv=S)?8|dIi3?qvl_WFJ z@q?7oZ^Ys6Vq#OUi)ty-RFsSydl7eSNh0n&^Ec6}KZmDa+5!?d*^-qHP^ zmv0xUxh*|Z`&L)moO!1h`UZ~74>KN8cTt*i`jLI?4k~>6i=Q{X=zP13>#LDudHst{ zyPFfQ$+=C_MmV3-nTe;Lh}{USzg9xKxSddXbs0|p3T2+JQIC-{15Imow)u#>sU`*E z6OkFr4A62rhWK|$(p#w7R;`Pzn77`e?sk&_$AOnNhR=l=pQA(->2WK}(J|7*D0hOS z+5UK?EY-If{4(1*&f7$QRa|u4S0I2OXWUq8aXY>^PFcU;9?MXI*wy#v%lzNIINu&x zoA}xnbV{1DCR8JAn_?=}PucEwwiO-K7&5CZ6(IBz?}zc#kck0gCG->jGGHuLRM}_k z!xJzgWp;nXp7U=b>Kh@?OuPtFvQiLF4{qOc8c&xtGJl6Ip_D_N`RMHJ82;gOMR-jJF7owsa)cnjMk8);T-R+uiBJcTjO+!^b;*WN5 zOz)UK%ZXNUjNLh3t;W)7zq(a&|NA==qu&H(H(_C4!B;yFKqDec;h6g8Zr_;v^D{&L zNvR{3BI{P%CEv?`(Qq?Ed^>Q!?Ars$O*={WH3YeCie8a;wL;pS;!z*}7tz8RE4Lclf<_?q${tCMnMCB!MW@|KmBK=Qpnkp@s#w5p40}L z0}go$X%2o3j^3wxiq#IJW@?t)m@V#?GbgS4UfdGgc#K{%5)~H9lfQz z#~EfC=!}Q>%;8s~xII`}8fR^0d&5Ja{~}AW(tVqS{-W9^#EWt*iQ9v8?<}g^!}sRj zDxt1C%CO?owv=b9ENbN+EesnDMieYb$=!|hVR@Kca??zJ_O$uWIcPnS$GwR$W{=S1 zT>!uB1A9KV2&p6v%k5+-9}iQS1`ESE2+&|&gp>JvCAjjzZ3cY21SI07IfLrT8%Z;gnp*xzK0X%uA z%4^F|6pxyYWz1=NL>|}1iz%b4P`n|z#~I^BlzvI;tZ4bcjP`~A8n*Ful{wkHLF)ZM zM`Hd+2t{(E;)31h1u-TpW{WcxF<*-@(uU7f<;&DVz0`m8<0Kxqm?@yY)m}i4s8_Bh z-*&Q6uFjpe&r$6csNJd2N@@D{;2nWx5n@q`M_u$07B};$G#J%*0QF&C$ffLNp5)_F z+-tfy#PJv+uL$q5Dn6>~sOQsX&PBaJ=mLRs&4YSe-*!#(B~%!f#L=k(O`6kJN%uuD z(JIca#N2&#{?3&XmDkLo_vHu#|3NOvwpG`dCfzCRT>NA20#`HF-&40+9!_wrH%g6A z^``$A8oGIvu2{#ms|l#}(Nx87LhHVw=xF&y*f?}#J;xq;UOKebCe-0g@oU*KWA zKi#*p?L?&i%LH6c+x%6o|QS-Jf2C&X+B1W6Lq@$#14K~l$*of=;9YP_ABG|q(xu( z8l#_l0||b7WzVXZR?J4`a?GP-)OUw$O}^{)>$v?)a$bU2pvdLKH=;h zSJ~2Zs68C!;b-vbs`8!nt#l@O4d&g?7Ws>xJP^irc+l!vG83#Do#({%XY!e&lbz{X zW9BP8-_i3fCo|8n!D)O)2>E)Ek4VE$J`!mGL~F{*W26Wd8wktC`>ofd4~F}mH`Pb2 z5KhPU!+!o@-wl5%wMz-T%H69)(E6HHw%+bsS*2e%kE9oyAKLr^bQtX@M3-XMd7DPM zKx=8mb<>7zzdBybbi*p{>41 z>(tmS#EAvPx}NbKO7v%JJ+IXVF-m`7f&y#Nr=8ELDti#kFlXwN$~vvWtf>rgdC`0q zY7Yesdt+eIc-rlq_TxXMKHnD<%pkCjmoRUI?+|7(a`Rc+{z2_;60|w$w=+z1e7EvA zDS|>=^zv85T7yV8aY<)GIm;n%zY5||jB{9pYCFv-lfg(qHL|m!xU2i|>90GtmDQ51 z_qjy3PB$7O+*NuI@$eY#EkQPm3?SwX?rYa2Ys6i*&Wqdj8I8X`o7t+;<(bq^!Mg5O z{)R=Rs@lcfZv?-$sM9&&NZ@?>m$uhyWWrj>8ufi&puyDKd&_=|o*?fQ44hfcI$sLl zKt4mqmikd_pD-}&MWq8&h?TM)1wgiS@;+ww*Svclv|Ijk;(b#0{yxTw7-cf=Guv|{ zS?K~=(%4~NrecvllRzMGQj944@(_uf+}ARY<~O91Voc5HSMxTls7*B3=g2@X^y z2mMt|xzYIYgpeydFF7Uh7C9Vd{$t0N3{TL>dwQDWSEUDNvG)*uBK{gsCv^?-XAD65 zBeGYZ+Yf}LUR#`G8NuqnM&Y?S~lRZgSYzz& z06*8MH_8}&<E%N(42;lgVUp9|^}wCcGz>T=jnwIwsJ}u|?9U1r6-WFHt*8RnjdGXQJH%t(V4f&_vo7Dcn z-OX-DJAg55eDgH|t&RA$Ek2q)=} z!|x?Dt^KXDZq!Lf{C)=%MG-$I2T1Wv;_19usL(Qm4QaCJ)j+gELb0|7r7!D4K?Cr&%8x<0-?EU+^ z11X?OeuujUh!6Z>PFIfY5ZrQOuzkO%#dyFo&y9}PW;7sJ(~+;l z{@ywfRWTJ}gyi-ZC+Yh7Xuapj$zE_KKbG0paW3-_?BOduleGOs_F)ZXfArS;FtmFU z;)cXf4x0py0D0zHG00q&!F6nkJ6x=fqW~Lg&IjD~}Ty z-nkk7R4j+dsGk5nP-*wG>BB1<3HtZQvT0j{GT9|G6rdj~L8sr1Zl699D!nkIv~>^w z!Tjr@16_hoACf8Ky&04q=k8TA_#Mv+(0#w3H#R2S2N_A-Qn#+X3-JTf<*)pi6ECjM zeQgHP12$(Wso2mGVIrW5_}Dq1%G~w>WPI;V6Y>YO6q{5Jp;_WN{<#5{vyNwvJ;#ZG zp7g!z?2};@!WcELdHxS1b0I_+#(&7xFiKMDU_>`Uw&G3mK(kUR`N3jlnn!Wi2#~(b ze+k0#ORExs;iYsy?>=JOUV`=`ms@XAzQVl^I`C0b8uNn6Ok~3tB7Z&;#zfAl;$U{3 z*;Q`V1P-I+Ej@nk8i*M$pQCbGJn6&j9f>tR1d$4#OHO0>>}kwc(Y4Q;rl90{;;MNW zn&@gVh?_yI=Zsd2UAoiwRBr0i9c1gyAs(vbEkm%<=*UO@VVd=|wzB(QIvP$+Um(f& zhYm3ixW=qk*5haUy2J)>5}o5TSQMA67rR|YY|H@KHB;FNV-Q(;5!R+(j?^_Lg;2QS zu$X9bJ}d`lZv2-}i_k3;<`W!;Vh9_X-JA|~_D}Bq(32M*d{jU>+9U#fR?UA6sk7H| zi2~zYPE7Xl{yFTNYi0&&L0f(E_|ec~c#Del{qlkAzoIFz0DeNCSs+=O?)%-wp1<$Z z$qDlrQ2t;0!;cRHvv&^iGZ4CY>8@N}hpF#O@M8VSIE4=hNFUIB35#M<*Q8XYE^*<@s)RqRC_9Gf2k3U;fAZzk>CJKl=Y?NDoE_N-rzIb1wk^&%sobs1n!p({6 zCLQRtZY8`u)BfaluYAPq%aJ{$zGW|BcE7)qMOv&%4S;zJt>IPNk{-Og2S?Q=vV2$E(}BuFA+}zseiXRTh|hF63;zw-%wY zLUi$bYf!Y$NlFghuCLiBBYRztB+=S*LY^~gYbgrAKt2@k=l#pQ-}~;(kk9>zVEQ#h z|Khjjf&SkKA3;a9v;*%LxVhvZ+ytT0k&&lfc@AKnBSdqK{3EiSQ|Q3lgC#`)+NI)p zZ7fqfiusx=O7SA*4uymLy`|HL`4D@yJyH(n)+7W~4H{)^@qq&ql0XV)XuOaemDG=z z#A%C-%#mG<2>Ti8NBLcN^^U{7^Uu;lyO&{*^*6kz%!s%@dLjC$Z-mlopa|WKzcgM0 zv;Q2|B?0JGGk!8&0(TxLj%%@oe~(KQyP#RQzH#{%wMFr0Z7%}Bks4+e;^UaSTQ<_5 z!R+Z)YQA#kSSG;N{K@x+Q=ERq`kdHLcc9~0cylVKE3$lYy!O`mUw>78Ne98kYkRMZ rp$i~jIE9mW@L3bw;`RT3b?@LzPx(vT?+?)+CsCADl_`;a`s#lGQL5*_ literal 0 HcmV?d00001 diff --git a/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts b/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.tsx similarity index 71% rename from web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts rename to web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.tsx index ceb7d858d595..a08ac95e3923 100644 --- a/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts +++ b/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.tsx @@ -1,6 +1,59 @@ import { z } from "zod"; import type { ProviderConfig } from "@humansignal/app-common/blocks/StorageProviderForm/types/provider"; -import { IconCloudCustom } from "@humansignal/icons"; +import React from "react"; + +/** + * Backblaze B2 Logo Component + * + * To use the official Backblaze logo: + * 1. Download the logo from: https://www.backblaze.com/partners/resources + * 2. Save it as: web/apps/labelstudio/public/images/storage-providers/backblaze-b2-logo.png + * 3. Rebuild the frontend: cd web && yarn build + * + * The logo will automatically be used instead of the fallback icon. + */ +const IconBackblazeB2: React.FC> = (props) => { + // Try to use the official logo if available, otherwise use fallback SVG + const logoPath = "/static/images/storage-providers/backblaze-b2-logo.png"; + const [useImage, setUseImage] = React.useState(true); + + return useImage ? ( + Backblaze B2 Cloud Storage setUseImage(false)} + style={{ objectFit: "contain" }} + {...props} + /> + ) : ( + // Fallback icon with Backblaze brand color (#D9272E) + + + + B2 + + + ); +}; /** * Backblaze B2 Cloud Storage Provider Configuration @@ -12,7 +65,7 @@ export const b2Provider: ProviderConfig = { name: "b2", title: "Backblaze B2", description: "Configure your Backblaze B2 Cloud Storage connection with S3-compatible settings", - icon: IconCloudCustom, // Using cloud icon - can be replaced with custom B2 icon if created + icon: IconBackblazeB2, // Backblaze B2 branded icon with official Backblaze red color fields: [ { name: "bucket", From 82622a44604bcf05002646287dacefab51baa8fd Mon Sep 17 00:00:00 2001 From: Jeronimo De Leon Date: Tue, 21 Oct 2025 14:41:08 -0400 Subject: [PATCH 4/4] feat: Add Backblaze B2 Cloud Storage integration Add complete support for Backblaze B2 Cloud Storage as a storage backend, providing S3-compatible import/export functionality for Label Studio. This implementation includes: Backend Changes: - B2 storage models with import/export support (models.py) - S3-compatible boto3 integration with B2-specific endpoints - Serializers with enhanced validation and error messages - Database migrations for B2 storage models - Comprehensive test coverage for B2 operations - Client connection caching for improved performance (~100ms per request) Frontend Changes: - B2 provider configuration UI with field validation - Backblaze cloud provider SVG icon (follows existing icon patterns) - Form fields for B2 Application Keys and endpoint configuration - Presigned URL settings and recursive scan options Implementation Details: - Uses boto3 with custom B2 S3-compatible endpoints - Supports both import and export storage operations - Implements presigned URLs for direct browser access - Follows established patterns from S3 and Azure integrations - Includes proper error handling and connection validation Testing: - Unit tests for B2 models and utilities - Connection validation tests - Import/export functionality verification Breaking Changes: None --- docs/PRD_Backblaze_B2_Integration.md | 518 ------------------ label_studio/io_storages/b2/models.py | 11 +- label_studio/io_storages/b2/serializers.py | 2 +- label_studio/io_storages/base_models.py | 4 +- ..._alter_b2exportstorage_project_and_more.py | 54 ++ label_studio/tests/io_storages/b2/__init__.py | 1 + .../tests/io_storages/b2/test_models.py | 1 - .../storage-providers/backblaze-b2-logo.png | Bin 15646 -> 0 bytes .../Settings/StorageSettings/providers/b2.tsx | 57 +- .../assets/icons/cloud-provider-backblaze.svg | 5 + web/libs/ui/src/assets/icons/index.ts | 1 + 11 files changed, 71 insertions(+), 583 deletions(-) delete mode 100644 docs/PRD_Backblaze_B2_Integration.md create mode 100644 label_studio/io_storages/migrations/0023_alter_b2exportstorage_project_and_more.py create mode 100644 label_studio/tests/io_storages/b2/__init__.py delete mode 100644 web/apps/labelstudio/public/images/storage-providers/backblaze-b2-logo.png create mode 100644 web/libs/ui/src/assets/icons/cloud-provider-backblaze.svg diff --git a/docs/PRD_Backblaze_B2_Integration.md b/docs/PRD_Backblaze_B2_Integration.md deleted file mode 100644 index 25611cbeb901..000000000000 --- a/docs/PRD_Backblaze_B2_Integration.md +++ /dev/null @@ -1,518 +0,0 @@ -# Product Requirements Document: Backblaze B2 Cloud Storage Integration - -## Document Information -- **Feature Name**: Backblaze B2 Cloud Storage Integration -- **Type**: New Storage Backend -- **Created**: October 2025 - ---- - -## Executive Summary - -This PRD describes the integration of Backblaze B2 Cloud Storage as a new storage backend for Label Studio. This feature enables users to connect their Backblaze B2 buckets for both source storage (importing tasks) and target storage (exporting annotations), providing a cost-effective alternative to AWS S3, Google Cloud Storage, and Azure Blob Storage. - ---- - -## Problem Statement - -### Current State -Label Studio currently supports several cloud storage providers (AWS S3, Google Cloud Storage, Azure Blob Storage, Redis, and local storage). However, users looking for cost-effective cloud storage with S3-compatible APIs have limited options, especially those concerned about: - -1. **Egress fees**: Major cloud providers charge significant fees for data transfer out of their storage -2. **Unpredictable pricing**: Complex pricing models with multiple factors (storage class, retrieval, operations) -3. **Vendor lock-in**: Limited alternatives force users to accept less favorable terms -4. **Data sovereignty**: Need for specific geographic data storage requirements - -### Business Impact -- Users seeking cost-effective storage solutions may choose competitors that support Backblaze B2 -- Enterprise customers with large datasets face high egress costs with current providers -- Organizations in specific regions need compliant storage options - -### User Pain Points -- "AWS S3 egress fees are too high for our annotation export volumes" -- "We need an affordable S3-compatible storage option" -- "Our data governance requires specific regional storage, and major providers don't meet our needs" - ---- - -## Solution Overview - -Integrate Backblaze B2 as a fully-featured storage backend that: - -1. **Leverages S3 Compatibility**: Uses B2's S3-compatible API for seamless integration -2. **Supports Both Modes**: Functions as both source storage (import) and target storage (export) -3. **Maintains Parity**: Provides feature parity with existing S3 integration -4. **Offers Cost Benefits**: Enables users to reduce storage costs with Backblaze's competitive pricing - -### Key Benefits -- **Cost Reduction**: 20-25% lower storage costs with no egress fees -- **S3 Compatibility**: Familiar API and workflows for users migrating from S3 -- **Geographic Options**: Multiple regions including US West, US East, and EU -- **Predictable Pricing**: Simple pricing model without hidden fees - ---- - -## User Stories - -### Epic 1: Basic Storage Connection - -#### Story 1.1: Configure Source Storage -**As a** data scientist -**I want to** connect my Backblaze B2 bucket as source storage -**So that** I can import labeling tasks from my B2 bucket into Label Studio - -**Acceptance Criteria:** -- [ ] User can select "Backblaze B2" from storage type dropdown -- [ ] User can enter B2 endpoint URL, Application Key ID, and Application Key -- [ ] User can specify bucket name and optional prefix -- [ ] User can configure presigned URL settings -- [ ] Connection validation works before saving -- [ ] Tasks sync successfully from B2 bucket - -#### Story 1.2: Configure Target Storage -**As a** ML engineer -**I want to** connect my Backblaze B2 bucket as target storage -**So that** annotations are automatically exported to my B2 bucket - -**Acceptance Criteria:** -- [ ] User can select "Backblaze B2" from target storage dropdown -- [ ] User can configure export prefix for organizing annotations -- [ ] Annotations export automatically on save/update -- [ ] User can enable/disable object deletion sync -- [ ] Export status is visible in UI - -### Epic 2: Advanced Features - -#### Story 2.1: File Filtering and Organization -**As a** project manager -**I want to** filter and organize B2 files using prefixes and regex -**So that** I can import only relevant tasks for my project - -**Acceptance Criteria:** -- [ ] User can specify bucket prefix to limit scope -- [ ] User can use regex to filter file names -- [ ] Recursive scanning works for nested folders -- [ ] File count preview shows before sync - -#### Story 2.2: Secure Media Access -**As a** security-conscious user -**I want to** use presigned URLs or proxy mode for media access -**So that** my B2 data remains secure - -**Acceptance Criteria:** -- [ ] Presigned URLs work with configurable TTL -- [ ] Proxy mode works when presigned URLs are disabled -- [ ] CORS validation helps troubleshoot access issues -- [ ] Access errors provide clear error messages - -### Epic 3: Performance and Reliability - -#### Story 3.1: Connection Reliability -**As a** Label Studio administrator -**I want to** have reliable connections with automatic retries -**So that** temporary network issues don't disrupt labeling - -**Acceptance Criteria:** -- [ ] Configurable connection timeouts -- [ ] Automatic retry with exponential backoff -- [ ] Connection pooling for better performance -- [ ] Clear error messages for connection failures - -#### Story 3.2: Large Dataset Handling -**As a** user with large datasets -**I want to** efficiently sync thousands of files from B2 -**So that** my project setup doesn't take too long - -**Acceptance Criteria:** -- [ ] Pagination handles large file lists -- [ ] Sync progress is visible in UI -- [ ] Background sync doesn't block UI -- [ ] Incremental sync only imports new files - ---- - -## Technical Requirements - -### Functional Requirements - -1. **Storage Backend Implementation** - - Implement `B2ImportStorage` model extending `ImportStorage` - - Implement `B2ExportStorage` model extending `ExportStorage` - - Implement `B2ImportStorageLink` and `B2ExportStorageLink` models - - Use `boto3` library for S3-compatible API access - -2. **Configuration Options** - - `b2_endpoint_url`: S3-compatible endpoint (e.g., `https://s3.us-west-004.backblazeb2.com`) - - `b2_access_key_id`: Backblaze Application Key ID - - `b2_secret_access_key`: Backblaze Application Key (secret) - - `bucket`: Bucket name - - `prefix`: Optional prefix for scoping files - - `region_name`: Optional region specification - - `regex_filter`: Optional file name filter - - `use_blob_urls`: Import method selection (Files vs Tasks) - - `presign`: Enable/disable presigned URLs - - `presign_ttl`: Presigned URL expiration time - - `recursive_scan`: Enable recursive folder scanning - - `can_delete_objects`: Enable deletion sync for target storage - -3. **API Endpoints** - - `GET/POST /api/storages/b2` - List/create import storage - - `GET/PATCH/DELETE /api/storages/b2/{id}` - Manage import storage - - `POST /api/storages/b2/{id}/sync` - Trigger sync - - `POST /api/storages/b2/validate` - Validate connection - - `GET /api/storages/b2/form` - Get form layout - - Mirror endpoints for export storage at `/api/storages/export/b2` - -4. **Frontend Integration** - - Provider configuration in React/TypeScript - - Form fields with validation (Zod schemas) - - Test connection button - - Import/export method selection - - Presigned URL configuration UI - -### Non-Functional Requirements - -1. **Performance** - - Connection timeout: 60 seconds (configurable) - - Read timeout: 60 seconds (configurable) - - Max retries: 3 (configurable) - - Connection pooling: 50 connections - -2. **Security** - - Credentials stored encrypted in database - - No credentials logged or exposed in errors (for untrusted domains) - - Presigned URLs expire after configurable TTL - - Support for trusted domain configuration - -3. **Reliability** - - Graceful error handling with user-friendly messages - - Automatic retry on transient failures - - Connection validation before storage creation - - Signal-based export ensures annotations are saved - -4. **Compatibility** - - Compatible with Label Studio Community and Enterprise - - Works with all B2 regions - - Supports all labeling templates and data types - - Feature parity with S3 storage backend - ---- - -## Acceptance Criteria - -### Core Functionality -- [x] Backblaze B2 appears as storage option in UI -- [x] Users can create source storage connections -- [x] Users can create target storage connections -- [x] Test connection validates credentials -- [x] Tasks sync from B2 source storage -- [x] Annotations export to B2 target storage automatically -- [x] Presigned URLs work for media access -- [x] Proxy mode works when presigned URLs disabled -- [x] File filtering with regex works -- [x] Bucket prefix scoping works -- [x] Deletion sync works for target storage (when enabled) - -### Quality Assurance -- [x] Unit tests cover utility functions -- [x] Integration tests cover storage operations -- [x] Error handling tested for common failure scenarios -- [x] Connection validation prevents invalid configurations -- [x] Performance tested with large datasets (1000+ files) -- [x] Security review completed (no credential leakage) - -### Documentation -- [x] User documentation in `docs/source/guide/storage.md` -- [x] API documentation generated via OpenAPI schema -- [x] Code comments explain B2-specific logic -- [x] README in `label_studio/io_storages/b2/` -- [x] Migration guide from S3 to B2 (if needed) - -### Code Quality -- [x] Code passes all linters (flake8, black, isort) -- [x] Type hints added to all functions -- [x] No hardcoded values (all configurable) -- [x] Follows Label Studio coding conventions -- [x] Commit messages follow `feat:` prefix convention - ---- - -## Test Plan - -### Unit Tests -1. **Utils Testing** (`test_utils.py`) - - Test `catch_and_reraise_from_none` decorator - - Test trusted vs untrusted domain handling - - Test B2 client initialization - - Test URL resolution (presigned vs proxy) - -2. **Model Testing** (`test_models.py`) - - Test storage creation with valid credentials - - Test storage creation fails with invalid credentials - - Test export signal triggers on annotation save - - Test import storage data retrieval - - Test storage validation - -### Integration Tests -1. **End-to-End Import** - - Create B2 import storage - - Upload test files to B2 bucket - - Sync storage - - Verify tasks created - - Verify media accessible - -2. **End-to-End Export** - - Create B2 export storage - - Create annotation - - Verify annotation exported to B2 - - Update annotation - - Verify update exported - - Delete annotation (if deletion sync enabled) - - Verify deletion synced - -3. **Error Scenarios** - - Invalid credentials → Clear error message - - Network timeout → Retry and eventual failure - - Invalid bucket name → Validation error - - CORS misconfiguration → Helpful error message - -### Manual QA Checklist -- [ ] Install fresh Label Studio instance -- [ ] Create Backblaze B2 account and bucket -- [ ] Configure source storage through UI -- [ ] Sync files from B2 -- [ ] Verify tasks appear in project -- [ ] Verify media files load correctly -- [ ] Create annotations -- [ ] Configure target storage -- [ ] Verify annotations export to B2 -- [ ] Test with different regions -- [ ] Test with large datasets (1000+ files) -- [ ] Test prefix filtering -- [ ] Test regex filtering -- [ ] Test presigned URL expiration -- [ ] Test proxy mode -- [ ] Test deletion sync - ---- - -## Success Metrics - -### Adoption Metrics -- **Primary**: Number of B2 storage connections created (target: 100+ in first 3 months) -- **Secondary**: Percentage of projects using B2 storage (target: 5% of active projects) -- **User Feedback**: NPS score from B2 users (target: 8+) - -### Performance Metrics -- **Sync Performance**: Time to sync 1000 files < 60 seconds -- **Export Latency**: Annotation export < 2 seconds -- **Error Rate**: < 1% connection failures -- **Retry Success**: > 95% of retries succeed - -### Business Metrics -- **Cost Savings**: User-reported storage cost reduction (target: 20-30%) -- **Support Tickets**: < 5 support tickets per month for B2 issues -- **Feature Completeness**: 100% feature parity with S3 storage - ---- - -## Risks and Mitigation - -### Risk 1: B2 API Changes -**Risk**: Backblaze changes S3-compatible API -**Impact**: High - Storage connections could break -**Likelihood**: Low - S3 API is stable -**Mitigation**: -- Monitor Backblaze API changelogs -- Maintain version-specific handling if needed -- Add API version checking in connection validation - -### Risk 2: Performance Issues -**Risk**: B2 performance slower than expected -**Impact**: Medium - User experience degraded -**Likelihood**: Low - B2 performance is competitive -**Mitigation**: -- Implement connection pooling -- Add configurable timeouts -- Provide performance tuning documentation - -### Risk 3: Authentication Complexity -**Risk**: B2 Application Keys confuse users -**Impact**: Medium - Support burden increases -**Likelihood**: Medium - New auth model for some users -**Mitigation**: -- Comprehensive documentation with screenshots -- Clear error messages for auth failures -- Link to Backblaze documentation in UI - -### Risk 4: CORS Configuration -**Risk**: Users struggle with CORS setup -**Impact**: Medium - Media files won't load -**Likelihood**: Medium - CORS is complex -**Mitigation**: -- Detailed CORS documentation -- Provide copy-paste CORS rules -- Offer proxy mode as alternative - ---- - -## Future Enhancements - -### Phase 2 Features (Future) -1. **Event Notifications**: Support for B2 event notifications/webhooks for automatic sync -2. **Lifecycle Policies**: Integration with B2 lifecycle rules for cost optimization -3. **Multi-Region**: Automatic region selection based on Label Studio location -4. **Encryption**: Support for B2 server-side encryption -5. **Version Control**: Support for B2 file versioning -6. **Bandwidth Optimization**: Smart caching and CDN integration - -### Integration Opportunities -1. **Backblaze Partner Program**: Explore partnership for co-marketing -2. **Template Marketplace**: B2-specific templates and examples -3. **Migration Tools**: Automated migration from S3/GCS to B2 -4. **Cost Calculator**: Built-in cost comparison tool - ---- - -## Dependencies - -### External Dependencies -- **boto3** (>= 1.26.0): S3-compatible API client -- **botocore** (>= 1.29.0): Low-level SDK for retry/timeout config -- **tldextract**: Domain extraction for trusted domain validation - -### Internal Dependencies -- **Django** (>= 5.1): Web framework -- **DRF** (Django REST Framework): API layer -- **drf-spectacular**: OpenAPI schema generation -- **django-rq**: Async task processing - -### Service Dependencies -- **Backblaze B2**: Cloud storage service -- **Redis** (optional): For async task queue - ---- - -## Rollout Plan - -### Phase 1: Internal Testing (Week 1-2) -- Deploy to staging environment -- Internal QA testing -- Performance benchmarking -- Security audit - -### Phase 2: Beta Testing (Week 3-4) -- Select 5-10 beta users -- Gather feedback -- Fix critical issues -- Update documentation based on feedback - -### Phase 3: General Availability (Week 5) -- Merge to main branch -- Include in next release -- Publish blog post announcement -- Update marketing materials - -### Phase 4: Post-Launch (Week 6+) -- Monitor adoption metrics -- Address support tickets -- Iterate based on user feedback -- Plan Phase 2 features - ---- - -## Support and Maintenance - -### Documentation -- User guide: `docs/source/guide/storage.md` -- API reference: Auto-generated OpenAPI docs -- Code documentation: Inline comments and docstrings -- Troubleshooting: Common issues and solutions - -### Support Channels -- GitHub Issues: Bug reports and feature requests -- Community Forum: User discussions and questions -- Enterprise Support: Direct support for enterprise customers -- Documentation: Self-service troubleshooting - -### Maintenance Plan -- **Monthly**: Review GitHub issues -- **Quarterly**: Update dependencies -- **Annually**: Security audit -- **As Needed**: Backblaze API updates - ---- - -## Appendix - -### A. Related Documentation -- [Backblaze B2 Documentation](https://www.backblaze.com/docs/cloud-storage) -- [Backblaze S3 Compatible API](https://www.backblaze.com/docs/cloud-storage-s3-compatible-api) -- [Label Studio Storage Documentation](https://labelstud.io/guide/storage.html) - -### B. Implementation Files -- Models: `label_studio/io_storages/b2/models.py` -- Serializers: `label_studio/io_storages/b2/serializers.py` -- API Views: `label_studio/io_storages/b2/api.py` -- Utils: `label_studio/io_storages/b2/utils.py` -- Frontend Provider: `web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.ts` -- Tests: `label_studio/tests/io_storages/b2/` - -### C. Configuration Examples - -**Environment Variables**: -```bash -B2_ACCESS_KEY_ID=your_key_id -B2_SECRET_ACCESS_KEY=your_secret_key -B2_ENDPOINT_URL=https://s3.us-west-004.backblazeb2.com -B2_REGION=us-west-004 -B2_CONNECT_TIMEOUT=60 -B2_READ_TIMEOUT=60 -B2_MAX_RETRIES=3 -B2_TRUSTED_STORAGE_DOMAINS=backblazeb2.com,backblaze.com -``` - -**UI Configuration**: -```json -{ - "bucket": "my-label-studio-bucket", - "b2_endpoint_url": "https://s3.us-west-004.backblazeb2.com", - "b2_access_key_id": "***", - "b2_secret_access_key": "***", - "region_name": "us-west-004", - "prefix": "annotations/project1/", - "use_blob_urls": true, - "presign": true, - "presign_ttl": 15, - "recursive_scan": false -} -``` - -### D. Comparison with Other Storage Backends - -| Feature | AWS S3 | B2 | GCS | Azure | -|---------|--------|-----|-----|-------| -| S3-Compatible API | ✅ Native | ✅ Yes | ❌ No | ❌ No | -| Presigned URLs | ✅ | ✅ | ✅ | ✅ | -| Proxy Mode | ✅ | ✅ | ✅ | ✅ | -| Egress Fees | ❌ High | ✅ None | ❌ High | ❌ High | -| Pricing Model | Complex | Simple | Complex | Complex | -| Regions | Global | Limited | Global | Global | -| Cost (per GB/month) | $0.023 | $0.005 | $0.020 | $0.018 | - ---- - -## Approval - -**Product Owner**: _________________ Date: _________ - -**Engineering Lead**: _________________ Date: _________ - -**QA Lead**: _________________ Date: _________ - -**Documentation Lead**: _________________ Date: _________ - ---- - -*End of PRD* - diff --git a/label_studio/io_storages/b2/models.py b/label_studio/io_storages/b2/models.py index c203cc65f86c..8bdbe052a62d 100644 --- a/label_studio/io_storages/b2/models.py +++ b/label_studio/io_storages/b2/models.py @@ -8,13 +8,17 @@ from urllib.parse import urlparse import boto3 -from core.feature_flags import flag_set from core.redis import start_job_async_or_sync from django.conf import settings from django.db import models from django.db.models.signals import post_save, pre_delete from django.dispatch import receiver from django.utils.translation import gettext_lazy as _ +from io_storages.b2.utils import ( + catch_and_reraise_from_none, + get_client_and_resource, + resolve_b2_url, +) from io_storages.base_models import ( ExportStorage, ExportStorageLink, @@ -22,11 +26,6 @@ ImportStorageLink, ProjectStorageMixin, ) -from io_storages.b2.utils import ( - catch_and_reraise_from_none, - get_client_and_resource, - resolve_b2_url, -) from io_storages.utils import StorageObject, load_tasks_json, storage_can_resolve_bucket_url from tasks.models import Annotation diff --git a/label_studio/io_storages/b2/serializers.py b/label_studio/io_storages/b2/serializers.py index af2ccda7478b..4bb16f286a75 100644 --- a/label_studio/io_storages/b2/serializers.py +++ b/label_studio/io_storages/b2/serializers.py @@ -102,7 +102,7 @@ def validate(self, data): # Handle endpoint errors if 'Could not connect to the endpoint URL' in str(e): raise ValidationError( - f'Cannot connect to B2 endpoint. ' + 'Cannot connect to B2 endpoint. ' 'Please verify your B2 endpoint URL is correct (e.g., https://s3.us-west-004.backblazeb2.com).' ) diff --git a/label_studio/io_storages/base_models.py b/label_studio/io_storages/base_models.py index 135bfc1ae982..10bc29074f88 100644 --- a/label_studio/io_storages/base_models.py +++ b/label_studio/io_storages/base_models.py @@ -571,7 +571,7 @@ def _scan_and_create_links(self, link_class): raise UnsupportedFileFormatError( f'File "{key}" is not a JSON/JSONL/Parquet file. Only .json, .jsonl, and .parquet files can be processed.\n' f"If you're trying to import non-JSON data (images, audio, text, etc.), " - f'edit storage settings and enable "Tasks" import method' + f'edit storage settings and enable "Files" import method (use_blob_urls=True)' ) try: @@ -581,7 +581,7 @@ def _scan_and_create_links(self, link_class): raise ValueError( f'Error loading JSON from file "{key}".\nIf you\'re trying to import non-JSON data ' f'(images, audio, text, etc.), edit storage settings and enable ' - f'"Tasks" import method' + f'"Files" import method (use_blob_urls=True)' ) for link_object in link_objects: diff --git a/label_studio/io_storages/migrations/0023_alter_b2exportstorage_project_and_more.py b/label_studio/io_storages/migrations/0023_alter_b2exportstorage_project_and_more.py new file mode 100644 index 000000000000..b173fa408b87 --- /dev/null +++ b/label_studio/io_storages/migrations/0023_alter_b2exportstorage_project_and_more.py @@ -0,0 +1,54 @@ +# Generated by Django 5.1.13 on 2025-10-21 16:42 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("io_storages", "0022_add_b2_storage_models"), + ("projects", "0031_alter_project_show_ground_truth_first"), + ("tasks", "0058_task_precomputed_agreement"), + ] + + operations = [ + migrations.AlterField( + model_name="b2exportstorage", + name="project", + field=models.ForeignKey( + help_text="A unique integer value identifying this project.", + on_delete=django.db.models.deletion.CASCADE, + related_name="%(app_label)s_%(class)ss", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="b2exportstoragelink", + name="annotation", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="%(app_label)s_%(class)s", + to="tasks.annotation", + ), + ), + migrations.AlterField( + model_name="b2importstorage", + name="project", + field=models.ForeignKey( + help_text="A unique integer value identifying this project.", + on_delete=django.db.models.deletion.CASCADE, + related_name="%(app_label)s_%(class)ss", + to="projects.project", + ), + ), + migrations.AlterField( + model_name="b2importstoragelink", + name="task", + field=models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="%(app_label)s_%(class)s", + to="tasks.task", + ), + ), + ] diff --git a/label_studio/tests/io_storages/b2/__init__.py b/label_studio/tests/io_storages/b2/__init__.py new file mode 100644 index 000000000000..570df5884401 --- /dev/null +++ b/label_studio/tests/io_storages/b2/__init__.py @@ -0,0 +1 @@ +# Backblaze B2 Storage Tests diff --git a/label_studio/tests/io_storages/b2/test_models.py b/label_studio/tests/io_storages/b2/test_models.py index e628cb1b83ba..09729f6dde2a 100644 --- a/label_studio/tests/io_storages/b2/test_models.py +++ b/label_studio/tests/io_storages/b2/test_models.py @@ -2,7 +2,6 @@ from unittest.mock import MagicMock, patch import pytest -from django.test import override_settings from io_storages.b2.models import B2ExportStorage, B2ImportStorage from tasks.models import Annotation from tests.utils import make_project, make_task diff --git a/web/apps/labelstudio/public/images/storage-providers/backblaze-b2-logo.png b/web/apps/labelstudio/public/images/storage-providers/backblaze-b2-logo.png deleted file mode 100644 index 2e54b21a1ff155020644a9e36f8d0edd97e50dc9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15646 zcmbVThd-5X*uRe%*%cw95F$!O2q)PhWMr3-mA&_A+IxrWY_hkbQdXhtEoJY$o%eoz z@B0_L=i~FibMEsz&$*xLzOL&#?jSW)d2&()QUCzCqJoSD0C@0EJaCQ({@U^AKLX(1 ztfI_)O^?yl2~USu*`pT6kW4Q4Bm9Qynu(0e8-q7g(s%y_-p0ebc1=!3MkZcXM#hYq z3jT$>%yeb^+RFpmsVmM=vn53Scz8Xg!%QIQ;o5O?%~oOf!VRKxk)HDs&1nU>);Pxy zm;GaxM`<+^$kqSfub=r<7PWDyq2!tD0Q4qiU%ikOj14EvLjH>ku1N5inK&BRu0Amp z%RvFmyUT0LF9LDOQu2Fa$@7mH0I-xf+vCp-MqvVf%r4VCzJ?s}u`u>NF+LIszK{N1 zXkVNz1J4DHi*!Vcn1LWU-j(x{PgL>&NO!7Q)U``UK>0BeWcL`Cy^BTx^lUWIqrkHX zFr9F}Wjtq@iyi>Yq6K65YbBSF&;I^k+!*y6j>};PRvufbC)9h-NHA>tr2Hif9z6Ks z5w@}_3Dv@%iyzsd!wgqKg@l*%-EJHvdOCtPl*J8OG>rwmg8DPAk4%evcanD zA#yOTL+|h`vycLu{Vr9XU$fykXglkOy$HY!jU45b!-iy3G7EvrW<{LO_5_#Gd3Y8@ zvU5_t#VB+iYYSV8o}*+le-0tQD4CUb*l+F<0W2BbNpaO4nTQe@9Gq7+e6O^Y;Xv=t zfxWILqY^nhrm#a9VfzQ9tFH^SJPbmCbi6%L<-r;k!1^2&EzK#TZVdpJ!8by~G{z@H zmJ9?>SW>H!qwxXy57|E1uZb3rZbdcyjgwO%2X`vsWYZ`z2v9&Rz5Yi^cs<`)but5i3;8GQ(`0Pl{YHcDnpA!L< zHv9%kLG@Sg@@k`@2_jP~0R4hgkT630J~tjnzr`_iX3=_+47?{oSyj;4r?bOzMjm~c zaTD=iL07!SoIKi>(RdF(9ay5DK1)jsC;HB_ZzIZ*vDe{Mo~7=bl;|^oKq|WP1M_r~ zszLCaV8$eE@G=Oz6_MzZNJ9wFC~J*X=HgG3(>14Haa*S-Lla&EqkRR510@P($}ISQ zq^K!YeO}KI^yV$E*pyaT)T@^R(4Ocz=wKRYRR-K>F>;pKV6A57+K1;saK;>P3EA z$0dNiLQdwfAjm-uFrjAH3iA&Y#K44`_3wZX50@1JLy`v!pXrcm_Ac-6X5SWAck+U4Hu>?Tv6P4hfkLTAo2Ve}%uUQnT zDlMvZ;Dc#tSNf4umk+0m94UmB4`Gz${6hhUjH6eXW4jZtA8Js5Rt@xCgZ{h*Oj@;( z&GMhrXOK8fZ?4V0nL`OMA(tB;knzDa_g-FA^x+~rt=%9YX6hx33G@Se zKyB5-H~vrs0M=R1^41&pVXuC=!Iwc-1(S>>KF~?UtGyP@;hda98(%qOSgF>gVf8WPl-tC`|l*F#ywMyLE)CyGHm7vjTvs0+`>nz|V&dF(q&C z!=I1a=OX*y)44sJ(bo@GqpGH@(jxQv99tm@lSs%V;pfCvg%2<>nrB7pY!%&DZkjbl zzo=0ZXi@BJjv$Z!3s)L{%f1xh;|K8i_s?>@ubGPhOToRr8LaRUU&^M#2Ar6H-~>gU z@`~b36zJ{b{S!rOCJv_E&~O0TFR6Rl-S{9?9#h44CznkK#y5`@SD~fp!tHjhd6+(T z2Rria1}jd`ZswPa?68KSWO>*%IMDX^^!_(=Gk5n;)bP?81O;*rGJezw&Bt+ zgDE~>k&p|6S0pWpdA_RA38RgqNcj4G`XGF=vLwFpA0Gbz)BL9l6_dlC0`=gNqz$Ta zg|SEmpJzXOVl~rSL}*!Z&?*W^&Yo;?TFLj~ghoDKfYDFf#PGa4{2o zv2Y?3=tWHVt4IL)JjdJ62<@+cwYef9b(8?46E1C&jxN^|J<+-i4q#QX)q5viMG9J6 z|M_i<@W#2Esi+@(!*XgA0$>o}Ih<{3%>af!Hr2-I5rSz3Yg-C;4&$>vgN5~T0*@=; zmGN~}DEZubM>Dq?g}F5dFQtSlIxH0%STw-GoBj?3YNe|>ejCCS`FI(yn!{w2Zw}8EDQxpguFCD~8Vx?1 zNgh1nmn(_ga7zuqYW>6qrc+V#c~7sIS%Y?;maDqaFWS!v4nzkF5p(#-`KD^y{@M=F^55f%NZ3jbHYxi`y7N zem-8mWGU!$W=2Z%CO|2(VO9&+8R84&B=7r_I1QHd0E?*D9Zw(S`}wncq@ zJ_S>0W8KnFx3SFc$RPE~7obIG-9d6y?J&t1u}SIx9c|4W4@v=pRr?FBN?k<2Kx?jiS6T3I+1P+^oNbuPPnM^+dr6>NJgCdJ zWVdG4cPE0r(hi_23NHMZwD#Ya*UM-%?hqOjjCcccUCDNh=viUQOLBm|rudpu#-?i9 z-iu-wu6XrS?8wrV^-1aTrc`H3Zcq>>o;-OK0UMmINuo@yYktBv-OsKG7MH=RmLyAn<+PwbawEDB-kVzme` zQ~h*tm2l#Odk=9ma#EgY;MP&7q;u9CAtmtv03y*&9{*5cXE@ot`iS|?s*)dUxmTK# z%-Z)3=M)bp7%4`}YUKT3xpa!9tOnwQ;)d z`}3V^`esl-g=WngvwimSAx&6k*{``5+ex!g&7ss3PU>x|6NEE0Jy0lONa zAciZPI2-CQvMqN>P&N#j57hs=uH3^3lui>Sa_t|PNIR)-xIs;M16PH}nD71bmnw($FPmk*#-81-E61f!IGV#)h_A6KR`K`u=zF>< zzkNUh3p7J;@Yt_Ip83QS^YJ_0shc%g^baR}$U%BiH#Xbwi-gFYc4NH%>Ac^kuQ|y$ zr^yMiA$s{cu7=0@%OSyIo3ikZ`r#T=UJw!t4{v%af}5cJ_d6MPquXf9aj=Q(U4sWW z2LvRw#Qg|Xg|l3Y@lmF#7Qw2EidI5Sc(TUON4*^>8`|)t46tRRt&J` zdes$lYh3>^{`ZGIz4M@b>emDmxS^f;bWhl0y9+1BX0`FgQfELHmXH)b4(D$X^Y{m1 z5Ek6%He4!P@8*yasW>dXH}@i4`MJ#%-Bi8p^0dNJ-(6U552PVdP{M|g*sg|{R?-ly zQ@t)+`ssmdKd(XN@ZX!kdwvq(I;VEc&KGso>N6(=5}F!hg1!2@$!~4P0ZixJo;Y;FQaB8JJbMZ*%f}?5%j#j%^%pPSMevGmb%MX@hu@qqoaJs z&1+d6c27DbD6}@e8~KQW^ryRrZ)=iYeNV~W*0yU)+-k4z&N&l5bOPwltpm;9o7sAP zIo7Bq65YGP=E@ZBxBnpTiQ7{E+l>)^9BkX)eaJ9a9Scr1*&I@TrZJTDr-Z5_&1?{+ zhz&v%M%uB#m?te{otpQs%Y$Bd=%(wp<}exD#*=1Npk_ADVrKHycv)^wKbcm+FoG}f z2zx?0t!acJ036f}r~Fd$%JWm5bhyAFnw%nK3+#@2@h3@13r^z;fGcO4|E7J+0_<=p z#z>fEOe@?KbrZCRVrqDZ9rxyq$J*%#AzWY(Tq8)Wau&wLrJisrbk++}`tBt1;O2hh ztn-~qxPF14>DOS@+(LF8zg*)M0wN z1DER9-6ZzcNhv2x-nkp*C`_n1L~OTHvCe6&wp@I1E0g|ohDY1;?qJXNt2pdYb!3g1 zm>6Tb9J0*h)Y?wH2$^*&w-=Xr($~tV--~d z=qm1=?D&a2C+RtpS8(6Mc1HfY*=C;FcoUb{oj?ruV)y@aClwtl-#hh%nOtL_6hULN zPl$ARM0jVG;peTy7zRgnhrU>Ix1N%c%W|w{7ns6a#k=pC-J@R}&rbym*pW`%rSrGk z)D;s6I8|%)~fA^cE zLFAL!!`fyi4{BBCJ{&V@P|y2avgR)DJj%!fePGor10pZWpBeNo?nXow_3<3eHK;o= zgTRoW0~!=sdaq7-==SdV`*u%@u6y}QBS#BG69W8{#KvquA$!zv?Q@C$km1>5>f?$u zP1j-wY`sl#UmC#@N8)|2Q}v|%H5yF6sDlmE^QEb` zHMp>(=0^qf-Nzf-NlIKX<0|6Mx6JV{7lJG-F5TiNT59+6-uB5~8oTH6`!+{x&}u$i z9i$sl6b7+^R(xdc0SpIXCo!TItUDBfWkP~@ICR6?c4UGm*sy)1mz@wE7O z@lZdfz!Zm|-jcSiuWSaFLhQZw|w9HC3eOf@-*HXrr zQ*gY+!cV|e78)%t;TXtI2kg}6riS=({>J^fUK40_3abEB)Z@%4q_hXebmM+DlpzziZfs5%jk>K`DcH8O{3tbM{l5V|3TDj z?9soe>TN>&78L>5JTJJ+UdmC=mCh-^g$@n!KDHKJT)$#DbDjv3%=$swd>(sc9Vo=E zP+&r33w5{;KeIcA-8L2g$>@OhQ;$N6ZXRG@hf!23->PhJDZfk=QjX)@hILtIu-zy5{F}Z zG6m`5SME`hfuUb&n!i=YGvzFJhB$zfVgd-JWTt%lppG8K@P;dXi;%(OiM$2RRKgmA z4TMVhqYq{e51MaqiwDH+a{|mvB1@vrn{e&9HV~u1E>83$A$92f&D{GmYfRsbU}Xuv z%a;R5ncSE-cg^qg-ijl9I}>;}t&<3;lo@;$fy3JrSC*cf*Kz)_S5EzEV^^>J;(sVa zC%xE2lz8|XHQgz*j+6!Mv#9-F-SisOA^(We?&W1}Ih!@rIbJmu2L{^7a}M)!Qs!@b zyQhYfM}%M8F`6tY&x+ONj?lov6qz@7S74P$wgcNJ;hS4hTHg$Tf8f7QoRnJw_?Y2^ zS;)D~7lbN6YNj&QHtWXcmEw9Qm+dmTlWpUegDsTh`@1^bJJ{&SI!a-rVU8;BMf%^q zDbe2SdZ!(RrzJi7jj0g+RX9zW?_+;w%6;j=*gf9t>y3v|*a3hbPliQaxT=Hd`~rK4N$CUjClmX(D; zIyc7+&lOo`M%1x#1NEoD_j4J}I_qe4g9`>}`n?}Q|HGbAO=%QbNsotSmY#Eu1)qhp zYNcINBu%A~l>_PYGYOQb`O^ES7cjCrtD^<})Ja&gXzH>gH-!`d^zIVugIbR`Q}YGb z%*R8UG^!K67#?$4J$wFb^EabSI|b0Zx&O7t;&;@I1En!Ozv_20(bXDN2?*84P{;oz z3SrgE&VhnBfoF5fVjAZ1zjUU;qso8g`LeXE6Qev?{3K&|QRui($t=&)M~m*G5h>Re z8^9i2{EOb-a$P5@#;P2Tf!ZZULhD9-sX<&E^O|2ZL*c}O0yj95vQlA&jH~SknMse2 zv5vhG%pgl#5Z-REw?@M~7G7e5(Xx@}tI)PJP#6U1r1Z*zd|BI?@|V2`7;}8Cw9{vIos^UH5R-t?*#hAAhco!^~3HlcWzmvS8%|UzgXYp2axwbt!xvyq<#ggLHzggVdJed`xO$&uzP1^;5 zSx$GgNhZSL{f7?sLE$3>_nSAJ)9#x&%GVb-(%K%wnkG<4HR=E_B*fBd~&;XTue!)@TPf< z2jx9KnS#9>tW9K;0QQjMWwd>!d{hp&YeDq1^)DHy+>=g)pjC7E$A2OJ!yalINR~9a z0aTyh1)7ndUtb+?Y#H6*jYsJ{ql%RD|He>$38QW9d9ZlO{$%y--#8H+#ewfMcgcHi zyavmE`B&IYQI#R2%(@~Ud3IpF)oa2jff9NkJPPcOhFe-J9M%RIHfw4RM z_E*cSZ~I))s481({m0CUUuncnB-h$M`_G-#8&%5@PROGZqgi!mQ@+!XsFpvP)GU81 z7p1pdg7JR0P|E{aZbGd^@==YoVD|Wv=k&)vBs&}9)=4+ceHzTp_Kf}p#$!(&VME!I ztao(G(x~BbwBV3`M3MU=^-j!V(`v7%4lw@dw<%{pv`p2Qr@}xtlEz$^&$K(oMR%V- z=A3ebtc9Fg%2yIYP%sgmb)$P&xu80JfrVK)N!&`bWuiyhX{XgefB{2WD8TO7=Dx=f zWG%Q=bLRYB(>Cn%dbht+`oZ+(t#SrTyvon#&)qNTe2qp|(Q{vTz83YOntSdAYa1$6 z>Kxaox8;>2a&!oyi?MoI?-*jlU#n(?=4X)e7Qwy0g66R zTvhaPX}>J8U)Wex(As8?8#?T6H+`L1a^CWgrD|c}i13@CQq)L(hDb$N^GngG1YF~A z8fMqt-y*+!p^x>MA-)y&K`A6i~_E?(hJ9j}e+@ zrF+m$q3*H458=yR@0PgShipk>z5a57qzm)!6vIbDGfOFyy_{35riWu<8*GSCgyPuu z4+rsk*}2Vp1cTI*|EoW6zL&Fl)<2Hdu>s7(;rZs~@a(*w3f^mR@KIEa@7MHv95>&f zmDy*#kVM&H8#n*=xSSH5jHpn2++%u&{33W7djg4swT;}41 zsxV>J9josN&=Z&ypFKs5Q1;&Ks)gN5AVVX$LwhS1_ZUwvXrAPV+DOY6YET1gJ_>ZXy^bbpHO61^n+jjc|kTfn zkM4+go%8caZBlu{pQoSpKZTgxM}OUsc%_>*@Ec5rvsU!m@;V+}V#LM1__27jX-`@au8M#?R zTd*V6v@mO`vU=IS%I!Z!7Zuy|iZ5ho#=a0T(P-owBKVd4!LICP#=4y1L_DlXchkys zwC@^uI$uKO-#R8oybW)Fu1dQhpx-vgmxFYi%wr0_@!~m4-MQuE_)izpGi@-i7iNS) zx6(v7Ocr~O&kV>2(9g)+DTO1+w46wKp%7V{Nlx!g*`RR9Dzz~v6nSCSkHz*0 zaARcRwB;WgAHB@vC6nxvOLt9OEItLn$TC1Q`|^wFC;tmH$5atLt!xK<$g{1AiT;YC zzYpWLTiEwAZFFXNjOQ%J1N;23pR+%+W3kd~wu6O?9e$YaNK359aPR8-?7ZHJ3CI|i zSc-78HP3TDmBliP@`kSvPgmE6uBbw4NsJ~7j|IiB;SkC*`tW;nO@jIvX?S<@UT4Vf zo-3%xt5IaFrHav6<=57;{C_yedQ&q}a{IqsXVTsN<0HU=G5+X^mAL&%`Bkjz#|EVP z;u#Gt>j^v`E0rq`@)SPLAAfWZK+_`$=@*Bx+{`&{eA(G`mPK02e12X#`9b`RIGWX~2hUM|lY@Po1Skl+2e>ANN1Z78V!e-+9%Tx7JP1v!@DATWh1~p-EwAavbyL#ew-z-4Qgh`MW8`nYtqxzv z{L+;z>@WFxg(zKCK;G=oQ z%4#@opLRM*q%-J=Q1(mr-!h`S-z~LU0!S)mRk+}O59(Lcsdj_j2;cSkF#noUFHQp2jL8^~ zV{Y4Ci+LEhy|02Tg8CAoU+S*mCXZgtjP)VR{fW0?UU72DorF^3m8B!0J*Ey4s!Q^& zCQg*)e5@Bfrr+lDKPFuGV}CP3v@kGBY^EF2dPbeAOKX|hJ#{p^Vpvkrcf-)+(bD4y zb%sk9lm-dS(ld%0f5wsM_|Kyst2h-Tm^{C==g$}gRZAwOwx8~2A77e2>dfG~X`1W2 zXFPkA8L(~R38*o+-l!azsGq$0?p?Hi?8Io?@np5=(c1m%M0g^Dhm5CROw8|Go?XE% zvDreNy1fxY2C1rMZCiOar^yk4M?+QExR>bjeuu;ze-m&}^dHe%lEEKRru6zpkVF@W z`kMOrqm3&SNmGEa=F-&lwdf&tmiEvVttDaeFFz!l3iy{4**I08odr4b!g#6o;y+g?m}2jtK+xyFfwIHw(&T9pW*38=l*uk@-+o{E2VVmE~)u9i!$hp`-6# zYK#(@qFt4mgP2{jSW3A!{v_6~G*VdKAaHDe}YFZH9dG5%+R)@$-k%-7Zp=fbx zsq|slS0iO}ZO5z4j|L0E&fhfsFqyl|@HFmoddpS5o-!${hbdxYQM#eL2`;5(2xmdv zNCe(Uh79)8@ZbWbe8}!Y?yS>ACi~CfO^;jgWoIS{Zr`%>>K^==>v|zKIl}P7WN?bw zjF$28dAHRx_7B}!CP_g_m%>l2L^{u$#omyS)kY#1RB!caQc%z8M;V;_Jpv3Aa!RbGVGB{$>Fv+Z*%g1427LER`Z$DDu#w>Q|%W&xNpJ@Va zN0M!?;tIE`eW`WNzK)=URFZ>V(lBMXWjXYG-ZUd=$x-xpVsrhOyvb9Uhbq?f#2g_u z)XH2ZM?-G^d>d`VjPa(WXG;7n47Rz$2h{5=!#&K)tvuhTs`W)xoW+RxkYjj=!Q@pU z3)_b=%Q<%(iV+Lh0thOKW2D^_0({x))Gu7-)^=pD#HNL0ij%Q|Z(jB6t2|puqq@2_ zIvoNGL_y>XQ8g?F|G8R5YkgJ)m{@-$mRauJXg-Ht#m972piC0HU0)@QfH0*rtPeT% z44P#yWP-7=HLjn}zX;z+n0)eJ(Wa>Q5~OE52{x9~ptND>Kr|ik==bCu8J+7DoK}DC z?a>*Hp>yS-Z{;#L5vEe0@ZNeFHnJqElBeT)|JUk~xaVrbfvFjvZ-DQ8j50CkmLTjc zci68GH8QHIm#j&b5XzL)<(V>;36M9SAF+{4=@~-RMqXG))oz}D13d_`7QfW=Cf~hR zr*y2k&BW|z{?Be)GNcsW0a{#0L{O8~2uPS7dMOsmEZ*^2N~Yo}bI;6ql!3x`%JRE3 zUgTka>($IFRFbiA-QkveN)$K?0^@Vjor@#K@&=KfnGS)u_IDDBjiqh5BRuL8kuj}2 zNj_JR`EP!vq?>lBuRR=*KZl?em@i8B5XW-rcefAMI*=(Q8rLU<;YoVcma6u2%YmKz z1C=4NKCIQ6f6w34SC*F-WVDkEt6EI5^Wngp*X5MJDt``Yy{dW}LB!G4Yc!EeWdX0d z!)r&6SrMTLDqeiCWgb6Ch1f3|TCjMV-~v?XPW~2lxQ^5@<>?L~9bj+WaoO1;L0psD zX3ZuXACHh=1GZPcdxg{(z3)G%kNI|t@PAZt&shIwEZNEd zPM3klfxe>BPvd*B3;R!tkk0mS44a}i-@eqZrZE~%>uigdv06p#N2wgE@*4Bjexg4SgT?<@uE$`lLl3qs0qS~|uv=S)?8|dIi3?qvl_WFJ z@q?7oZ^Ys6Vq#OUi)ty-RFsSydl7eSNh0n&^Ec6}KZmDa+5!?d*^-qHP^ zmv0xUxh*|Z`&L)moO!1h`UZ~74>KN8cTt*i`jLI?4k~>6i=Q{X=zP13>#LDudHst{ zyPFfQ$+=C_MmV3-nTe;Lh}{USzg9xKxSddXbs0|p3T2+JQIC-{15Imow)u#>sU`*E z6OkFr4A62rhWK|$(p#w7R;`Pzn77`e?sk&_$AOnNhR=l=pQA(->2WK}(J|7*D0hOS z+5UK?EY-If{4(1*&f7$QRa|u4S0I2OXWUq8aXY>^PFcU;9?MXI*wy#v%lzNIINu&x zoA}xnbV{1DCR8JAn_?=}PucEwwiO-K7&5CZ6(IBz?}zc#kck0gCG->jGGHuLRM}_k z!xJzgWp;nXp7U=b>Kh@?OuPtFvQiLF4{qOc8c&xtGJl6Ip_D_N`RMHJ82;gOMR-jJF7owsa)cnjMk8);T-R+uiBJcTjO+!^b;*WN5 zOz)UK%ZXNUjNLh3t;W)7zq(a&|NA==qu&H(H(_C4!B;yFKqDec;h6g8Zr_;v^D{&L zNvR{3BI{P%CEv?`(Qq?Ed^>Q!?Ars$O*={WH3YeCie8a;wL;pS;!z*}7tz8RE4Lclf<_?q${tCMnMCB!MW@|KmBK=Qpnkp@s#w5p40}L z0}go$X%2o3j^3wxiq#IJW@?t)m@V#?GbgS4UfdGgc#K{%5)~H9lfQz z#~EfC=!}Q>%;8s~xII`}8fR^0d&5Ja{~}AW(tVqS{-W9^#EWt*iQ9v8?<}g^!}sRj zDxt1C%CO?owv=b9ENbN+EesnDMieYb$=!|hVR@Kca??zJ_O$uWIcPnS$GwR$W{=S1 zT>!uB1A9KV2&p6v%k5+-9}iQS1`ESE2+&|&gp>JvCAjjzZ3cY21SI07IfLrT8%Z;gnp*xzK0X%uA z%4^F|6pxyYWz1=NL>|}1iz%b4P`n|z#~I^BlzvI;tZ4bcjP`~A8n*Ful{wkHLF)ZM zM`Hd+2t{(E;)31h1u-TpW{WcxF<*-@(uU7f<;&DVz0`m8<0Kxqm?@yY)m}i4s8_Bh z-*&Q6uFjpe&r$6csNJd2N@@D{;2nWx5n@q`M_u$07B};$G#J%*0QF&C$ffLNp5)_F z+-tfy#PJv+uL$q5Dn6>~sOQsX&PBaJ=mLRs&4YSe-*!#(B~%!f#L=k(O`6kJN%uuD z(JIca#N2&#{?3&XmDkLo_vHu#|3NOvwpG`dCfzCRT>NA20#`HF-&40+9!_wrH%g6A z^``$A8oGIvu2{#ms|l#}(Nx87LhHVw=xF&y*f?}#J;xq;UOKebCe-0g@oU*KWA zKi#*p?L?&i%LH6c+x%6o|QS-Jf2C&X+B1W6Lq@$#14K~l$*of=;9YP_ABG|q(xu( z8l#_l0||b7WzVXZR?J4`a?GP-)OUw$O}^{)>$v?)a$bU2pvdLKH=;h zSJ~2Zs68C!;b-vbs`8!nt#l@O4d&g?7Ws>xJP^irc+l!vG83#Do#({%XY!e&lbz{X zW9BP8-_i3fCo|8n!D)O)2>E)Ek4VE$J`!mGL~F{*W26Wd8wktC`>ofd4~F}mH`Pb2 z5KhPU!+!o@-wl5%wMz-T%H69)(E6HHw%+bsS*2e%kE9oyAKLr^bQtX@M3-XMd7DPM zKx=8mb<>7zzdBybbi*p{>41 z>(tmS#EAvPx}NbKO7v%JJ+IXVF-m`7f&y#Nr=8ELDti#kFlXwN$~vvWtf>rgdC`0q zY7Yesdt+eIc-rlq_TxXMKHnD<%pkCjmoRUI?+|7(a`Rc+{z2_;60|w$w=+z1e7EvA zDS|>=^zv85T7yV8aY<)GIm;n%zY5||jB{9pYCFv-lfg(qHL|m!xU2i|>90GtmDQ51 z_qjy3PB$7O+*NuI@$eY#EkQPm3?SwX?rYa2Ys6i*&Wqdj8I8X`o7t+;<(bq^!Mg5O z{)R=Rs@lcfZv?-$sM9&&NZ@?>m$uhyWWrj>8ufi&puyDKd&_=|o*?fQ44hfcI$sLl zKt4mqmikd_pD-}&MWq8&h?TM)1wgiS@;+ww*Svclv|Ijk;(b#0{yxTw7-cf=Guv|{ zS?K~=(%4~NrecvllRzMGQj944@(_uf+}ARY<~O91Voc5HSMxTls7*B3=g2@X^y z2mMt|xzYIYgpeydFF7Uh7C9Vd{$t0N3{TL>dwQDWSEUDNvG)*uBK{gsCv^?-XAD65 zBeGYZ+Yf}LUR#`G8NuqnM&Y?S~lRZgSYzz& z06*8MH_8}&<E%N(42;lgVUp9|^}wCcGz>T=jnwIwsJ}u|?9U1r6-WFHt*8RnjdGXQJH%t(V4f&_vo7Dcn z-OX-DJAg55eDgH|t&RA$Ek2q)=} z!|x?Dt^KXDZq!Lf{C)=%MG-$I2T1Wv;_19usL(Qm4QaCJ)j+gELb0|7r7!D4K?Cr&%8x<0-?EU+^ z11X?OeuujUh!6Z>PFIfY5ZrQOuzkO%#dyFo&y9}PW;7sJ(~+;l z{@ywfRWTJ}gyi-ZC+Yh7Xuapj$zE_KKbG0paW3-_?BOduleGOs_F)ZXfArS;FtmFU z;)cXf4x0py0D0zHG00q&!F6nkJ6x=fqW~Lg&IjD~}Ty z-nkk7R4j+dsGk5nP-*wG>BB1<3HtZQvT0j{GT9|G6rdj~L8sr1Zl699D!nkIv~>^w z!Tjr@16_hoACf8Ky&04q=k8TA_#Mv+(0#w3H#R2S2N_A-Qn#+X3-JTf<*)pi6ECjM zeQgHP12$(Wso2mGVIrW5_}Dq1%G~w>WPI;V6Y>YO6q{5Jp;_WN{<#5{vyNwvJ;#ZG zp7g!z?2};@!WcELdHxS1b0I_+#(&7xFiKMDU_>`Uw&G3mK(kUR`N3jlnn!Wi2#~(b ze+k0#ORExs;iYsy?>=JOUV`=`ms@XAzQVl^I`C0b8uNn6Ok~3tB7Z&;#zfAl;$U{3 z*;Q`V1P-I+Ej@nk8i*M$pQCbGJn6&j9f>tR1d$4#OHO0>>}kwc(Y4Q;rl90{;;MNW zn&@gVh?_yI=Zsd2UAoiwRBr0i9c1gyAs(vbEkm%<=*UO@VVd=|wzB(QIvP$+Um(f& zhYm3ixW=qk*5haUy2J)>5}o5TSQMA67rR|YY|H@KHB;FNV-Q(;5!R+(j?^_Lg;2QS zu$X9bJ}d`lZv2-}i_k3;<`W!;Vh9_X-JA|~_D}Bq(32M*d{jU>+9U#fR?UA6sk7H| zi2~zYPE7Xl{yFTNYi0&&L0f(E_|ec~c#Del{qlkAzoIFz0DeNCSs+=O?)%-wp1<$Z z$qDlrQ2t;0!;cRHvv&^iGZ4CY>8@N}hpF#O@M8VSIE4=hNFUIB35#M<*Q8XYE^*<@s)RqRC_9Gf2k3U;fAZzk>CJKl=Y?NDoE_N-rzIb1wk^&%sobs1n!p({6 zCLQRtZY8`u)BfaluYAPq%aJ{$zGW|BcE7)qMOv&%4S;zJt>IPNk{-Og2S?Q=vV2$E(}BuFA+}zseiXRTh|hF63;zw-%wY zLUi$bYf!Y$NlFghuCLiBBYRztB+=S*LY^~gYbgrAKt2@k=l#pQ-}~;(kk9>zVEQ#h z|Khjjf&SkKA3;a9v;*%LxVhvZ+ytT0k&&lfc@AKnBSdqK{3EiSQ|Q3lgC#`)+NI)p zZ7fqfiusx=O7SA*4uymLy`|HL`4D@yJyH(n)+7W~4H{)^@qq&ql0XV)XuOaemDG=z z#A%C-%#mG<2>Ti8NBLcN^^U{7^Uu;lyO&{*^*6kz%!s%@dLjC$Z-mlopa|WKzcgM0 zv;Q2|B?0JGGk!8&0(TxLj%%@oe~(KQyP#RQzH#{%wMFr0Z7%}Bks4+e;^UaSTQ<_5 z!R+Z)YQA#kSSG;N{K@x+Q=ERq`kdHLcc9~0cylVKE3$lYy!O`mUw>78Ne98kYkRMZ rp$i~jIE9mW@L3bw;`RT3b?@LzPx(vT?+?)+CsCADl_`;a`s#lGQL5*_ diff --git a/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.tsx b/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.tsx index a08ac95e3923..c123761a2f50 100644 --- a/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.tsx +++ b/web/apps/labelstudio/src/pages/Settings/StorageSettings/providers/b2.tsx @@ -1,59 +1,6 @@ import { z } from "zod"; import type { ProviderConfig } from "@humansignal/app-common/blocks/StorageProviderForm/types/provider"; -import React from "react"; - -/** - * Backblaze B2 Logo Component - * - * To use the official Backblaze logo: - * 1. Download the logo from: https://www.backblaze.com/partners/resources - * 2. Save it as: web/apps/labelstudio/public/images/storage-providers/backblaze-b2-logo.png - * 3. Rebuild the frontend: cd web && yarn build - * - * The logo will automatically be used instead of the fallback icon. - */ -const IconBackblazeB2: React.FC> = (props) => { - // Try to use the official logo if available, otherwise use fallback SVG - const logoPath = "/static/images/storage-providers/backblaze-b2-logo.png"; - const [useImage, setUseImage] = React.useState(true); - - return useImage ? ( - Backblaze B2 Cloud Storage setUseImage(false)} - style={{ objectFit: "contain" }} - {...props} - /> - ) : ( - // Fallback icon with Backblaze brand color (#D9272E) - - - - B2 - - - ); -}; +import { IconCloudProviderBackblaze } from "@humansignal/icons"; /** * Backblaze B2 Cloud Storage Provider Configuration @@ -65,7 +12,7 @@ export const b2Provider: ProviderConfig = { name: "b2", title: "Backblaze B2", description: "Configure your Backblaze B2 Cloud Storage connection with S3-compatible settings", - icon: IconBackblazeB2, // Backblaze B2 branded icon with official Backblaze red color + icon: IconCloudProviderBackblaze, fields: [ { name: "bucket", diff --git a/web/libs/ui/src/assets/icons/cloud-provider-backblaze.svg b/web/libs/ui/src/assets/icons/cloud-provider-backblaze.svg new file mode 100644 index 000000000000..5a72b8db785e --- /dev/null +++ b/web/libs/ui/src/assets/icons/cloud-provider-backblaze.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/web/libs/ui/src/assets/icons/index.ts b/web/libs/ui/src/assets/icons/index.ts index 589ca712c740..6bb93d0fd54b 100644 --- a/web/libs/ui/src/assets/icons/index.ts +++ b/web/libs/ui/src/assets/icons/index.ts @@ -270,3 +270,4 @@ export { ReactComponent as IconCloudProviderRedis } from "./cloud-provider-redis export { ReactComponent as IconCloudProviderGCS } from "./cloud-provider-gcs.svg"; export { ReactComponent as IconCloudProviderAzure } from "./cloud-provider-azure.svg"; export { ReactComponent as IconCloudProviderDatabricks } from "./cloud-provider-databricks.svg"; +export { ReactComponent as IconCloudProviderBackblaze } from "./cloud-provider-backblaze.svg";