linkml · dragon-ai-agent · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026
diff --git a/README.md b/README.md
@@ -787,6 +787,90 @@ supporting_text: "correct quote"
 
 ## Configuration
 
+### Configuration File
+
+You can create a `.linkml-reference-validator.yaml` file in your project root to configure validation behavior:
+
+```yaml
+validation:
+  cache_dir: references_cache
+  rate_limit_delay: 0.5
+
+  # Skip validation for specific prefixes (useful for unsupported reference types)
+  skip_prefixes:
+    - SRA           # Sequence Read Archive
+    - MGNIFY        # MGnify database
+    - BIOPROJECT    # NCBI BioProject (currently has API issues)
+
+  # Control severity for unfetchable references
+  unknown_prefix_severity: WARNING  # Options: ERROR, WARNING, INFO
+
+  # Map alternate prefixes to canonical ones
+  reference_prefix_map:
+    geo: GEO
+    NCBIGeo: GEO
+```
+
+### Configuration Options
+
+#### `skip_prefixes` (list of strings)
+
+List of reference prefixes to skip during validation. Prefix matching is case-insensitive (`sra` and `SRA` are equivalent). References with these prefixes return `is_valid=True` with `INFO` severity, so validation passes without blocking your workflow.
+
+**Use cases:**
+- Unsupported reference types (SRA, MGnify, etc.)
+- References that are temporarily unavailable
+- Third-party databases without registered handlers
+
+**Example:**
+```yaml
+validation:
+  skip_prefixes:
+    - SRA
+    - MGNIFY
+    - BIOPROJECT
+```
+
+With this configuration:
+```bash
+# These will pass validation with INFO severity
+linkml-reference-validator validate text "some text" SRA:PRJNA290729
+# ✓ Valid: True (INFO) - Skipping validation for reference with prefix 'SRA': SRA:PRJNA290729
+
+linkml-reference-validator validate text "some text" MGNIFY:MGYS00000596
+# ✓ Valid: True (INFO) - Skipping validation for reference with prefix 'MGNIFY': MGNIFY:MGYS00000596
+```
+
+#### `unknown_prefix_severity` (ERROR | WARNING | INFO)
+
+Controls the severity level reported when a reference cannot be fetched (unsupported prefix, network error, etc.). Default: `ERROR`.
+
+**Options:**
+- `ERROR` (default) - Result is `is_valid=False` with `ERROR` severity, blocking the workflow
+- `WARNING` - Result is still `is_valid=False`, but reported with `WARNING` severity
+- `INFO` - Result is still `is_valid=False`, but reported with `INFO` severity
+
+**Note:** `skip_prefixes` takes precedence over `unknown_prefix_severity`. A reference whose prefix is listed in `skip_prefixes` is never fetched; it returns `is_valid=True` with `INFO` severity regardless of this setting.
+
+**Example:**
+```yaml
+validation:
+  skip_prefixes:
+    - SRA              # These will be skipped (is_valid=True, INFO)
+  unknown_prefix_severity: WARNING  # Other unfetchable refs get WARNING
+```
+
+With this configuration:
+```bash
+# SRA is skipped (from skip_prefixes)
+linkml-reference-validator validate text "text" SRA:PRJNA290729
+# ✓ Valid: True (INFO) - Skipping validation for reference with prefix 'SRA': SRA:PRJNA290729
+
+# UNKNOWN prefix gets WARNING severity
+linkml-reference-validator validate text "text" UNKNOWN:12345
+# ✗ Valid: False (WARNING) - Could not fetch reference: UNKNOWN:12345
+```
+
 ### Cache Directory
 
 Default: `references_cache/` in current directory

diff --git a/docs/notebooks/05_geo_validation.ipynb b/docs/notebooks/05_geo_validation.ipynb
@@ -203,33 +203,7 @@
    "id": "a3b4c5d6",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "from Bio import Entrez\n",
-    "\n",
-    "Entrez.email = \"your-email@example.com\"\n",
-    "\n",
-    "# Step 1: Convert accession to UID via esearch\n",
-    "handle = Entrez.esearch(db=\"gds\", term=\"GSE67472[Accession]\")\n",
-    "search_result = Entrez.read(handle)\n",
-    "handle.close()\n",
-    "\n",
-    "print(f\"Accession: GSE67472\")\n",
-    "print(f\"UID(s) found: {search_result['IdList']}\")\n",
-    "\n",
-    "if search_result['IdList']:\n",
-    "    uid = search_result['IdList'][0]\n",
-    "    \n",
-    "    # Step 2: Fetch summary using UID\n",
-    "    handle = Entrez.esummary(db=\"gds\", id=uid)\n",
-    "    summary = Entrez.read(handle)\n",
-    "    handle.close()\n",
-    "    \n",
-    "    if summary:\n",
-    "        record = summary[0]\n",
-    "        print(f\"\\nDataset Title: {record.get('title')}\")\n",
-    "        print(f\"Platform: {record.get('GPL')}\")\n",
-    "        print(f\"Samples: {record.get('n_samples')}\")"
-   ]
+   "source": "from Bio import Entrez\n\nEntrez.email = \"your-email@example.com\"\n\n# Step 1: Convert accession to UID via esearch\nhandle = Entrez.esearch(db=\"gds\", term=\"GSE67472[Accession]\")\nsearch_result = Entrez.read(handle)\nhandle.close()\n\nprint(\"Accession: GSE67472\")\nprint(f\"UID(s) found: {search_result['IdList']}\")\n\nif search_result['IdList']:\n    uid = search_result['IdList'][0]\n    \n    # Step 2: Fetch summary using UID\n    handle = Entrez.esummary(db=\"gds\", id=uid)\n    summary = Entrez.read(handle)\n    handle.close()\n    \n    if summary:\n        record = summary[0]\n        print(f\"\\nDataset Title: {record.get('title')}\")\n        print(f\"Platform: {record.get('GPL')}\")\n        print(f\"Samples: {record.get('n_samples')}\")"
   },
   {
    "cell_type": "markdown",
@@ -358,4 +332,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/src/linkml_reference_validator/models.py b/src/linkml_reference_validator/models.py
@@ -344,6 +344,14 @@ class ReferenceValidationConfig(BaseModel):
         ... )
         >>> config.reference_prefix_map["geo"]
         'GEO'
+        >>> config = ReferenceValidationConfig(
+        ...     skip_prefixes=["SRA", "MGNIFY"],
+        ...     unknown_prefix_severity=ValidationSeverity.WARNING
+        ... )
+        >>> config.skip_prefixes
+        ['SRA', 'MGNIFY']
+        >>> config.unknown_prefix_severity
+        <ValidationSeverity.WARNING: 'WARNING'>
     """
 
     cache_dir: Path = Field(
@@ -384,6 +392,24 @@ class ReferenceValidationConfig(BaseModel):
             "e.g. {'geo': 'GEO', 'NCBIGeo': 'GEO'}"
         ),
     )
+    skip_prefixes: list[str] = Field(
+        default_factory=list,
+        description=(
+            "List of reference prefixes to skip during validation. "
+            "References with these prefixes will return is_valid=True with INFO severity. "
+            "Useful for unsupported or unfetchable reference types. "
+            "Case-insensitive. e.g. ['SRA', 'MGNIFY', 'BIOPROJECT']"
+        ),
+    )
+    unknown_prefix_severity: ValidationSeverity = Field(
+        default=ValidationSeverity.ERROR,
+        description=(
+            "Severity level for references that cannot be fetched "
+            "(e.g., unsupported prefix or network error). "
+            "Options: ERROR (default), WARNING, INFO. "
+            "Does not apply to prefixes in skip_prefixes list."
+        ),
+    )
 
     def get_cache_dir(self) -> Path:
         """Create and return the cache directory.

diff --git a/src/linkml_reference_validator/validation/supporting_text_validator.py b/src/linkml_reference_validator/validation/supporting_text_validator.py
@@ -145,14 +145,28 @@ def validate(
             >>> # With title validation:
             >>> # result = validator.validate("quote", "PMID:12345678", expected_title="Study Title")
         """
+        # Check if this prefix should be skipped
+        prefix = reference_id.split(":")[0].upper() if ":" in reference_id else ""
+        skip_prefixes_upper = [p.upper() for p in self.config.skip_prefixes]
+
+        if prefix and prefix in skip_prefixes_upper:
+            return ValidationResult(
+                is_valid=True,
+                reference_id=reference_id,
+                supporting_text=supporting_text,
+                severity=ValidationSeverity.INFO,
+                message=f"Skipping validation for reference with prefix '{prefix}': {reference_id}",
+                path=path,
+            )
+
         reference = self.fetcher.fetch(reference_id)
 
         if not reference:
             return ValidationResult(
                 is_valid=False,
                 reference_id=reference_id,
                 supporting_text=supporting_text,
-                severity=ValidationSeverity.ERROR,
+                severity=self.config.unknown_prefix_severity,
                 message=f"Could not fetch reference: {reference_id}",
                 path=path,
             )