Skip to content

Corrected: Improved AQUA Error Messages for Authorization and Tag-Related Uses #1141

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 3, 2025
25 changes: 25 additions & 0 deletions ads/aqua/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
HF_METADATA_FOLDER = ".cache/"
HF_LOGIN_DEFAULT_TIMEOUT = 2
MODEL_NAME_DELIMITER = ";"
AQUA_TROUBLESHOOTING_LINK = "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/troubleshooting-tips.md"

TRAINING_METRICS_FINAL = "training_metrics_final"
VALIDATION_METRICS_FINAL = "validation_metrics_final"
Expand Down Expand Up @@ -85,3 +86,27 @@
"--host",
}
TEI_CONTAINER_DEFAULT_HOST = "8080"

# User-facing message prefixes for failed OCI SDK operations, keyed by the SDK
# operation name. The troubleshooting link is concatenated directly after the
# value (see get_troubleshooting_tips in ads/aqua/extension/utils.py), so every
# entry must end with ": " to keep the URL separated from the text.
OCI_OPERATION_FAILURES = {
    "list_model_deployments": "Unable to list model deployments. See tips for troubleshooting: ",
    "list_models": "Unable to list models. See tips for troubleshooting: ",
    "get_namespace": "Unable to access specified Object Storage Bucket. See tips for troubleshooting: ",
    "list_log_groups": "Unable to access logs. See tips for troubleshooting: ",
    "list_buckets": "Unable to list Object Storage Bucket. See tips for troubleshooting: ",
    "put_object": "Unable to access or find Object Storage Bucket. See tips for troubleshooting: ",
    "list_model_version_sets": "Unable to create or fetch model version set. See tips for troubleshooting: ",
    "update_model": "Unable to update model. See tips for troubleshooting: ",
    "list_data_science_private_endpoints": "Unable to access private endpoint. See tips for troubleshooting: ",
    "create_model": "Unable to register model. See tips for troubleshooting: ",
    "create_deployment": "Unable to create deployment. See tips for troubleshooting: ",
    "create_model_version_sets": "Unable to create model version set. See tips for troubleshooting: ",
    "create_job": "Unable to create job. See tips for troubleshooting: ",
    "create_job_run": "Unable to create job run. See tips for troubleshooting: ",
}

# Generic user-facing messages keyed by HTTP status code. Keys are strings,
# so callers must look them up with str(status_code).
STATUS_CODE_MESSAGES = {
    "400": "Could not process your request due to invalid input.",
    "403": "We're having trouble processing your request with the information provided.",
    "404": "Authorization Failed: The resource you're looking for isn't accessible.",
    "408": "Server is taking too long to respond, please try again.",
}
42 changes: 6 additions & 36 deletions ads/aqua/extension/aqua_ws_msg_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,18 @@
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import traceback
import uuid
from abc import abstractmethod
from http.client import responses
from typing import List

from tornado.web import HTTPError

from ads.aqua import logger
from ads.aqua.common.decorator import handle_exceptions
from ads.aqua.extension.base_handler import AquaAPIhandler
from ads.aqua.extension.models.ws_models import (
AquaWsError,
BaseRequest,
BaseResponse,
ErrorResponse,
RequestResponseType,
)
from ads.aqua.extension.utils import construct_error
from ads.config import AQUA_TELEMETRY_BUCKET, AQUA_TELEMETRY_BUCKET_NS
from ads.telemetry.client import TelemetryClient

Expand Down Expand Up @@ -55,48 +49,24 @@ def process(self) -> BaseResponse:

def write_error(self, status_code, **kwargs):
"""AquaWSMSGhandler errors are JSON, not human pages."""
reason = kwargs.get("reason")

service_payload = kwargs.get("service_payload", {})
default_msg = responses.get(status_code, "Unknown HTTP Error")
message = AquaAPIhandler.get_default_error_messages(
service_payload, str(status_code), kwargs.get("message", default_msg)
)
reply = {
"status": status_code,
"message": message,
"service_payload": service_payload,
"reason": reason,
"request_id": str(uuid.uuid4()),
}
exc_info = kwargs.get("exc_info")
if exc_info:
logger.error(
f"Error Request ID: {reply['request_id']}\n"
f"Error: {''.join(traceback.format_exception(*exc_info))}"
)
e = exc_info[1]
if isinstance(e, HTTPError):
reply["message"] = e.log_message or message
reply["reason"] = e.reason
reply_details = construct_error(status_code, **kwargs)

logger.error(
f"Error Request ID: {reply['request_id']}\n"
f"Error: {reply['message']} {reply['reason']}"
)
# telemetry may not be present if there is an error while initializing
if hasattr(self, "telemetry"):
aqua_api_details = kwargs.get("aqua_api_details", {})
self.telemetry.record_event_async(
category="aqua/error",
action=str(status_code),
value=reason,
value=reply_details.reason,
**aqua_api_details,
)
response = AquaWsError(
status=status_code,
message=message,
message=reply_details.message,
service_payload=service_payload,
reason=reason,
reason=reply_details.reason,
)
base_message = BaseRequest.from_json(self.message, ignore_unknown=True)
return ErrorResponse(
Expand Down
78 changes: 8 additions & 70 deletions ads/aqua/extension/base_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,16 @@
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/


import json
import traceback
import uuid
from dataclasses import asdict, is_dataclass
from http.client import responses
from typing import Any

from notebook.base.handlers import APIHandler
from tornado import httputil
from tornado.web import Application, HTTPError
from tornado.web import Application

from ads.aqua import logger
from ads.aqua.common.utils import is_pydantic_model
from ads.aqua.extension.utils import construct_error
from ads.config import AQUA_TELEMETRY_BUCKET, AQUA_TELEMETRY_BUCKET_NS
from ads.telemetry.client import TelemetryClient

Expand Down Expand Up @@ -75,78 +71,20 @@ def finish(self, payload=None): # pylint: disable=W0221

def write_error(self, status_code, **kwargs):
"""AquaAPIhandler errors are JSON, not human pages."""
self.set_header("Content-Type", "application/json")
reason = kwargs.get("reason")
self.set_status(status_code, reason=reason)
service_payload = kwargs.get("service_payload", {})
default_msg = responses.get(status_code, "Unknown HTTP Error")
message = self.get_default_error_messages(
service_payload, str(status_code), kwargs.get("message", default_msg)
)

reply = {
"status": status_code,
"message": message,
"service_payload": service_payload,
"reason": reason,
"request_id": str(uuid.uuid4()),
}
exc_info = kwargs.get("exc_info")
if exc_info:
logger.error(
f"Error Request ID: {reply['request_id']}\n"
f"Error: {''.join(traceback.format_exception(*exc_info))}"
)
e = exc_info[1]
if isinstance(e, HTTPError):
reply["message"] = e.log_message or message
reply["reason"] = e.reason if e.reason else reply["reason"]

logger.error(
f"Error Request ID: {reply['request_id']}\n"
f"Error: {reply['message']} {reply['reason']}"
)
reply_details = construct_error(status_code, **kwargs)

self.set_header("Content-Type", "application/json")
self.set_status(status_code, reason=reply_details.reason)

# telemetry may not be present if there is an error while initializing
if hasattr(self, "telemetry"):
aqua_api_details = kwargs.get("aqua_api_details", {})
self.telemetry.record_event_async(
category="aqua/error",
action=str(status_code),
value=reason,
value=reply_details.reason,
**aqua_api_details,
)

self.finish(json.dumps(reply))

@staticmethod
def get_default_error_messages(
service_payload: dict,
status_code: str,
default_msg: str = "Unknown HTTP Error.",
):
"""Method that maps the error messages based on the operation performed or the status codes encountered."""

messages = {
"400": "Something went wrong with your request.",
"403": "We're having trouble processing your request with the information provided.",
"404": "Authorization Failed: The resource you're looking for isn't accessible.",
"408": "Server is taking too long to response, please try again.",
"create": "Authorization Failed: Could not create resource.",
"get": "Authorization Failed: The resource you're looking for isn't accessible.",
}

if service_payload and "operation_name" in service_payload:
operation_name = service_payload["operation_name"]
if operation_name:
if operation_name.startswith("create"):
return messages["create"] + f" Operation Name: {operation_name}."
elif operation_name.startswith("list") or operation_name.startswith(
"get"
):
return messages["get"] + f" Operation Name: {operation_name}."

if status_code in messages:
return messages[status_code]
else:
return default_msg
self.finish(reply_details)
19 changes: 19 additions & 0 deletions ads/aqua/extension/errors.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,30 @@
#!/usr/bin/env python
# Copyright (c) 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import uuid
from typing import Any, Dict, List, Optional

from pydantic import Field

from ads.aqua.config.utils.serializer import Serializable

from ads.aqua.constants import (
AQUA_TROUBLESHOOTING_LINK
)

class Errors(str):
    """Catalogue of validation error messages used by the AQUA extension.

    Entries containing ``{}`` are templates with positional placeholders for
    the offending parameter name(s).
    """

    # Problems with the request body as a whole.
    INVALID_INPUT_DATA_FORMAT = "Invalid format of input data."
    NO_INPUT_DATA = "No input data provided."

    # Problems with individual parameters (placeholders take parameter names).
    MISSING_REQUIRED_PARAMETER = "Missing required parameter: '{}'"
    MISSING_ONEOF_REQUIRED_PARAMETER = "Either '{}' or '{}' is required."
    INVALID_VALUE_OF_PARAMETER = "Invalid value of parameter: '{}'"

class ReplyDetails(Serializable):
    """Structured error reply returned to the client.

    Each field's ``description`` documents its meaning. Only ``status`` is
    required; every other field has a default.
    """

    status: int
    troubleshooting_tips: str = Field(
        f"For general tips on troubleshooting: {AQUA_TROUBLESHOOTING_LINK}",
        description="GitHub Link for troubleshooting documentation",
    )
    message: str = Field("Unknown HTTP Error.", description="Error message.")
    service_payload: Optional[Dict[str, Any]] = Field(default_factory=dict)
    reason: str = Field("Unknown error", description="Reason for Error")
    # A factory is required here: a plain default of str(uuid.uuid4()) is
    # evaluated once at class-definition time, so every reply created without
    # an explicit request_id would share the same "unique" ID.
    request_id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique ID for tracking the error.",
    )
116 changes: 114 additions & 2 deletions ads/aqua/extension/utils.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
#!/usr/bin/env python
# Copyright (c) 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import re
import traceback
import uuid
from dataclasses import fields
from datetime import datetime, timedelta
from http.client import responses
from typing import Dict, Optional

from cachetools import TTLCache, cached
from tornado.web import HTTPError

from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID
from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger
from ads.aqua.common.utils import fetch_service_compartment
from ads.aqua.extension.errors import Errors
from ads.aqua.constants import (
AQUA_TROUBLESHOOTING_LINK,
OCI_OPERATION_FAILURES,
STATUS_CODE_MESSAGES,
)
from ads.aqua.extension.errors import Errors, ReplyDetails


def validate_function_parameters(data_class, input_data: Dict):
Expand All @@ -32,3 +42,105 @@ def ui_compatability_check():
fetched from the configuration. The cached result is returned when multiple calls are made in quick succession
from the UI to avoid multiple config file loads."""
return ODSC_MODEL_COMPARTMENT_OCID or fetch_service_compartment()


def get_default_error_messages(
    service_payload: dict,
    status_code: str,
    default_msg: str = "Unknown HTTP Error.",
) -> str:
    """Map a failed request to a user-facing error message.

    Args:
        service_payload: Error context from the OCI SDK call; may be empty or
            None. The ``operation_name`` and ``message`` keys are read.
        status_code: HTTP status code. ``STATUS_CODE_MESSAGES`` is keyed by
            strings, so an int is converted before the lookup.
        default_msg: Returned when the status code has no predefined message.

    Returns:
        The predefined message for the status code, followed by the service
        message (when present) and the operation name when the payload names
        an operation; otherwise the predefined message alone, or
        ``default_msg`` for an unmapped status code.
    """
    status_code = str(status_code)
    status_message = STATUS_CODE_MESSAGES.get(status_code)
    operation_name = service_payload.get("operation_name") if service_payload else None

    if status_message is None:
        return default_msg
    if not operation_name:
        return status_message

    lines = [status_message]
    service_message = service_payload.get("message")
    # Skip the service line when the payload has no message, instead of
    # showing the literal text "None" to the user.
    if service_message:
        lines.append(str(service_message))
    lines.append(f"Operation Name: {operation_name}.")
    return "\n".join(lines)


def get_documentation_link(key: str) -> str:
    """Build the URL of the troubleshooting-page section for ``key``.

    Underscores in ``key`` are turned into hyphens and the result is appended
    to ``AQUA_TROUBLESHOOTING_LINK`` as a ``#`` fragment.
    """
    anchor = key.replace("_", "-")
    return AQUA_TROUBLESHOOTING_LINK + "#" + anchor


def get_troubleshooting_tips(service_payload: dict, status_code: int) -> str:
    """Pick the troubleshooting tip that best matches a failed request.

    Args:
        service_payload: Error context from the OCI SDK call; may be empty or
            None. Only the ``operation_name`` key is read.
        status_code: HTTP status code. Both ints and numeric strings are
            accepted; the value is normalized before comparison.

    Returns:
        For 400/404 failures of an operation listed in
        ``OCI_OPERATION_FAILURES``, that operation's message followed by a
        link to the matching section of the troubleshooting page. Otherwise a
        generic pointer to the troubleshooting page.
    """
    tip = f"For general tips on troubleshooting: {AQUA_TROUBLESHOOTING_LINK}"

    # Compare as strings so the check works whether the caller passes the
    # code as an int or as a string.
    if str(status_code) not in ("400", "404") or not service_payload:
        return tip

    failed_operation = service_payload.get("operation_name")
    prefix = OCI_OPERATION_FAILURES.get(failed_operation)
    if prefix is None:
        return tip

    # Normalize trailing whitespace so the link is always separated from the
    # message by exactly one space, even if an entry omits it.
    return f"{prefix.rstrip()} {get_documentation_link(failed_operation)}"


def construct_error(status_code: int, **kwargs) -> ReplyDetails:
    """Build and log a structured error reply for the given status code.

    Args:
        status_code (int): The HTTP status code of the error.
        **kwargs: Additional optional parameters:
            - reason (str, optional): A brief reason for the error.
            - service_payload (dict, optional): Contextual error data from
              OCI SDK methods.
            - message (str, optional): A custom error message, from the error
              raised by a failed AQUA method calling OCI SDK methods.
            - exc_info (tuple, optional): Exception information (e.g., from
              `sys.exc_info()`), used for logging.

    Returns:
        ReplyDetails: A Pydantic object describing the error, with fields:
            - status (int): The HTTP status code.
            - troubleshooting_tips (str): A GitHub link to the AQUA
              troubleshooting docs, possibly pointing at a specific header.
            - message (str): Error message.
            - service_payload (Dict[str, Any], optional): Additional context
              from the OCI Python SDK call.
            - reason (str): The reason for the error.
            - request_id (str): A unique identifier for tracking the error.

    Logs:
        - The error message and reason, tagged with the request ID.
        - The full traceback when `exc_info` is provided. If the exception is
          an `HTTPError`, its log message and reason override the reply's
          message and reason.
    """
    # Use `or` rather than a .get() default so that an explicit None from the
    # caller also falls back to a usable value.
    reason = kwargs.get("reason") or "Unknown Error"
    service_payload = kwargs.get("service_payload") or {}
    default_msg = responses.get(status_code, "Unknown HTTP Error")
    message = get_default_error_messages(
        service_payload, str(status_code), kwargs.get("message") or default_msg
    )

    tips = get_troubleshooting_tips(service_payload, status_code)

    reply = ReplyDetails(
        status=status_code,
        troubleshooting_tips=tips,
        message=message,
        service_payload=service_payload,
        reason=reason,
        request_id=str(uuid.uuid4()),
    )

    exc_info = kwargs.get("exc_info")
    if exc_info:
        logger.error(
            f"Error Request ID: {reply.request_id}\n"
            f"Error: {''.join(traceback.format_exception(*exc_info))}"
        )
        e = exc_info[1]
        if isinstance(e, HTTPError):
            # Prefer the details carried by the HTTPError itself.
            reply.message = e.log_message or message
            reply.reason = e.reason if e.reason else reply.reason

    logger.error(
        f"Error Request ID: {reply.request_id}\n"
        f"Error: {reply.message} {reply.reason}"
    )
    return reply
Loading
Loading