Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ ENHANCEMENTS:
* Add support for setting resource processor VMSS SKU via environment variables ([#4936](https://github.com/microsoft/AzureTRE/issues/4936))
* Exclude recovery service vaults from e2e tests ([#4920](https://github.com/microsoft/AzureTRE/issues/4920))

BUG FIXES:
* A workspace service's `address_space` is now removed from its parent workspace's `address_spaces` when the workspace service is deleted, preventing IP address range exhaustion ([#4727](https://github.com/microsoft/AzureTRE/issues/4727))

## (0.28.0) (March 2, 2026)
**BREAKING CHANGES**
* Sonatype Nexus shared service now requires explicit EULA acceptance (`accept_nexus_eula: true`) when deploying. This ensures compliance with Sonatype Nexus Community Edition licensing. ([#4842](https://github.com/microsoft/AzureTRE/issues/4842))
Expand Down
2 changes: 1 addition & 1 deletion api_app/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.25.16"
__version__ = "0.25.17"
39 changes: 38 additions & 1 deletion api_app/service_bus/deployment_status_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pydantic import ValidationError, parse_obj_as

from api.routes.resource_helpers import get_timestamp
from models.domain.resource import Output
from models.domain.resource import Output, ResourceType
from db.repositories.resources_history import ResourceHistoryRepository
from models.domain.request_action import RequestAction
from db.repositories.resource_templates import ResourceTemplateRepository
Expand All @@ -21,6 +21,9 @@
from models.domain.operation import DeploymentStatusUpdateMessage, Operation, OperationStep, Status
from resources import strings
from services.logging import logger, tracer
from db.repositories.workspaces import WorkspaceRepository
from models.schemas.resource import ResourcePatch
from azure.cosmos.exceptions import CosmosAccessConditionFailedError


class DeploymentStatusUpdater():
Expand Down Expand Up @@ -187,6 +190,40 @@ async def update_status_in_database(self, message: DeploymentStatusUpdateMessage
next_step.status = Status.UpdatingFailed
await self.update_overall_operation_status(operation, next_step, is_last_step)
await self.operations_repo.update_item(operation)
# If the 'main' step succeeded for an uninstall operation, free any allocated address space
# owned by a WorkspaceService resource. We trigger cleanup when the step with templateStepId == 'main'
# is successful; this ensures the primary resource has been destroyed successfully before attempting to free the ip address space
try:
# if the step that just succeeded is the main step for this operation, and this is an uninstall,
# proceed with post-uninstall cleanup. No need to scan the operation.steps list again.
if step_to_update.templateStepId == "main" and step_to_update.is_success() and operation.action == RequestAction.UnInstall:
if resource_to_persist.get("resourceType") == ResourceType.WorkspaceService:
address_to_free = resource_to_persist.get("properties", {}).get("address_space")
parent_workspace_id = resource_to_persist.get("workspaceId")
if address_to_free and parent_workspace_id:
try:
workspace_repo = await WorkspaceRepository.create()
max_retries = 3
for attempt in range(max_retries):
workspace = await workspace_repo.get_workspace_by_id(parent_workspace_id)
workspace_address_spaces = workspace.properties.get("address_spaces", [])
if address_to_free not in workspace_address_spaces:
break
new_address_spaces = [a for a in workspace_address_spaces if a != address_to_free]
workspace_patch = ResourcePatch()
workspace_patch.properties = {"address_spaces": new_address_spaces}
try:
await workspace_repo.patch_workspace(workspace, workspace_patch, workspace.etag, self.resource_template_repo, self.resource_history_repo, operation.user, False)
logger.info(f"Freed address space {address_to_free} from workspace {parent_workspace_id} after successful uninstall of {resource_id}")
Comment thread
JC-wk marked this conversation as resolved.
break
except CosmosAccessConditionFailedError:
if attempt == max_retries - 1:
raise
logger.warning(f"ETag conflict when freeing workspace address space after successful uninstall. Retrying (attempt {attempt + 1}/{max_retries})...")
except Exception:
logger.exception("Failed to free workspace address space after successful uninstall")
except Exception:
logger.exception("Unexpected error during post-uninstall address space cleanup")

result = True

Expand Down
282 changes: 282 additions & 0 deletions api_app/tests_ma/test_service_bus/test_deployment_status_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,285 @@ async def test_convert_outputs_to_dict():
'list2': ['one', 'two']
}
assert status_updater.convert_outputs_to_dict(deployment_status_update_message.outputs) == expected_result


@patch('service_bus.deployment_status_updater.WorkspaceRepository.create')
@patch('service_bus.deployment_status_updater.ResourceHistoryRepository.create')
@patch('service_bus.deployment_status_updater.ResourceTemplateRepository.create')
@patch("service_bus.deployment_status_updater.get_timestamp", return_value=FAKE_UPDATE_TIMESTAMP)
@patch('service_bus.deployment_status_updater.OperationRepository.create')
@patch('service_bus.deployment_status_updater.ResourceRepository.create')
async def test_workspace_service_uninstall_frees_address_space(
    resource_repo,
    operations_repo_mock,
    _,
    __,
    ___,
    workspace_repo_mock
):
    """A successful workspace-service uninstall patches the parent workspace without the service's address space."""
    service_id = "59b5c8e7-5c42-4fcb-a7fd-294cfc27aa76"
    workspace_id = "1111c8e7-5c42-4fcb-a7fd-294cfc27aa76"
    retained_range = "10.0.0.0/22"
    released_range = "10.1.0.0/22"

    # Incoming status message reporting the service as deleted.
    received_message = ServiceBusReceivedMessageMock({
        "operationId": OPERATION_ID,
        "stepId": "random-uuid",
        "id": service_id,
        "status": Status.Deleted,
        "message": "uninstall succeeded",
        "correlation_id": "test_correlation_id"
    })

    # The operation looked up for the message is an uninstall of the service.
    operations_repo_mock.return_value.get_operation_by_id.return_value = create_sample_operation(
        service_id, RequestAction.UnInstall
    )

    resources = resource_repo.return_value
    # Object form of the service, as returned by get_resource_by_id.
    resources.get_resource_by_id.return_value = MagicMock(deploymentStatus=None)
    # Dict form of the service, as returned by get_resource_dict_by_id.
    resources.get_resource_dict_by_id.return_value = {
        "id": service_id,
        "resourceType": ResourceType.WorkspaceService,
        "workspaceId": workspace_id,
        "properties": {
            "address_space": released_range
        }
    }

    # Parent workspace that still lists the range owned by the service.
    workspace = create_sample_workspace_object(workspace_id)
    workspace.properties = {"address_spaces": [retained_range, released_range]}
    workspace.etag = "parent-workspace-etag"

    workspace_repo = AsyncMock()
    workspace_repo.get_workspace_by_id.return_value = workspace
    workspace_repo_mock.return_value = workspace_repo

    updater = DeploymentStatusUpdater()
    await updater.init_repos()
    complete_message = await updater.process_message(received_message)

    assert complete_message is True
    workspace_repo.patch_workspace.assert_called_once()
    # Only the first three positional arguments of the patch call are checked.
    patched_workspace, patch_body, etag_used = workspace_repo.patch_workspace.call_args.args[:3]
    assert patched_workspace == workspace
    assert patch_body.properties == {"address_spaces": [retained_range]}
    assert etag_used == "parent-workspace-etag"


@pytest.mark.parametrize("missing_property", ["address_space", "workspaceId"])
@patch('service_bus.deployment_status_updater.WorkspaceRepository.create')
@patch('service_bus.deployment_status_updater.ResourceHistoryRepository.create')
@patch('service_bus.deployment_status_updater.ResourceTemplateRepository.create')
@patch("service_bus.deployment_status_updater.get_timestamp", return_value=FAKE_UPDATE_TIMESTAMP)
@patch('service_bus.deployment_status_updater.OperationRepository.create')
@patch('service_bus.deployment_status_updater.ResourceRepository.create')
async def test_workspace_service_uninstall_does_not_free_address_space_if_missing(
    resource_repo,
    operations_repo_mock,
    _,
    __,
    ___,
    workspace_repo_mock,
    missing_property
):
    """No workspace patch is issued when the service dict lacks its address space or its workspace id."""
    service_id = "59b5c8e7-5c42-4fcb-a7fd-294cfc27aa76"
    workspace_id = "1111c8e7-5c42-4fcb-a7fd-294cfc27aa76"
    service_range = "10.1.0.0/22"

    # Incoming status message reporting the service as deleted.
    received_message = ServiceBusReceivedMessageMock({
        "operationId": OPERATION_ID,
        "stepId": "random-uuid",
        "id": service_id,
        "status": Status.Deleted,
        "message": "uninstall succeeded",
        "correlation_id": "test_correlation_id"
    })

    # The operation looked up for the message is an uninstall of the service.
    operations_repo_mock.return_value.get_operation_by_id.return_value = create_sample_operation(
        service_id, RequestAction.UnInstall
    )

    resources = resource_repo.return_value
    resources.get_resource_by_id.return_value = MagicMock(deploymentStatus=None)

    # Start from a complete service dict, then drop the field under test.
    service_dict = {
        "id": service_id,
        "resourceType": ResourceType.WorkspaceService,
        "workspaceId": workspace_id,
        "properties": {
            "address_space": service_range
        }
    }
    if missing_property == "address_space":
        service_dict["properties"].pop("address_space")
    elif missing_property == "workspaceId":
        service_dict.pop("workspaceId")

    resources.get_resource_dict_by_id.return_value = service_dict

    workspace_repo = AsyncMock()
    workspace_repo_mock.return_value = workspace_repo

    updater = DeploymentStatusUpdater()
    await updater.init_repos()
    complete_message = await updater.process_message(received_message)

    assert complete_message is True
    workspace_repo.patch_workspace.assert_not_called()


@patch('service_bus.deployment_status_updater.WorkspaceRepository.create')
@patch('service_bus.deployment_status_updater.ResourceHistoryRepository.create')
@patch('service_bus.deployment_status_updater.ResourceTemplateRepository.create')
@patch("service_bus.deployment_status_updater.get_timestamp", return_value=FAKE_UPDATE_TIMESTAMP)
@patch('service_bus.deployment_status_updater.OperationRepository.create')
@patch('service_bus.deployment_status_updater.ResourceRepository.create')
async def test_workspace_service_uninstall_frees_address_space_with_retry_on_etag_conflict(
    resource_repo,
    operations_repo_mock,
    _,
    __,
    ___,
    workspace_repo_mock
):
    """An ETag conflict on the first workspace patch leads to a re-read and a second patch attempt."""
    from azure.cosmos.exceptions import CosmosAccessConditionFailedError

    service_id = "59b5c8e7-5c42-4fcb-a7fd-294cfc27aa76"
    workspace_id = "1111c8e7-5c42-4fcb-a7fd-294cfc27aa76"
    retained_range = "10.0.0.0/22"
    released_range = "10.1.0.0/22"

    # Incoming status message reporting the service as deleted.
    received_message = ServiceBusReceivedMessageMock({
        "operationId": OPERATION_ID,
        "stepId": "random-uuid",
        "id": service_id,
        "status": Status.Deleted,
        "message": "uninstall succeeded",
        "correlation_id": "test_correlation_id"
    })

    # The operation looked up for the message is an uninstall of the service.
    operations_repo_mock.return_value.get_operation_by_id.return_value = create_sample_operation(
        service_id, RequestAction.UnInstall
    )

    resources = resource_repo.return_value
    # Object form of the service, as returned by get_resource_by_id.
    resources.get_resource_by_id.return_value = MagicMock(deploymentStatus=None)
    # Dict form of the service, as returned by get_resource_dict_by_id.
    resources.get_resource_dict_by_id.return_value = {
        "id": service_id,
        "resourceType": ResourceType.WorkspaceService,
        "workspaceId": workspace_id,
        "properties": {
            "address_space": released_range
        }
    }

    # Parent workspace that still lists the range owned by the service.
    workspace = create_sample_workspace_object(workspace_id)
    workspace.properties = {"address_spaces": [retained_range, released_range]}
    workspace.etag = "parent-workspace-etag"

    workspace_repo = AsyncMock()
    workspace_repo.get_workspace_by_id.return_value = workspace
    # The first patch call raises the conflict error; the second returns normally.
    workspace_repo.patch_workspace.side_effect = [
        CosmosAccessConditionFailedError,
        None,
    ]
    workspace_repo_mock.return_value = workspace_repo

    updater = DeploymentStatusUpdater()
    await updater.init_repos()
    complete_message = await updater.process_message(received_message)

    assert complete_message is True
    # One read and one patch per attempt: the failed attempt plus the retry.
    assert workspace_repo.get_workspace_by_id.call_count == 2
    assert workspace_repo.patch_workspace.call_count == 2


@patch('service_bus.deployment_status_updater.WorkspaceRepository.create')
@patch('service_bus.deployment_status_updater.ResourceHistoryRepository.create')
@patch('service_bus.deployment_status_updater.ResourceTemplateRepository.create')
@patch("service_bus.deployment_status_updater.get_timestamp", return_value=FAKE_UPDATE_TIMESTAMP)
@patch('service_bus.deployment_status_updater.OperationRepository.create')
@patch('service_bus.deployment_status_updater.ResourceRepository.create')
@patch('services.logging.logger.exception')
async def test_workspace_service_uninstall_logs_error_after_max_retries(
    logging_mock,
    resource_repo,
    operations_repo_mock,
    _,
    __,
    ___,
    workspace_repo_mock
):
    """When every workspace patch hits an ETag conflict, the failure is logged and the message still completes."""
    from azure.cosmos.exceptions import CosmosAccessConditionFailedError

    service_id = "59b5c8e7-5c42-4fcb-a7fd-294cfc27aa76"
    workspace_id = "1111c8e7-5c42-4fcb-a7fd-294cfc27aa76"
    retained_range = "10.0.0.0/22"
    released_range = "10.1.0.0/22"

    # Incoming status message reporting the service as deleted.
    received_message = ServiceBusReceivedMessageMock({
        "operationId": OPERATION_ID,
        "stepId": "random-uuid",
        "id": service_id,
        "status": Status.Deleted,
        "message": "uninstall succeeded",
        "correlation_id": "test_correlation_id"
    })

    # The operation looked up for the message is an uninstall of the service.
    operations_repo_mock.return_value.get_operation_by_id.return_value = create_sample_operation(
        service_id, RequestAction.UnInstall
    )

    resources = resource_repo.return_value
    # Object form of the service, as returned by get_resource_by_id.
    resources.get_resource_by_id.return_value = MagicMock(deploymentStatus=None)
    # Dict form of the service, as returned by get_resource_dict_by_id.
    resources.get_resource_dict_by_id.return_value = {
        "id": service_id,
        "resourceType": ResourceType.WorkspaceService,
        "workspaceId": workspace_id,
        "properties": {
            "address_space": released_range
        }
    }

    # Parent workspace that still lists the range owned by the service.
    workspace = create_sample_workspace_object(workspace_id)
    workspace.properties = {"address_spaces": [retained_range, released_range]}
    workspace.etag = "parent-workspace-etag"

    workspace_repo = AsyncMock()
    workspace_repo.get_workspace_by_id.return_value = workspace
    # Every patch attempt raises the conflict error.
    workspace_repo.patch_workspace.side_effect = CosmosAccessConditionFailedError
    workspace_repo_mock.return_value = workspace_repo

    updater = DeploymentStatusUpdater()
    await updater.init_repos()
    complete_message = await updater.process_message(received_message)

    # The cleanup failure is logged rather than propagated, so processing still reports success.
    assert complete_message is True
    # Three attempts in total, each with one read and one patch.
    assert workspace_repo.get_workspace_by_id.call_count == 3
    assert workspace_repo.patch_workspace.call_count == 3
    # The final failure is logged exactly once with the cleanup error message.
    logging_mock.assert_called_once_with("Failed to free workspace address space after successful uninstall")
2 changes: 1 addition & 1 deletion templates/workspace_services/azureml/porter.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
schemaVersion: 1.0.0
name: tre-service-azureml
version: 1.1.4
version: 1.1.5
description: "An Azure TRE service for Azure Machine Learning"
registry: azuretre
dockerfile: Dockerfile.tmpl
Expand Down
7 changes: 7 additions & 0 deletions templates/workspace_services/azureml/template_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,13 @@
},
{
"stepId": "main"
},
{
"stepId": "f720975a-c81e-477e-854e-53fde86e5e57",
"stepTitle": "Upgrade to ensure workspace is aware of address space removal",
"resourceType": "workspace",
"resourceAction": "upgrade",
"properties": []
}
]
}
Expand Down
2 changes: 1 addition & 1 deletion templates/workspace_services/databricks/porter.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
schemaVersion: 1.0.0
name: tre-service-databricks
version: 1.0.16
version: 1.0.17
description: "An Azure TRE service for Azure Databricks."
registry: azuretre
dockerfile: Dockerfile.tmpl
Expand Down
Loading
Loading