Skip to content

Commit

Permalink
Created TPU Topology Sample + updated tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Thoughtseize1 committed Oct 22, 2024
1 parent 64cf1d5 commit 29c7436
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 1 deletion.
75 changes: 75 additions & 0 deletions tpu/create_tpu_topology.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from google.cloud.tpu_v2 import Node


def create_cloud_tpu_with_topology(
    project_id: str,
    zone: str,
    tpu_name: str,
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
) -> Node:
    """Create a Cloud TPU node that uses an explicit accelerator topology.

    The node is requested with a V3 accelerator arranged in a "2x2"
    topology. The call waits for the create operation to finish and prints
    the accelerator configuration of the resulting node.

    Args:
        project_id: The ID of the Google Cloud project.
        zone: The zone where the TPU node will be created.
        tpu_name: The name (node ID) of the TPU node.
        runtime_version: The runtime version for the TPU.

    Returns:
        The node returned by the completed create operation.
    """
    # [START tpu_vm_create_topology]
    from google.cloud import tpu_v2

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"

    # Describe the accelerator: a TPU v3-8 laid out as a 2x2 topology.
    accelerator_config = tpu_v2.AcceleratorConfig(
        type_=tpu_v2.AcceleratorConfig.Type.V3,
        topology="2x2",
    )
    node = tpu_v2.Node(
        accelerator_config=accelerator_config,
        runtime_version=runtime_version,
    )

    client = tpu_v2.TpuClient()
    request = tpu_v2.CreateNodeRequest(
        parent=f"projects/{project_id}/locations/{zone}",
        node_id=tpu_name,
        node=node,
    )

    create_operation = client.create_node(request=request)
    print("Waiting for operation to complete...")

    # result() blocks until the long-running operation has finished.
    created_node = create_operation.result()
    print(created_node.accelerator_config)
    # Example response:
    # type_: V3
    # topology: "2x2"

    # [END tpu_vm_create_topology]
    return created_node


if __name__ == "__main__":
    # The project is read from the environment; the zone and node name are
    # fixed sample values.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
    ZONE = "us-central1-a"
    create_cloud_tpu_with_topology(
        project_id=PROJECT_ID,
        zone=ZONE,
        tpu_name="tpu-name",
    )
17 changes: 16 additions & 1 deletion tpu/test_tpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,20 @@
import os
import uuid

from google.cloud.tpu_v2.types import Node
from google.cloud.tpu_v2.types import AcceleratorConfig, Node

import pytest

import create_tpu
import create_tpu_topology
import create_tpu_with_script
import delete_tpu
import get_tpu
import list_tpu
import start_tpu
import stop_tpu


TPU_NAME = "test-tpu-" + uuid.uuid4().hex[:10]
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
ZONE = "us-south1-a"
Expand Down Expand Up @@ -78,3 +80,16 @@ def test_stop_tpu() -> None:
def test_start_tpu() -> None:
    """Check that starting the TPU named by TPU_NAME yields a READY node."""
    started_node = start_tpu.start_cloud_tpu(PROJECT_ID, ZONE, TPU_NAME)
    expected_state = Node.State.READY
    assert started_node.state == expected_state


def test_with_topology() -> None:
    """Create a TPU with an explicit topology and verify its accelerator config.

    The node gets a unique name in its own zone and is always deleted
    afterwards, whether or not creation and the assertions succeed.
    """
    zone = "us-central1-a"
    unique_suffix = uuid.uuid4().hex[:5]
    name = "topology-tpu-" + unique_suffix
    try:
        created = create_tpu_topology.create_cloud_tpu_with_topology(
            PROJECT_ID, zone, name, TPU_VERSION
        )
        config = created.accelerator_config
        assert config.type_ == AcceleratorConfig.Type.V3
        assert config.topology == "2x2"
    finally:
        # Clean up so a failed assertion does not leave the TPU behind.
        delete_tpu.delete_cloud_tpu(PROJECT_ID, zone, name)

0 comments on commit 29c7436

Please sign in to comment.