-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(tpu): add tpu queued resources network (#9605)
* Added tpu_queued_resources_network sample * Changed CODEOWNERS * Update CreateQueuedResourceWithNetwork.java --------- Co-authored-by: Eric Schmidt <[email protected]>
- Loading branch information
1 parent
19dcf09
commit 002b4ae
Showing
7 changed files
with
539 additions
and
0 deletions.
There are no files selected for viewing
Validating CODEOWNERS rules …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
Copyright 2024 Google LLC | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
--> | ||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xmlns="http://maven.apache.org/POM/4.0.0" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<groupId>com.example.tpu</groupId> | ||
<artifactId>gce-diregapic-samples</artifactId> | ||
<version>1.0-SNAPSHOT</version> | ||
|
||
<!-- | ||
The parent pom defines common style checks and testing strategies for our samples. | ||
Removing or replacing it should not affect the execution of the samples in any way. | ||
--> | ||
<parent> | ||
<artifactId>shared-configuration</artifactId> | ||
<groupId>com.google.cloud.samples</groupId> | ||
<version>1.2.0</version> | ||
</parent> | ||
|
||
<properties> | ||
<maven.compiler.source>11</maven.compiler.source> | ||
<maven.compiler.target>11</maven.compiler.target> | ||
</properties> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>com.google.cloud</groupId> | ||
<artifactId>google-cloud-tpu</artifactId> | ||
<version>2.52.0</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>com.google.api</groupId> | ||
<artifactId>gax</artifactId> | ||
</dependency> | ||
|
||
<!-- Test dependencies --> | ||
<dependency> | ||
<artifactId>google-cloud-storage</artifactId> | ||
<groupId>com.google.cloud</groupId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<artifactId>truth</artifactId> | ||
<groupId>com.google.truth</groupId> | ||
<scope>test</scope> | ||
<version>1.4.0</version> | ||
</dependency> | ||
<dependency> | ||
<artifactId>junit</artifactId> | ||
<groupId>junit</groupId> | ||
<scope>test</scope> | ||
<version>4.13.2</version> | ||
</dependency> | ||
|
||
<!-- | ||
JUnit Jupiter dependencies to run BeforeEach and AfterEach methods | ||
(in tandem with mvn surefire) before every test. | ||
Without these, mvn surefire skips these methods and leads to concurrency | ||
issues. | ||
--> | ||
<dependency> | ||
<groupId>org.junit.jupiter</groupId> | ||
<artifactId>junit-jupiter-engine</artifactId> | ||
<version>5.10.2</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.mockito</groupId> | ||
<artifactId>mockito-core</artifactId> | ||
<version>5.13.0</version> | ||
<scope>test</scope> | ||
</dependency> | ||
</dependencies> | ||
|
||
<dependencyManagement> | ||
<dependencies> | ||
<dependency> | ||
<artifactId>libraries-bom</artifactId> | ||
<groupId>com.google.cloud</groupId> | ||
<scope>import</scope> | ||
<type>pom</type> | ||
<version>26.40.0</version> | ||
</dependency> | ||
</dependencies> | ||
</dependencyManagement> | ||
|
||
</project> |
139 changes: 139 additions & 0 deletions
139
tpu/src/main/java/tpu/CreateQueuedResourceWithNetwork.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
/* | ||
* Copyright 2024 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package tpu; | ||
|
||
//[START tpu_queued_resources_network] | ||
import com.google.api.gax.retrying.RetrySettings; | ||
import com.google.cloud.tpu.v2alpha1.CreateQueuedResourceRequest; | ||
import com.google.cloud.tpu.v2alpha1.NetworkConfig; | ||
import com.google.cloud.tpu.v2alpha1.Node; | ||
import com.google.cloud.tpu.v2alpha1.QueuedResource; | ||
import com.google.cloud.tpu.v2alpha1.TpuClient; | ||
import com.google.cloud.tpu.v2alpha1.TpuSettings; | ||
import java.io.IOException; | ||
import java.util.concurrent.ExecutionException; | ||
import org.threeten.bp.Duration; | ||
|
||
public class CreateQueuedResourceWithNetwork { | ||
public static void main(String[] args) | ||
throws IOException, ExecutionException, InterruptedException { | ||
// TODO(developer): Replace these variables before running the sample. | ||
// Project ID or project number of the Google Cloud project you want to create a node. | ||
String projectId = "YOUR_PROJECT_ID"; | ||
// The zone in which to create the TPU. | ||
// For more information about supported TPU types for specific zones, | ||
// see https://cloud.google.com/tpu/docs/regions-zones | ||
String zone = "europe-west4-a"; | ||
// The name for your TPU. | ||
String nodeName = "YOUR_TPU_NAME"; | ||
// The accelerator type that specifies the version and size of the Cloud TPU you want to create. | ||
// For more information about supported accelerator types for each TPU version, | ||
// see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions. | ||
String tpuType = "v2-8"; | ||
// Software version that specifies the version of the TPU runtime to install. | ||
// For more information see https://cloud.google.com/tpu/docs/runtimes | ||
String tpuSoftwareVersion = "tpu-vm-tf-2.14.1"; | ||
// The name for your Queued Resource. | ||
String queuedResourceId = "QUEUED_RESOURCE_ID"; | ||
// The name of the network you want the node to connect to. | ||
// The network should be assigned to your project. | ||
String networkName = "YOUR_COMPUTE_TPU_NETWORK"; | ||
|
||
createQueuedResourceWithNetwork(projectId, zone, queuedResourceId, nodeName, | ||
tpuType, tpuSoftwareVersion, networkName); | ||
} | ||
|
||
// Creates a Queued Resource with network configuration. | ||
public static QueuedResource createQueuedResourceWithNetwork( | ||
String projectId, String zone, String queuedResourceId, String nodeName, | ||
String tpuType, String tpuSoftwareVersion, String networkName) | ||
throws IOException, ExecutionException, InterruptedException { | ||
// With these settings the client library handles the Operation's polling mechanism | ||
// and prevent CancellationException error | ||
TpuSettings.Builder clientSettings = | ||
TpuSettings.newBuilder(); | ||
clientSettings | ||
.createQueuedResourceSettings() | ||
.setRetrySettings( | ||
RetrySettings.newBuilder() | ||
.setInitialRetryDelay(Duration.ofMillis(5000L)) | ||
.setRetryDelayMultiplier(2.0) | ||
.setInitialRpcTimeout(Duration.ZERO) | ||
.setRpcTimeoutMultiplier(1.0) | ||
.setMaxRetryDelay(Duration.ofMillis(45000L)) | ||
.setTotalTimeout(Duration.ofHours(24L)) | ||
.build()); | ||
// Initialize client that will be used to send requests. This client only needs to be created | ||
// once, and can be reused for multiple requests. | ||
try (TpuClient tpuClient = TpuClient.create(clientSettings.build())) { | ||
String parent = String.format("projects/%s/locations/%s", projectId, zone); | ||
String region = zone.substring(0, zone.length() - 2); | ||
|
||
// Specify the network and subnetwork that you want to connect your TPU to. | ||
NetworkConfig networkConfig = | ||
NetworkConfig.newBuilder() | ||
.setEnableExternalIps(true) | ||
.setNetwork(String.format("projects/%s/global/networks/%s", projectId, networkName)) | ||
.setSubnetwork( | ||
String.format( | ||
"projects/%s/regions/%s/subnetworks/%s", projectId, region, networkName)) | ||
.build(); | ||
|
||
// Create a node | ||
Node node = | ||
Node.newBuilder() | ||
.setName(nodeName) | ||
.setAcceleratorType(tpuType) | ||
.setRuntimeVersion(tpuSoftwareVersion) | ||
.setNetworkConfig(networkConfig) | ||
.setQueuedResource( | ||
String.format( | ||
"projects/%s/locations/%s/queuedResources/%s", | ||
projectId, zone, queuedResourceId)) | ||
.build(); | ||
|
||
// Create queued resource | ||
QueuedResource queuedResource = | ||
QueuedResource.newBuilder() | ||
.setName(queuedResourceId) | ||
.setTpu( | ||
QueuedResource.Tpu.newBuilder() | ||
.addNodeSpec( | ||
QueuedResource.Tpu.NodeSpec.newBuilder() | ||
.setParent(parent) | ||
.setNode(node) | ||
.setNodeId(nodeName) | ||
.build()) | ||
.build()) | ||
.build(); | ||
|
||
CreateQueuedResourceRequest request = | ||
CreateQueuedResourceRequest.newBuilder() | ||
.setParent(parent) | ||
.setQueuedResource(queuedResource) | ||
.setQueuedResourceId(queuedResourceId) | ||
.build(); | ||
|
||
QueuedResource response = tpuClient.createQueuedResourceAsync(request).get(); | ||
// You can wait until TPU Node is READY, | ||
// and check its status using getTpuVm() from "tpu_vm_get" sample. | ||
System.out.println("Queued Resource created: " + queuedResourceId); | ||
return response; | ||
} | ||
} | ||
} | ||
//[END tpu_queued_resources_network] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* | ||
* Copyright 2024 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package tpu; | ||
|
||
//[START tpu_queued_resources_delete_force] | ||
|
||
import com.google.api.gax.retrying.RetrySettings; | ||
import com.google.api.gax.rpc.UnknownException; | ||
import com.google.cloud.tpu.v2alpha1.DeleteQueuedResourceRequest; | ||
import com.google.cloud.tpu.v2alpha1.TpuClient; | ||
import com.google.cloud.tpu.v2alpha1.TpuSettings; | ||
import java.io.IOException; | ||
import java.util.concurrent.ExecutionException; | ||
import org.threeten.bp.Duration; | ||
|
||
public class DeleteForceQueuedResource { | ||
public static void main(String[] args) { | ||
// TODO(developer): Replace these variables before running the sample. | ||
// Project ID or project number of the Google Cloud project. | ||
String projectId = "YOUR_PROJECT_ID"; | ||
// The zone in which the TPU was created. | ||
String zone = "europe-west4-a"; | ||
// The name for your Queued Resource. | ||
String queuedResourceId = "QUEUED_RESOURCE_ID"; | ||
|
||
deleteForceQueuedResource(projectId, zone, queuedResourceId); | ||
} | ||
|
||
// Deletes a Queued Resource asynchronously with --force flag. | ||
public static void deleteForceQueuedResource( | ||
String projectId, String zone, String queuedResourceId) { | ||
String name = String.format("projects/%s/locations/%s/queuedResources/%s", | ||
projectId, zone, queuedResourceId); | ||
// With these settings the client library handles the Operation's polling mechanism | ||
// and prevent CancellationException error | ||
TpuSettings.Builder clientSettings = | ||
TpuSettings.newBuilder(); | ||
clientSettings | ||
.deleteQueuedResourceSettings() | ||
.setRetrySettings( | ||
RetrySettings.newBuilder() | ||
.setInitialRetryDelay(Duration.ofMillis(5000L)) | ||
.setRetryDelayMultiplier(2.0) | ||
.setInitialRpcTimeout(Duration.ZERO) | ||
.setRpcTimeoutMultiplier(1.0) | ||
.setMaxRetryDelay(Duration.ofMillis(45000L)) | ||
.setTotalTimeout(Duration.ofHours(24L)) | ||
.build()); | ||
|
||
// Initialize client that will be used to send requests. This client only needs to be created | ||
// once, and can be reused for multiple requests. | ||
try (TpuClient tpuClient = TpuClient.create(clientSettings.build())) { | ||
DeleteQueuedResourceRequest request = | ||
DeleteQueuedResourceRequest.newBuilder().setName(name).setForce(true).build(); | ||
|
||
tpuClient.deleteQueuedResourceAsync(request).get(); | ||
|
||
} catch (UnknownException | InterruptedException | ExecutionException | IOException e) { | ||
System.out.println(e.getMessage()); | ||
} | ||
System.out.printf("Deleted Queued Resource: %s\n", name); | ||
} | ||
} | ||
//[END tpu_queued_resources_delete_force] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* | ||
* Copyright 2024 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package tpu; | ||
|
||
//[START tpu_queued_resources_get] | ||
|
||
import com.google.cloud.tpu.v2alpha1.GetQueuedResourceRequest; | ||
import com.google.cloud.tpu.v2alpha1.QueuedResource; | ||
import com.google.cloud.tpu.v2alpha1.TpuClient; | ||
import java.io.IOException; | ||
|
||
public class GetQueuedResource { | ||
public static void main(String[] args) throws IOException { | ||
// TODO(developer): Replace these variables before running the sample. | ||
// Project ID or project number of the Google Cloud project. | ||
String projectId = "YOUR_PROJECT_ID"; | ||
// The zone in which the TPU was created. | ||
String zone = "europe-west4-a"; | ||
// The name for your Queued Resource. | ||
String queuedResourceId = "QUEUED_RESOURCE_ID"; | ||
|
||
getQueuedResource(projectId, zone, queuedResourceId); | ||
} | ||
|
||
// Get a Queued Resource. | ||
public static QueuedResource getQueuedResource( | ||
String projectId, String zone, String queuedResourceId) throws IOException { | ||
String name = String.format("projects/%s/locations/%s/queuedResources/%s", | ||
projectId, zone, queuedResourceId); | ||
// Initialize client that will be used to send requests. This client only needs to be created | ||
// once, and can be reused for multiple requests. | ||
try (TpuClient tpuClient = TpuClient.create()) { | ||
GetQueuedResourceRequest request = | ||
GetQueuedResourceRequest.newBuilder().setName(name).build(); | ||
|
||
return tpuClient.getQueuedResource(request); | ||
} | ||
} | ||
} | ||
//[END tpu_queued_resources_get] |
Oops, something went wrong.