2 changes: 2 additions & 0 deletions cmd/root.go
@@ -148,6 +148,7 @@ import (
_ "github.com/googleapis/genai-toolbox/internal/tools/postgres/postgreslisttables"
_ "github.com/googleapis/genai-toolbox/internal/tools/postgres/postgressql"
_ "github.com/googleapis/genai-toolbox/internal/tools/redis"
_ "github.com/googleapis/genai-toolbox/internal/tools/serverlessspark/serverlesssparklistbatches"
_ "github.com/googleapis/genai-toolbox/internal/tools/spanner/spannerexecutesql"
_ "github.com/googleapis/genai-toolbox/internal/tools/spanner/spannerlisttables"
_ "github.com/googleapis/genai-toolbox/internal/tools/spanner/spannersql"
@@ -189,6 +190,7 @@ import (
_ "github.com/googleapis/genai-toolbox/internal/sources/oracle"
_ "github.com/googleapis/genai-toolbox/internal/sources/postgres"
_ "github.com/googleapis/genai-toolbox/internal/sources/redis"
_ "github.com/googleapis/genai-toolbox/internal/sources/serverlessspark"
_ "github.com/googleapis/genai-toolbox/internal/sources/spanner"
_ "github.com/googleapis/genai-toolbox/internal/sources/sqlite"
_ "github.com/googleapis/genai-toolbox/internal/sources/tidb"
14 changes: 14 additions & 0 deletions cmd/root_test.go
@@ -1254,6 +1254,7 @@ func TestPrebuiltTools(t *testing.T) {
cloudsqlpgobsvconfig, _ := prebuiltconfigs.Get("cloud-sql-postgres-observability")
cloudsqlmysqlobsvconfig, _ := prebuiltconfigs.Get("cloud-sql-mysql-observability")
cloudsqlmssqlobsvconfig, _ := prebuiltconfigs.Get("cloud-sql-mssql-observability")
	serverlessSparkConfig, _ := prebuiltconfigs.Get("serverless-spark")

// Set environment variables
t.Setenv("API_KEY", "your_api_key")
@@ -1305,6 +1306,9 @@
t.Setenv("CLOUD_SQL_MSSQL_PASSWORD", "your_cloudsql_mssql_password")
t.Setenv("CLOUD_SQL_POSTGRES_PASSWORD", "your_cloudsql_pg_password")

t.Setenv("SERVERLESS_SPARK_PROJECT", "your_gcp_project_id")
t.Setenv("SERVERLESS_SPARK_LOCATION", "your_gcp_location")

t.Setenv("POSTGRES_HOST", "localhost")
t.Setenv("POSTGRES_PORT", "5432")
t.Setenv("POSTGRES_DATABASE", "your_postgres_db")
@@ -1457,6 +1461,16 @@ func TestPrebuiltTools(t *testing.T) {
},
},
},
{
name: "serverless spark prebuilt tools",
			in: serverlessSparkConfig,
wantToolset: server.ToolsetConfigs{
"serverless_spark_tools": tools.ToolsetConfig{
Name: "serverless_spark_tools",
ToolNames: []string{"list_batches"},
},
},
},
{
name: "firestore prebuilt tools",
in: firestoreconfig,
14 changes: 14 additions & 0 deletions docs/en/reference/prebuilt-tools.md
@@ -493,6 +493,20 @@ details on how to connect your AI tools (IDEs) to databases via Toolbox and MCP.
* `list_invalid_indexes`: Lists invalid indexes in the database.
* `get_query_plan`: Generate the execution plan of a statement.

## Google Cloud Serverless for Apache Spark

* `--prebuilt` value: `serverless-spark`
* **Environment Variables:**
    * `SERVERLESS_SPARK_PROJECT`: The GCP project ID.
    * `SERVERLESS_SPARK_LOCATION`: The GCP location.
* **Permissions:**
* **Dataproc Serverless Viewer** (`roles/dataproc.serverlessViewer`) to
view serverless batches.
    * **Dataproc Serverless Editor** (`roles/dataproc.serverlessEditor`) to
      run and manage serverless batches.
* **Tools:**
* `list_batches`: Lists Spark batches.

## Spanner (GoogleSQL dialect)

* `--prebuilt` value: `spanner`
57 changes: 57 additions & 0 deletions docs/en/resources/sources/serverless-spark.md
@@ -0,0 +1,57 @@
---
title: "Serverless for Apache Spark"
type: docs
weight: 1
description: >
  Google Cloud Serverless for Apache Spark lets you run Spark workloads without provisioning or managing your own Spark cluster.
---

## About

The [Serverless for Apache
Spark](https://cloud.google.com/dataproc-serverless/docs/overview) source allows
Toolbox to interact with Spark batches hosted on Google Cloud Serverless for
Apache Spark.

## Available Tools

- [`serverless-spark-list-batches`](../tools/serverless-spark/serverless-spark-list-batches.md):
  List and filter Serverless Spark batches.

## Requirements

### IAM Permissions

Serverless for Apache Spark uses [Identity and Access Management
(IAM)](https://cloud.google.com/dataproc-serverless/docs/concepts/iam) to
control user and group access to serverless Spark resources like batches and
sessions.

Toolbox will use your [Application Default Credentials
(ADC)](https://cloud.google.com/docs/authentication#adc) to authorize and
authenticate when interacting with Google Cloud Serverless for Apache Spark.
When using this method, you need to ensure the IAM identity associated with your
ADC has the correct
[permissions](https://cloud.google.com/dataproc-serverless/docs/concepts/iam)
for the actions you intend to perform. Common roles include
`roles/dataproc.serverlessEditor` (which includes permissions to run batches) or
`roles/dataproc.serverlessViewer`. Follow this
[guide](https://cloud.google.com/docs/authentication/provide-credentials-adc) to
set up your ADC.
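
To confirm that ADC resolves to a credential before wiring up this source, you
can run a minimal Go sketch like the one below (illustrative only, not part of
Toolbox):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"golang.org/x/oauth2/google"
)

func main() {
	ctx := context.Background()
	// FindDefaultCredentials walks the standard ADC lookup chain:
	// GOOGLE_APPLICATION_CREDENTIALS, gcloud user credentials, then the
	// attached service account on GCE/GKE/Cloud Run.
	creds, err := google.FindDefaultCredentials(ctx,
		"https://www.googleapis.com/auth/cloud-platform")
	if err != nil {
		log.Fatalf("ADC not configured: %v", err)
	}
	fmt.Printf("ADC resolved (project %q)\n", creds.ProjectID)
}
```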

## Example

```yaml
sources:
my-serverless-spark-source:
kind: serverless-spark
project: my-project-id
location: us-central1
```

## Reference

| **field** | **type** | **required** | **description** |
| --------- | :------: | :----------: | ----------------------------------------------------------------- |
| kind | string | true | Must be "serverless-spark". |
| project | string | true | ID of the GCP project with Serverless for Apache Spark resources. |
| location | string | true | Location containing Serverless for Apache Spark resources. |
7 changes: 7 additions & 0 deletions docs/en/resources/tools/serverless-spark/_index.md
@@ -0,0 +1,7 @@
---
title: "Serverless for Apache Spark"
type: docs
weight: 1
description: >
Tools that work with Google Cloud Serverless for Apache Spark Sources.
---
73 changes: 73 additions & 0 deletions docs/en/resources/tools/serverless-spark/serverless-spark-list-batches.md
@@ -0,0 +1,73 @@
---
title: "serverless-spark-list-batches"
type: docs
weight: 1
description: >
A "serverless-spark-list-batches" tool returns a list of Spark batches from the source.
aliases:
- /resources/tools/serverless-spark-list-batches
---

## About

A `serverless-spark-list-batches` tool returns a list of Spark batches from a
Google Cloud Serverless for Apache Spark source. It's compatible with the
following sources:

- [serverless-spark](../../sources/serverless-spark.md)

`serverless-spark-list-batches` accepts the following parameters:

- **`filter`** (optional): A filter expression to limit the batches returned.
  Filters are case-sensitive and may contain multiple clauses combined with
logical operators (AND/OR). Supported fields are `batch_id`, `batch_uuid`,
`state`, `create_time`, and `labels`. For example: `state = RUNNING AND
create_time < "2023-01-01T00:00:00Z"`.
- **`pageSize`** (optional): The maximum number of batches to return in a single
page.
- **`pageToken`** (optional): A page token, received from a previous call, to
retrieve the next page of results.

The tool gets the `project` and `location` from the source configuration.
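
For context, the sketch below shows roughly how these parameters map onto the
underlying `cloud.google.com/go/dataproc/v2` Batch Controller API. The
surrounding plumbing is illustrative (the endpoint, project, and location are
placeholders), not Toolbox's actual implementation:

```go
package main

import (
	"context"
	"fmt"
	"log"

	dataproc "cloud.google.com/go/dataproc/v2/apiv1"
	"cloud.google.com/go/dataproc/v2/apiv1/dataprocpb"
	"google.golang.org/api/iterator"
	"google.golang.org/api/option"
)

func main() {
	ctx := context.Background()
	// The Batch Controller is regional; the endpoint must match the location.
	client, err := dataproc.NewBatchControllerClient(ctx,
		option.WithEndpoint("us-central1-dataproc.googleapis.com:443"))
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	// Parent is built from the source's project and location; Filter,
	// PageSize, and PageToken correspond to the tool's optional parameters.
	it := client.ListBatches(ctx, &dataprocpb.ListBatchesRequest{
		Parent:   "projects/my-project-id/locations/us-central1",
		Filter:   `state = RUNNING AND create_time < "2023-01-01T00:00:00Z"`,
		PageSize: 50,
	})
	for {
		batch, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(batch.GetName(), batch.GetState())
	}
}
```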

## Example

```yaml
tools:
list_spark_batches:
kind: serverless-spark-list-batches
source: my-serverless-spark-source
    description: Use this tool to list and filter Serverless Spark batches.
```

## Response Format

```json
{
"batches": [
{
"name": "projects/my-project/locations/us-central1/batches/batch-abc-123",
"uuid": "a1b2c3d4-e5f6-7890-1234-567890abcdef",
"state": "SUCCEEDED",
"creator": "[email protected]",
"createTime": "2023-10-27T10:00:00Z"
},
{
"name": "projects/my-project/locations/us-central1/batches/batch-def-456",
"uuid": "b2c3d4e5-f6a7-8901-2345-678901bcdefa",
"state": "FAILED",
"creator": "[email protected]",
"createTime": "2023-10-27T11:30:00Z"
}
],
"nextPageToken": "abcd1234"
}
```

## Reference

| **field** | **type** | **required** | **description** |
| ----------- | :------: | :----------: | -------------------------------------------------- |
| kind | string | true | Must be "serverless-spark-list-batches". |
| source | string | true | Name of the source the tool should use. |
| description | string | true | Description of the tool that is passed to the LLM. |
1 change: 1 addition & 0 deletions go.mod
@@ -10,6 +10,7 @@ require (
cloud.google.com/go/bigtable v1.40.1
cloud.google.com/go/cloudsqlconn v1.18.1
cloud.google.com/go/dataplex v1.27.1
cloud.google.com/go/dataproc/v2 v2.14.1
cloud.google.com/go/firestore v1.19.0
cloud.google.com/go/geminidataanalytics v0.2.1
cloud.google.com/go/spanner v1.86.0
2 changes: 2 additions & 0 deletions go.sum
@@ -241,6 +241,8 @@ cloud.google.com/go/dataplex v1.27.1/go.mod h1:VB+xlYJiJ5kreonXsa2cHPj0A3CfPh/mg
cloud.google.com/go/dataproc v1.7.0/go.mod h1:CKAlMjII9H90RXaMpSxQ8EU6dQx6iAYNPcYPOkSbi8s=
cloud.google.com/go/dataproc v1.8.0/go.mod h1:5OW+zNAH0pMpw14JVrPONsxMQYMBqJuzORhIBfBn9uI=
cloud.google.com/go/dataproc v1.12.0/go.mod h1:zrF3aX0uV3ikkMz6z4uBbIKyhRITnxvr4i3IjKsKrw4=
cloud.google.com/go/dataproc/v2 v2.14.1 h1:Kxq0iomU0H4MlVP4HYeYPNJnV+YxNctf/hFrprmGy5Y=
cloud.google.com/go/dataproc/v2 v2.14.1/go.mod h1:tSdkodShfzrrUNPDVEL6MdH9/mIEvp/Z9s9PBdbsZg8=
cloud.google.com/go/dataqna v0.5.0/go.mod h1:90Hyk596ft3zUQ8NkFfvICSIfHFh1Bc7C4cK3vbhkeo=
cloud.google.com/go/dataqna v0.6.0/go.mod h1:1lqNpM7rqNLVgWBJyk5NF6Uen2PHym0jtVJonplVsDA=
cloud.google.com/go/dataqna v0.7.0/go.mod h1:Lx9OcIIeqCrw1a6KdO3/5KMP1wAmTc0slZWwP12Qq3c=
1 change: 1 addition & 0 deletions internal/prebuiltconfigs/prebuiltconfigs_test.go
@@ -44,6 +44,7 @@ var expectedToolSources = []string{
"neo4j",
"oceanbase",
"postgres",
"serverless-spark",
"spanner-postgres",
"spanner",
"sqlite",
28 changes: 28 additions & 0 deletions internal/prebuiltconfigs/tools/serverless-spark.yaml
@@ -0,0 +1,28 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

sources:
serverless-spark-source:
kind: serverless-spark
project: ${SERVERLESS_SPARK_PROJECT}
location: ${SERVERLESS_SPARK_LOCATION}

tools:
list_batches:
kind: serverless-spark-list-batches
source: serverless-spark-source

toolsets:
serverless_spark_tools:
- list_batches
99 changes: 99 additions & 0 deletions internal/sources/serverlessspark/serverlessspark.go
@@ -0,0 +1,99 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package serverlessspark

import (
"context"
"fmt"
"log"

dataproc "cloud.google.com/go/dataproc/v2/apiv1"
"github.com/goccy/go-yaml"
"github.com/googleapis/genai-toolbox/internal/sources"
"github.com/googleapis/genai-toolbox/internal/util"
"go.opentelemetry.io/otel/trace"
"google.golang.org/api/option"
)

const SourceKind string = "serverless-spark"

// validate interface
var _ sources.SourceConfig = Config{}

func init() {
if !sources.Register(SourceKind, newConfig) {
panic(fmt.Sprintf("source kind %q already registered", SourceKind))
}
}

func newConfig(ctx context.Context, name string, decoder *yaml.Decoder) (sources.SourceConfig, error) {
actual := Config{Name: name}
if err := decoder.DecodeContext(ctx, &actual); err != nil {
return nil, err
}
return actual, nil
}

type Config struct {
Name string `yaml:"name" validate:"required"`
Kind string `yaml:"kind" validate:"required"`
Project string `yaml:"project" validate:"required"`
Location string `yaml:"location" validate:"required"`
}

func (r Config) SourceConfigKind() string {
return SourceKind
}

func (r Config) Initialize(ctx context.Context, tracer trace.Tracer) (sources.Source, error) {
ua, err := util.UserAgentFromContext(ctx)
if err != nil {
		return nil, fmt.Errorf("error in User Agent retrieval: %w", err)
}
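	// Serverless for Apache Spark is served from regional endpoints, so the
	// client endpoint is derived from the configured location.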
endpoint := fmt.Sprintf("%s-dataproc.googleapis.com:443", r.Location)
client, err := dataproc.NewBatchControllerClient(ctx, option.WithEndpoint(endpoint), option.WithUserAgent(ua))
if err != nil {
return nil, fmt.Errorf("failed to create dataproc client: %w", err)
}

log.Printf("New source with config %+v\n", r)

s := &Source{
Name: r.Name,
Kind: SourceKind,
Project: r.Project,
Location: r.Location,
Client: client,
}
return s, nil
}

var _ sources.Source = &Source{}

type Source struct {
Name string `yaml:"name"`
Kind string `yaml:"kind"`
Project string
Location string
Client *dataproc.BatchControllerClient
}

func (s *Source) SourceKind() string {
return SourceKind
}

func (s *Source) GetBatchControllerClient() *dataproc.BatchControllerClient {
return s.Client
}