From fdcbe83d8002118cc5abce6c23fe2f74140df393 Mon Sep 17 00:00:00 2001
From: Twisha Bansal <twishabansal@google.com>
Date: Thu, 24 Oct 2024 23:25:37 +0530
Subject: [PATCH] Test not running

---
 .../notebooks/batch_embeddings_update.ipynb   | 104 ++++---
 alloydb/tests/conftest.py                     | 275 ++++++++++++++++++
 alloydb/tests/e2e_test.py                     |  28 ++
 alloydb/tests/requirements-test.txt           |   9 +
 4 files changed, 362 insertions(+), 54 deletions(-)
 create mode 100644 alloydb/tests/conftest.py
 create mode 100644 alloydb/tests/e2e_test.py
 create mode 100644 alloydb/tests/requirements-test.txt

diff --git a/alloydb/notebooks/batch_embeddings_update.ipynb b/alloydb/notebooks/batch_embeddings_update.ipynb
index 5a1ac3e7d0b06..1916a9dd4508b 100644
--- a/alloydb/notebooks/batch_embeddings_update.ipynb
+++ b/alloydb/notebooks/batch_embeddings_update.ipynb
@@ -67,13 +67,31 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 16,
       "metadata": {
         "id": "M_ppDxYf4Gqs"
       },
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "I0000 00:00:1729791146.216757 11765976 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\u001b[33mWARNING: google-cloud-aiplatform 1.70.0 does not provide the extra 'all'\u001b[0m\u001b[33m\n",
+            "\u001b[0m\u001b[33mWARNING: You are using pip version 22.0.4; however, version 24.2 is available.\n",
+            "You should consider upgrading via the '/Users/twishabansal/Documents/forks/python-docs-samples/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n",
+            "\u001b[0m"
+          ]
+        }
+      ],
       "source": [
-        "%pip install google-cloud-alloydb-connector[asyncpg]==1.4.0 sqlalchemy==2.0.36 pandas==2.2.3 vertexai==1.70.0 asyncio==3.4.3 greenlet==3.1.1 --quiet"
+        "%pip install google-cloud-alloydb-connector[asyncpg]==1.4.0 sqlalchemy==2.0.36 pandas==2.2.3 vertexai==1.70.0 asyncio==3.4.3 --quiet"
       ]
     },
     {
@@ -119,7 +137,7 @@
         "# @markdown Please fill in the value below with your GCP project ID and then run the cell.\n",
         "\n",
         "# Please fill in these values.\n",
-        "project_id = \"my-project-id\"  # @param {type:\"string\"}\n",
+        "project_id = \"twisha-dev\"  # @param {type:\"string\"}\n",
         "\n",
         "# Quick input validations.\n",
         "assert project_id, \"⚠️ Please provide a Google Cloud project ID\"\n",
@@ -134,15 +152,8 @@
         "id": "O-oqMC5Ox-ZM"
       },
       "source": [
-        "### Enable APIs for AlloyDB and Vertex AI"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "X-bzfFb4A-xK"
-      },
-      "source": [
+        "### Enable APIs for AlloyDB and Vertex AI\n",
+        "\n",
         "You will need to enable these APIs in order to create an AlloyDB database and utilize Vertex AI as an embeddings service!"
       ]
     },
@@ -173,7 +184,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": 4,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -187,16 +198,17 @@
         "\n",
         "# Please fill in these values.\n",
         "region = \"us-central1\"  # @param {type:\"string\"}\n",
-        "cluster_name = \"my-cluster\"  # @param {type:\"string\"}\n",
+        "cluster_name = \"test-cluster\"  # @param {type:\"string\"}\n",
         "instance_name = \"my-primary\"  # @param {type:\"string\"}\n",
         "database_name = \"test_db\"  # @param {type:\"string\"}\n",
-        "table_name = \"investments\"\n",
-        "password = input(\"Please provide a password to be used for 'postgres' database user: \")"
+        "table_name = \"investments\"  # @param {type:\"string\"}\n",
+        "password = \"test_password\"  # @param {type:\"string\"}\n",
+        "# password = input(\"Please provide a password to be used for 'postgres' database user: \")"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 7,
+      "execution_count": 5,
       "metadata": {
         "id": "XXI1uUu3y8gc"
       },
@@ -240,7 +252,7 @@
       "outputs": [],
       "source": [
         "# create the AlloyDB Cluster\n",
-        "!gcloud beta alloydb clusters create {cluster_name} --password={password} --region={region}"
+        "# !gcloud beta alloydb clusters create {cluster_name} --password={password} --region={region}"
       ]
     },
     {
@@ -265,7 +277,7 @@
       },
       "outputs": [],
       "source": [
-        "!gcloud beta alloydb instances create {instance_name} --instance-type=PRIMARY --cpu-count=2 --region={region} --cluster={cluster_name}"
+        "# !gcloud beta alloydb instances create {instance_name} --instance-type=PRIMARY --cpu-count=2 --region={region} --cluster={cluster_name}"
       ]
     },
     {
@@ -289,31 +301,7 @@
       },
       "outputs": [],
       "source": [
-        "!gcloud beta alloydb instances update {instance_name} --region={region} --cluster={cluster_name} --assign-inbound-public-ip=ASSIGN_IPV4 --database-flags=\"password.enforce_complexity=on\""
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "UabC_qh5HOVy"
-      },
-      "source": [
-        "Please wait for the instance to be updated. This might take some time. You can see if the changes are reflecting using:"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "_KC91mQZHABv",
-        "outputId": "6da8a6e4-549b-428d-a488-8dc993ddd216"
-      },
-      "outputs": [],
-      "source": [
-        "!gcloud beta alloydb instances describe {instance_name} --region={region} --cluster={cluster_name}"
+        "# !gcloud beta alloydb instances update {instance_name} --region={region} --cluster={cluster_name} --assign-inbound-public-ip=ASSIGN_IPV4 --database-flags=\"password.enforce_complexity=on\" --no-async"
       ]
     },
     {
@@ -329,7 +317,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 15,
+      "execution_count": 22,
       "metadata": {
         "id": "fYKVQzv2cjcm"
       },
@@ -368,9 +356,17 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 23,
       "metadata": {},
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "I0000 00:00:1729792450.162148 11765976 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers\n"
+          ]
+        }
+      ],
       "source": [
         "from google.cloud.alloydb.connector import AsyncConnector\n",
         "\n",
@@ -645,7 +641,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 23,
+      "execution_count": 8,
       "metadata": {
         "id": "wvYGGRRoFXl4"
       },
@@ -671,7 +667,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 24,
+      "execution_count": 9,
       "metadata": {
         "id": "IZgMik9XBW19"
       },
@@ -715,7 +711,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 25,
+      "execution_count": 10,
       "metadata": {
         "id": "76qq6G38CZfm"
       },
@@ -784,7 +780,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 26,
+      "execution_count": 11,
       "metadata": {
         "id": "4OYdrJk9Co0v"
       },
@@ -916,7 +912,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 27,
+      "execution_count": 12,
       "metadata": {
         "id": "lEyvhlOCCr7F"
       },
@@ -979,7 +975,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 28,
+      "execution_count": 13,
       "metadata": {
         "id": "rWb1T9aIBWa-"
       },
diff --git a/alloydb/tests/conftest.py b/alloydb/tests/conftest.py
new file mode 100644
index 0000000000000..1e69be464b96f
--- /dev/null
+++ b/alloydb/tests/conftest.py
@@ -0,0 +1,275 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import os
+import re
+import subprocess
+import sys
+import textwrap
+from collections.abc import Callable, Iterable
+from datetime import datetime
+
+import pytest
+
+@pytest.fixture(scope="session")
+def project() -> str:
+    # This is set by the testing infrastructure.
+    project = os.environ["GOOGLE_CLOUD_PROJECT"]
+    run_cmd("gcloud", "config", "set", "project", project)
+
+    # Since everything requires the project, let's confiugre and show some
+    # debugging information here.
+    run_cmd("gcloud", "version")
+    run_cmd("gcloud", "config", "list")
+    return project
+
+
+def env_var(prefix: str, id: str = "") -> str:
+    return f"{prefix}_{id}".replace(".", "").replace("/", "").strip("_")
+
+
+def run_cmd(*cmd: str) -> subprocess.CompletedProcess:
+    try:
+        print(f">> {cmd}")
+        start = datetime.now()
+        p = subprocess.run(
+            cmd,
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        print(p.stderr.decode("utf-8"))
+        print(p.stdout.decode("utf-8"))
+        elapsed = (datetime.now() - start).seconds
+        minutes = int(elapsed / 60)
+        seconds = elapsed - minutes * 60
+        print(f"Command `{cmd[0]}` finished in {minutes}m {seconds}s")
+        return p
+    except subprocess.CalledProcessError as e:
+        # Include the error message from the failed command.
+        print(e.stderr.decode("utf-8"))
+        print(e.stdout.decode("utf-8"))
+        raise RuntimeError(f"{e}\n\n{e.stderr.decode('utf-8')}") from e
+
+
+def cloud_run_cleanup(service_name: str, location: str) -> None:
+    # Delete the Container Registry image associated with the service.
+    image = (
+        run_cmd(
+            "gcloud",
+            "run",
+            "services",
+            "describe",
+            service_name,
+            f"--region={location}",
+            "--format=get(image)",
+        )
+        .stdout.decode("utf-8")
+        .strip()
+    )
+    run_cmd(
+        "gcloud",
+        "container",
+        "images",
+        "delete",
+        image,
+        "--force-delete-tags",
+        "--quiet",
+    )
+
+    # Delete the Cloud Run service.
+    run_cmd(
+        "gcloud",
+        "run",
+        "services",
+        "delete",
+        service_name,
+        f"--region={location}",
+        "--quiet",
+    )
+
+
+def run_notebook(
+    ipynb_file: str,
+    prelude: str = "",
+    section: str = "",
+    variables: dict = {},
+    replace: dict[str, str] = {},
+    preprocess: Callable[[str], str] = lambda source: source,
+    skip_shell_commands: bool = False,
+    until_end: bool = False,
+) -> None:
+    import nbformat
+    from nbclient.client import NotebookClient
+    from nbclient.exceptions import CellExecutionError
+
+    def notebook_filter_section(
+        start: str,
+        end: str,
+        cells: list[nbformat.NotebookNode],
+        until_end: bool = False,
+    ) -> Iterable[nbformat.NotebookNode]:
+        in_section = False
+        for cell in cells:
+            if cell["cell_type"] == "markdown":
+                if not in_section and cell["source"].startswith(start):
+                    in_section = True
+                elif in_section and not until_end and cell["source"].startswith(end):
+                    return
+
+            if in_section:
+                yield cell
+
+    # Regular expression to match and remove shell commands from the notebook.
+    #   https://regex101.com/r/EHWBpT/1
+    shell_command_re = re.compile(r"^!((?:[^\n]+\\\n)*(?:[^\n]+))$", re.MULTILINE)
+
+    # Compile regular expressions for variable substitutions.
+    #   https://regex101.com/r/e32vfW/1
+    compiled_substitutions = [
+        (
+            re.compile(rf"""\b{name}\s*=\s*(?:f?'[^']*'|f?"[^"]*"|\w+)"""),
+            f"{name} = {repr(value)}",
+        )
+        for name, value in variables.items()
+    ]
+
+    # Filter the section if any, otherwise use the entire notebook.
+    nb = nbformat.read(ipynb_file, as_version=4)
+    if section:
+        start = section
+        end = section.split(" ", 1)[0] + " "
+        nb.cells = list(notebook_filter_section(start, end, nb.cells, until_end))
+        if len(nb.cells) == 0:
+            raise ValueError(
+                f"Section {repr(section)} not found in notebook {repr(ipynb_file)}"
+            )
+
+    # Preprocess the cells.
+    for cell in nb.cells:
+        # Only preprocess code cells.
+        if cell["cell_type"] != "code":
+            continue
+
+        # Run any custom preprocessing functions before.
+        cell["source"] = preprocess(cell["source"])
+
+        # Preprocess shell commands.
+        if skip_shell_commands:
+            cmd = "pass"
+            cell["source"] = shell_command_re.sub(cmd, cell["source"])
+        else:
+            cell["source"] = shell_command_re.sub(r"_run(f'''\1''')", cell["source"])
+
+        # Apply variable substitutions.
+        for regex, new_value in compiled_substitutions:
+            cell["source"] = regex.sub(new_value, cell["source"])
+
+        # Apply replacements.
+        for old, new in replace.items():
+            cell["source"] = cell["source"].replace(old, new)
+
+        # Clear outputs.
+        cell["outputs"] = []
+
+    # Prepend the prelude cell.
+    prelude_src = textwrap.dedent(
+        """\
+        def _run(cmd):
+            import subprocess as _sp
+            import sys as _sys
+            _p = _sp.run(cmd, shell=True, stdout=_sp.PIPE, stderr=_sp.PIPE)
+            _stdout = _p.stdout.decode('utf-8').strip()
+            _stderr = _p.stderr.decode('utf-8').strip()
+            if _stdout:
+                print(f'➜ !{cmd}')
+                print(_stdout)
+            if _stderr:
+                print(f'➜ !{cmd}', file=_sys.stderr)
+                print(_stderr, file=_sys.stderr)
+            if _p.returncode:
+                raise RuntimeError('\\n'.join([
+                    f"Command returned non-zero exit status {_p.returncode}.",
+                    f"-------- command --------",
+                    f"{cmd}",
+                    f"-------- stderr --------",
+                    f"{_stderr}",
+                    f"-------- stdout --------",
+                    f"{_stdout}",
+                ]))
+        """
+        + prelude
+    )
+    nb.cells = [nbformat.v4.new_code_cell(prelude_src)] + nb.cells
+
+    # Run the notebook.
+    error = ""
+    client = NotebookClient(nb)
+    try:
+        client.execute()
+    except CellExecutionError as e:
+        # Remove colors and other escape characters to make it easier to read in the logs.
+        #   https://stackoverflow.com/a/33925425
+        color_chars = re.compile(r"(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]")
+        error = color_chars.sub("", str(e))
+        for cell in nb.cells:
+            if cell["cell_type"] != "code":
+                continue
+            for output in cell["outputs"]:
+                if output.get("name") == "stdout":
+                    print(color_chars.sub("", output["text"]))
+                elif output.get("name") == "stderr":
+                    print(color_chars.sub("", output["text"]), file=sys.stderr)
+
+    if error:
+        raise RuntimeError(
+            f"Error on {repr(ipynb_file)}, section {repr(section)}: {error}"
+        )
+
+
+def run_notebook_parallel(
+    ipynb_file: str,
+    sections: dict[str, dict],
+    prelude: str = "",
+    variables: dict = {},
+    replace: dict[str, str] = {},
+    skip_shell_commands: bool = False,
+) -> None:
+    import multiprocessing
+
+    args = [
+        {
+            "ipynb_file": ipynb_file,
+            "section": section,
+            "prelude": params.get("prelude", prelude),
+            "variables": {**variables, **params.get("variables", {})},
+            "replace": {**replace, **params.get("replace", {})},
+            "skip_shell_commands": params.get(
+                "skip_shell_commands", skip_shell_commands
+            ),
+        }
+        for section, params in sections.items()
+    ]
+    with multiprocessing.Pool(len(args)) as pool:
+        pool.map(_run_notebook_section, args)
+
+
+def _run_notebook_section(kwargs: dict):
+    # Helper function to make it pickleable and run with multiprpcessing.
+    run_notebook(**kwargs)
+
+def clean_up_cluster(cluster_name: str):
+    run_cmd(f"gcloud alloydb clusters delete {cluster_name} --region=us-central1")
\ No newline at end of file
diff --git a/alloydb/tests/e2e_test.py b/alloydb/tests/e2e_test.py
new file mode 100644
index 0000000000000..e83471f6a59fe
--- /dev/null
+++ b/alloydb/tests/e2e_test.py
@@ -0,0 +1,28 @@
+import conftest as conftest  # python-docs-samples/alloydb/conftest.py
+import uuid
+
+
+# def inject_password(source: str, password: str) -> str:
+#     """Injects the provided password into the notebook source."""
+#     modified_source = source.replace(
+#         "password = input(...)", f"password = '{password}'"
+#     )
+#     return modified_source
+CLUSTER_NAME = f"my-cluster-{uuid.uuid4()}"
+
+def test_batch_embeddings_update(project: str) -> None:
+    # TODO: Inject password such that we can take user input
+    conftest.run_notebook(
+        "alloydb/notebooks/batch_embeddings_update.ipynb",
+        # section="### Connect Your Google Cloud Project",
+        variables= {
+            "project_id": project,
+            # "cluster_name": CLUSTER_NAME,
+            "cluster_name": "my-cluster-171930bf-d6e4-4cf8-b1fa-4538a01b9c3f",
+            "password": "test_password",
+        },
+        until_end=True,
+    )
+
+    # Delete AlloyDB cluster
+    conftest.clean_up_cluster(cluster_name=CLUSTER_NAME)
\ No newline at end of file
diff --git a/alloydb/tests/requirements-test.txt b/alloydb/tests/requirements-test.txt
new file mode 100644
index 0000000000000..a2ad06b648049
--- /dev/null
+++ b/alloydb/tests/requirements-test.txt
@@ -0,0 +1,9 @@
+nbclient==0.8.0
+google-cloud-alloydb-connector[asyncpg]==1.4.0
+sqlalchemy==2.0.36
+pandas==2.2.3
+vertexai==1.70.0
+asyncio==3.4.3
+greenlet==3.1.1
+
+google-cloud==0.34.0
\ No newline at end of file