From fdcbe83d8002118cc5abce6c23fe2f74140df393 Mon Sep 17 00:00:00 2001 From: Twisha Bansal Date: Thu, 24 Oct 2024 23:25:37 +0530 Subject: [PATCH] Test not running --- .../notebooks/batch_embeddings_update.ipynb | 104 ++++--- alloydb/tests/conftest.py | 275 ++++++++++++++++++ alloydb/tests/e2e_test.py | 28 ++ alloydb/tests/requirements-test.txt | 9 + 4 files changed, 362 insertions(+), 54 deletions(-) create mode 100644 alloydb/tests/conftest.py create mode 100644 alloydb/tests/e2e_test.py create mode 100644 alloydb/tests/requirements-test.txt diff --git a/alloydb/notebooks/batch_embeddings_update.ipynb b/alloydb/notebooks/batch_embeddings_update.ipynb index 5a1ac3e7d0b06..1916a9dd4508b 100644 --- a/alloydb/notebooks/batch_embeddings_update.ipynb +++ b/alloydb/notebooks/batch_embeddings_update.ipynb @@ -67,13 +67,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "id": "M_ppDxYf4Gqs" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "I0000 00:00:1729791146.216757 11765976 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: google-cloud-aiplatform 1.70.0 does not provide the extra 'all'\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33mWARNING: You are using pip version 22.0.4; however, version 24.2 is available.\n", + "You should consider upgrading via the '/Users/twishabansal/Documents/forks/python-docs-samples/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n", + "\u001b[0m" + ] + } + ], "source": [ - "%pip install google-cloud-alloydb-connector[asyncpg]==1.4.0 sqlalchemy==2.0.36 pandas==2.2.3 vertexai==1.70.0 asyncio==3.4.3 greenlet==3.1.1 --quiet" + "%pip install google-cloud-alloydb-connector[asyncpg]==1.4.0 sqlalchemy==2.0.36 pandas==2.2.3 vertexai==1.70.0 asyncio==3.4.3 --quiet" ] }, { @@ -119,7 +137,7 @@ "# @markdown Please fill in the value below with your GCP project ID and then run the cell.\n", "\n", "# Please fill in these values.\n", - "project_id = \"my-project-id\" # @param {type:\"string\"}\n", + "project_id = \"twisha-dev\" # @param {type:\"string\"}\n", "\n", "# Quick input validations.\n", "assert project_id, \"⚠️ Please provide a Google Cloud project ID\"\n", @@ -134,15 +152,8 @@ "id": "O-oqMC5Ox-ZM" }, "source": [ - "### Enable APIs for AlloyDB and Vertex AI" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "X-bzfFb4A-xK" - }, - "source": [ + "### Enable APIs for AlloyDB and Vertex AI\n", + "\n", "You will need to enable these APIs in order to create an AlloyDB database and utilize Vertex AI as an embeddings service!" ] }, @@ -173,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -187,16 +198,17 @@ "\n", "# Please fill in these values.\n", "region = \"us-central1\" # @param {type:\"string\"}\n", - "cluster_name = \"my-cluster\" # @param {type:\"string\"}\n", + "cluster_name = \"test-cluster\" # @param {type:\"string\"}\n", "instance_name = \"my-primary\" # @param {type:\"string\"}\n", "database_name = \"test_db\" # @param {type:\"string\"}\n", - "table_name = \"investments\"\n", - "password = input(\"Please provide a password to be used for 'postgres' database user: \")" + "table_name = \"investments\" # @param {type:\"string\"}\n", + "password = \"test_password\" # @param {type:\"string\"}\n", + "# password = input(\"Please provide a password to be used for 'postgres' database user: \")" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": { "id": "XXI1uUu3y8gc" }, @@ -240,7 +252,7 @@ "outputs": [], "source": [ "# create the AlloyDB Cluster\n", - "!gcloud beta alloydb clusters create {cluster_name} --password={password} --region={region}" + "# !gcloud beta alloydb clusters create {cluster_name} --password={password} --region={region}" ] }, { @@ -265,7 +277,7 @@ }, "outputs": [], "source": [ - "!gcloud beta alloydb instances create {instance_name} --instance-type=PRIMARY --cpu-count=2 --region={region} --cluster={cluster_name}" + "# !gcloud beta alloydb instances create {instance_name} --instance-type=PRIMARY --cpu-count=2 --region={region} --cluster={cluster_name}" ] }, { @@ -289,31 +301,7 @@ }, "outputs": [], "source": [ - "!gcloud beta alloydb instances update {instance_name} --region={region} --cluster={cluster_name} --assign-inbound-public-ip=ASSIGN_IPV4 --database-flags=\"password.enforce_complexity=on\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UabC_qh5HOVy" - }, - "source": [ - "Please wait for the instance to be updated. This might take some time. You can see if the changes are reflecting using:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_KC91mQZHABv", - "outputId": "6da8a6e4-549b-428d-a488-8dc993ddd216" - }, - "outputs": [], - "source": [ - "!gcloud beta alloydb instances describe {instance_name} --region={region} --cluster={cluster_name}" + "# !gcloud beta alloydb instances update {instance_name} --region={region} --cluster={cluster_name} --assign-inbound-public-ip=ASSIGN_IPV4 --database-flags=\"password.enforce_complexity=on\" --no-async" ] }, { @@ -329,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 22, "metadata": { "id": "fYKVQzv2cjcm" }, @@ -368,9 +356,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "I0000 00:00:1729792450.162148 11765976 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers\n" + ] + } + ], "source": [ "from google.cloud.alloydb.connector import AsyncConnector\n", "\n", @@ -645,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, "metadata": { "id": "wvYGGRRoFXl4" }, @@ -671,7 +667,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 9, "metadata": { "id": "IZgMik9XBW19" }, @@ -715,7 +711,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 10, "metadata": { "id": "76qq6G38CZfm" }, @@ -784,7 +780,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 11, "metadata": { "id": "4OYdrJk9Co0v" }, @@ -916,7 +912,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 12, "metadata": { "id": "lEyvhlOCCr7F" }, @@ -979,7 +975,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 13, "metadata": { "id": "rWb1T9aIBWa-" }, diff --git a/alloydb/tests/conftest.py b/alloydb/tests/conftest.py new file mode 100644 index 0000000000000..1e69be464b96f --- /dev/null +++ b/alloydb/tests/conftest.py @@ -0,0 +1,275 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import os +import re +import subprocess +import sys +import textwrap +from collections.abc import Callable, Iterable +from datetime import datetime + +import pytest + +@pytest.fixture(scope="session") +def project() -> str: + # This is set by the testing infrastructure. + project = os.environ["GOOGLE_CLOUD_PROJECT"] + run_cmd("gcloud", "config", "set", "project", project) + + # Since everything requires the project, let's confiugre and show some + # debugging information here. + run_cmd("gcloud", "version") + run_cmd("gcloud", "config", "list") + return project + + +def env_var(prefix: str, id: str = "") -> str: + return f"{prefix}_{id}".replace(".", "").replace("/", "").strip("_") + + +def run_cmd(*cmd: str) -> subprocess.CompletedProcess: + try: + print(f">> {cmd}") + start = datetime.now() + p = subprocess.run( + cmd, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + print(p.stderr.decode("utf-8")) + print(p.stdout.decode("utf-8")) + elapsed = (datetime.now() - start).seconds + minutes = int(elapsed / 60) + seconds = elapsed - minutes * 60 + print(f"Command `{cmd[0]}` finished in {minutes}m {seconds}s") + return p + except subprocess.CalledProcessError as e: + # Include the error message from the failed command. + print(e.stderr.decode("utf-8")) + print(e.stdout.decode("utf-8")) + raise RuntimeError(f"{e}\n\n{e.stderr.decode('utf-8')}") from e + + +def cloud_run_cleanup(service_name: str, location: str) -> None: + # Delete the Container Registry image associated with the service. + image = ( + run_cmd( + "gcloud", + "run", + "services", + "describe", + service_name, + f"--region={location}", + "--format=get(image)", + ) + .stdout.decode("utf-8") + .strip() + ) + run_cmd( + "gcloud", + "container", + "images", + "delete", + image, + "--force-delete-tags", + "--quiet", + ) + + # Delete the Cloud Run service. + run_cmd( + "gcloud", + "run", + "services", + "delete", + service_name, + f"--region={location}", + "--quiet", + ) + + +def run_notebook( + ipynb_file: str, + prelude: str = "", + section: str = "", + variables: dict = {}, + replace: dict[str, str] = {}, + preprocess: Callable[[str], str] = lambda source: source, + skip_shell_commands: bool = False, + until_end: bool = False, +) -> None: + import nbformat + from nbclient.client import NotebookClient + from nbclient.exceptions import CellExecutionError + + def notebook_filter_section( + start: str, + end: str, + cells: list[nbformat.NotebookNode], + until_end: bool = False, + ) -> Iterable[nbformat.NotebookNode]: + in_section = False + for cell in cells: + if cell["cell_type"] == "markdown": + if not in_section and cell["source"].startswith(start): + in_section = True + elif in_section and not until_end and cell["source"].startswith(end): + return + + if in_section: + yield cell + + # Regular expression to match and remove shell commands from the notebook. + # https://regex101.com/r/EHWBpT/1 + shell_command_re = re.compile(r"^!((?:[^\n]+\\\n)*(?:[^\n]+))$", re.MULTILINE) + + # Compile regular expressions for variable substitutions. + # https://regex101.com/r/e32vfW/1 + compiled_substitutions = [ + ( + re.compile(rf"""\b{name}\s*=\s*(?:f?'[^']*'|f?"[^"]*"|\w+)"""), + f"{name} = {repr(value)}", + ) + for name, value in variables.items() + ] + + # Filter the section if any, otherwise use the entire notebook. + nb = nbformat.read(ipynb_file, as_version=4) + if section: + start = section + end = section.split(" ", 1)[0] + " " + nb.cells = list(notebook_filter_section(start, end, nb.cells, until_end)) + if len(nb.cells) == 0: + raise ValueError( + f"Section {repr(section)} not found in notebook {repr(ipynb_file)}" + ) + + # Preprocess the cells. + for cell in nb.cells: + # Only preprocess code cells. + if cell["cell_type"] != "code": + continue + + # Run any custom preprocessing functions before. + cell["source"] = preprocess(cell["source"]) + + # Preprocess shell commands. + if skip_shell_commands: + cmd = "pass" + cell["source"] = shell_command_re.sub(cmd, cell["source"]) + else: + cell["source"] = shell_command_re.sub(r"_run(f'''\1''')", cell["source"]) + + # Apply variable substitutions. + for regex, new_value in compiled_substitutions: + cell["source"] = regex.sub(new_value, cell["source"]) + + # Apply replacements. + for old, new in replace.items(): + cell["source"] = cell["source"].replace(old, new) + + # Clear outputs. + cell["outputs"] = [] + + # Prepend the prelude cell. + prelude_src = textwrap.dedent( + """\ + def _run(cmd): + import subprocess as _sp + import sys as _sys + _p = _sp.run(cmd, shell=True, stdout=_sp.PIPE, stderr=_sp.PIPE) + _stdout = _p.stdout.decode('utf-8').strip() + _stderr = _p.stderr.decode('utf-8').strip() + if _stdout: + print(f'➜ !{cmd}') + print(_stdout) + if _stderr: + print(f'➜ !{cmd}', file=_sys.stderr) + print(_stderr, file=_sys.stderr) + if _p.returncode: + raise RuntimeError('\\n'.join([ + f"Command returned non-zero exit status {_p.returncode}.", + f"-------- command --------", + f"{cmd}", + f"-------- stderr --------", + f"{_stderr}", + f"-------- stdout --------", + f"{_stdout}", + ])) + """ + + prelude + ) + nb.cells = [nbformat.v4.new_code_cell(prelude_src)] + nb.cells + + # Run the notebook. + error = "" + client = NotebookClient(nb) + try: + client.execute() + except CellExecutionError as e: + # Remove colors and other escape characters to make it easier to read in the logs. + # https://stackoverflow.com/a/33925425 + color_chars = re.compile(r"(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]") + error = color_chars.sub("", str(e)) + for cell in nb.cells: + if cell["cell_type"] != "code": + continue + for output in cell["outputs"]: + if output.get("name") == "stdout": + print(color_chars.sub("", output["text"])) + elif output.get("name") == "stderr": + print(color_chars.sub("", output["text"]), file=sys.stderr) + + if error: + raise RuntimeError( + f"Error on {repr(ipynb_file)}, section {repr(section)}: {error}" + ) + + +def run_notebook_parallel( + ipynb_file: str, + sections: dict[str, dict], + prelude: str = "", + variables: dict = {}, + replace: dict[str, str] = {}, + skip_shell_commands: bool = False, +) -> None: + import multiprocessing + + args = [ + { + "ipynb_file": ipynb_file, + "section": section, + "prelude": params.get("prelude", prelude), + "variables": {**variables, **params.get("variables", {})}, + "replace": {**replace, **params.get("replace", {})}, + "skip_shell_commands": params.get( + "skip_shell_commands", skip_shell_commands + ), + } + for section, params in sections.items() + ] + with multiprocessing.Pool(len(args)) as pool: + pool.map(_run_notebook_section, args) + + +def _run_notebook_section(kwargs: dict): + # Helper function to make it pickleable and run with multiprpcessing. + run_notebook(**kwargs) + +def clean_up_cluster(cluster_name: str): + run_cmd(f"gcloud alloydb clusters delete {cluster_name} --region=us-central1") \ No newline at end of file diff --git a/alloydb/tests/e2e_test.py b/alloydb/tests/e2e_test.py new file mode 100644 index 0000000000000..e83471f6a59fe --- /dev/null +++ b/alloydb/tests/e2e_test.py @@ -0,0 +1,28 @@ +import conftest as conftest # python-docs-samples/alloydb/conftest.py +import uuid + + +# def inject_password(source: str, password: str) -> str: +# """Injects the provided password into the notebook source.""" +# modified_source = source.replace( +# "password = input(...)", f"password = '{password}'" +# ) +# return modified_source +CLUSTER_NAME = f"my-cluster-{uuid.uuid4()}" + +def test_batch_embeddings_update(project: str) -> None: + # TODO: Inject password such that we can take user input + conftest.run_notebook( + "alloydb/notebooks/batch_embeddings_update.ipynb", + # section="### Connect Your Google Cloud Project", + variables= { + "project_id": project, + # "cluster_name": CLUSTER_NAME, + "cluster_name": "my-cluster-171930bf-d6e4-4cf8-b1fa-4538a01b9c3f", + "password": "test_password", + }, + until_end=True, + ) + + # Delete AlloyDB cluster + conftest.clean_up_cluster(cluster_name=CLUSTER_NAME) \ No newline at end of file diff --git a/alloydb/tests/requirements-test.txt b/alloydb/tests/requirements-test.txt new file mode 100644 index 0000000000000..a2ad06b648049 --- /dev/null +++ b/alloydb/tests/requirements-test.txt @@ -0,0 +1,9 @@ +nbclient==0.8.0 +google-cloud-alloydb-connector[asyncpg]==1.4.0 +sqlalchemy==2.0.36 +pandas==2.2.3 +vertexai==1.70.0 +asyncio==3.4.3 +greenlet==3.1.1 + +google-cloud==0.34.0 \ No newline at end of file