oracle-samples · cjbj · Mar 3, 2025
diff --git a/python/python-oracledb/create_schema.py b/python/python-oracledb/create_schema.py
@@ -54,7 +54,7 @@
     sample_env.run_sql_script(
         conn, "create_schema_21", main_user=sample_env.get_main_user()
     )
-if sample_env.get_server_version() >= (23, 5):
+if sample_env.get_server_version() >= (23, 7):
     sample_env.run_sql_script(
         conn, "create_schema_23", main_user=sample_env.get_main_user()
     )

diff --git a/python/python-oracledb/dataframe_numpy.py b/python/python-oracledb/dataframe_numpy.py
@@ -0,0 +1,71 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2025, Oracle and/or its affiliates.
+#
+# This software is dual-licensed to you under the Universal Permissive License
+# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
+# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
+# either license.
+#
+# If you elect to accept the software under the Apache License, Version 2.0,
+# the following applies:
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# dataframe_numpy.py
+#
+# Shows how to use connection.fetch_df_all() to efficiently put data into a
+# NumPy ndarray via the DLPack standard memory layout.
+# -----------------------------------------------------------------------------
+
+import pyarrow
+import numpy
+
+import oracledb
+import sample_env
+
+# initialize the Oracle Client libraries (thick mode) when not in thin mode
+if not sample_env.get_is_thin():
+    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())
+
+connection = oracledb.connect(
+    user=sample_env.get_main_user(),
+    password=sample_env.get_main_password(),
+    dsn=sample_env.get_connect_string(),
+    params=sample_env.get_connect_params(),
+)
+
+QUERY = "select id from SampleQueryTab order by id"
+
+# Fetch the whole result set as an OracleDataFrame; arraysize can be adjusted
+# to tune the query fetch performance
+oracle_df = connection.fetch_df_all(statement=QUERY, arraysize=100)
+
+# Hand the ID column to PyArrow, then to NumPy via the Python DLPack protocol
+id_column = oracle_df.get_column_by_name("ID")
+arrow_ids = pyarrow.array(id_column)
+ids = numpy.from_dlpack(arrow_ids)
+
+# If the array has nulls, an alternative is:
+# ids = arrow_ids.to_numpy(zero_copy_only=False)
+
+print("Type:")
+print(type(ids))  # <class 'numpy.ndarray'>
+
+# Run a couple of NumPy operations on the ndarray
+print("\nSum:")
+print(numpy.sum(ids))
+
+print("\nLog10:")
+print(numpy.log10(ids))
diff --git a/python/python-oracledb/dataframe_pandas.py b/python/python-oracledb/dataframe_pandas.py
@@ -0,0 +1,102 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2025, Oracle and/or its affiliates.
+#
+# This software is dual-licensed to you under the Universal Permissive License
+# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
+# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
+# either license.
+#
+# If you elect to accept the software under the Apache License, Version 2.0,
+# the following applies:
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# dataframe_pandas.py
+#
+# Shows how to use connection.fetch_df_all() and connection.fetch_df_batches()
+# to create Pandas dataframes.
+# -----------------------------------------------------------------------------
+
+import pandas
+import pyarrow
+
+import oracledb
+import sample_env
+
+# determine whether to use python-oracledb thin mode or thick mode
+if not sample_env.get_is_thin():
+    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())
+
+connection = oracledb.connect(
+    user=sample_env.get_main_user(),
+    password=sample_env.get_main_password(),
+    dsn=sample_env.get_connect_string(),
+    params=sample_env.get_connect_params(),
+)
+
+SQL = "select id, name from SampleQueryTab order by id"
+
+# -----------------------------------------------------------------------------
+#
+# Fetching all records
+
+# Get an OracleDataFrame; adjust arraysize to tune query fetch performance
+odf = connection.fetch_df_all(statement=SQL, arraysize=100)
+
+# Get a Pandas DataFrame from the data
+df = pyarrow.Table.from_arrays(
+    odf.column_arrays(), names=odf.column_names()
+).to_pandas()
+
+# Perform various Pandas operations on the DataFrame
+
+print("Columns:")
+print(df.columns)
+
+print("\nDataframe description:")
+print(df.describe())
+
+print("\nLast three rows:")
+print(df.tail(3))
+
+print("\nTranspose:")
+print(df.T)
+
+# -----------------------------------------------------------------------------
+#
+# Batch record fetching
+#
+# Note that since this particular example ends up with all query rows being
+# held in memory, it would be more efficient to use fetch_df_all() as shown
+# above.
+
+print("\nFetching in batches:")
+dfs = []
+
+# Tune 'size' for your data set. Here it is small to show the batch fetch
+# behavior on the sample table.
+for odf in connection.fetch_df_batches(statement=SQL, size=10):
+    df_b = pyarrow.Table.from_arrays(
+        odf.column_arrays(), names=odf.column_names()
+    ).to_pandas()
+    print(f"Appending {df_b.shape[0]} rows")
+    dfs.append(df_b)
+
+# Concatenate once: calling pandas.concat() per batch re-copies all rows
+df = pandas.concat(dfs, ignore_index=True) if dfs else pandas.DataFrame()
+print(f"{df.shape[0]} rows, {df.shape[1]} columns")
+
+print("\nLast three rows:")
+print(df.tail(3))
diff --git a/python/python-oracledb/dataframe_pandas_async.py b/python/python-oracledb/dataframe_pandas_async.py
@@ -0,0 +1,109 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2025, Oracle and/or its affiliates.
+#
+# This software is dual-licensed to you under the Universal Permissive License
+# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
+# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
+# either license.
+#
+# If you elect to accept the software under the Apache License, Version 2.0,
+# the following applies:
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# dataframe_pandas_async.py
+#
+# An asynchronous version of dataframe_pandas.py
+#
+# Shows how to use AsyncConnection.fetch_df_all() and
+# AsyncConnection.fetch_df_batches(). This example then creates Pandas
+# dataframes. Other dataframe libraries could be used instead, as shown in
+# the other (synchronous) dataframe samples.
+# -----------------------------------------------------------------------------
+
+import asyncio
+
+import pandas
+import pyarrow
+
+import oracledb
+import sample_env
+
+
+async def main():
+    """Demonstrate fetch_df_all() and fetch_df_batches() with Pandas."""
+    connection = await oracledb.connect_async(
+        user=sample_env.get_main_user(),
+        password=sample_env.get_main_password(),
+        dsn=sample_env.get_connect_string(),
+        params=sample_env.get_connect_params(),
+    )
+
+    SQL = "select id, name from SampleQueryTab order by id"
+
+    # -------------------------------------------------------------------------
+    #
+    # Fetching all records
+
+    # Get an OracleDataFrame; adjust arraysize to tune query fetch performance
+    odf = await connection.fetch_df_all(statement=SQL, arraysize=100)
+
+    # Get a Pandas DataFrame from the data
+    df = pyarrow.Table.from_arrays(
+        odf.column_arrays(), names=odf.column_names()
+    ).to_pandas()
+
+    # Perform various Pandas operations on the DataFrame
+    print("Columns:")
+    print(df.columns)
+
+    print("\nDataframe description:")
+    print(df.describe())
+
+    print("\nLast three rows:")
+    print(df.tail(3))
+
+    print("\nTranspose:")
+    print(df.T)
+
+    # -------------------------------------------------------------------------
+    #
+    # Batch record fetching
+    #
+    # Note that since this particular example ends up with all query rows being
+    # held in memory, it would be more efficient to use fetch_df_all() as shown
+    # above.
+
+    print("\nFetching in batches:")
+    dfs = []
+
+    # Tune 'size' for your data set. Here it is small to show the batch fetch
+    # behavior on the sample table.
+    async for odf in connection.fetch_df_batches(statement=SQL, size=10):
+        df_b = pyarrow.Table.from_arrays(
+            odf.column_arrays(), names=odf.column_names()
+        ).to_pandas()
+        print(f"Appending {df_b.shape[0]} rows")
+        dfs.append(df_b)
+
+    # Concatenate once: calling pandas.concat() per batch re-copies all rows
+    df = pandas.concat(dfs, ignore_index=True) if dfs else pandas.DataFrame()
+    print(f"{df.shape[0]} rows, {df.shape[1]} columns")
+
+    print("\nLast three rows:")
+    print(df.tail(3))
+
+
+asyncio.run(main())
diff --git a/python/python-oracledb/dataframe_parquet_write.py b/python/python-oracledb/dataframe_parquet_write.py
@@ -0,0 +1,87 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2025, Oracle and/or its affiliates.
+#
+# This software is dual-licensed to you under the Universal Permissive License
+# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
+# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
+# either license.
+#
+# If you elect to accept the software under the Apache License, Version 2.0,
+# the following applies:
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# dataframe_parquet_write.py
+#
+# Shows how to use connection.fetch_df_batches() to write files in Parquet
+# format.
+# -----------------------------------------------------------------------------
+
+import os
+
+import pyarrow
+import pyarrow.parquet as pq
+
+import oracledb
+import sample_env
+
+# determine whether to use python-oracledb thin mode or thick mode
+if not sample_env.get_is_thin():
+    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())
+
+connection = oracledb.connect(
+    user=sample_env.get_main_user(),
+    password=sample_env.get_main_password(),
+    dsn=sample_env.get_connect_string(),
+    params=sample_env.get_connect_params(),
+)
+
+PARQUET_FILE_NAME = "sample.parquet"
+
+if os.path.isfile(PARQUET_FILE_NAME):
+    os.remove(PARQUET_FILE_NAME)
+
+# Tune this for your query
+BATCH_SIZE = 10
+
+SQL = "select id, name from SampleQueryTab order by id"
+pqwriter = None
+try:
+    for odf in connection.fetch_df_batches(statement=SQL, size=BATCH_SIZE):
+        table = pyarrow.Table.from_arrays(
+            arrays=odf.column_arrays(), names=odf.column_names()
+        )
+        # The writer needs a schema, so create it from the first batch
+        if pqwriter is None:
+            pqwriter = pq.ParquetWriter(PARQUET_FILE_NAME, table.schema)
+        print(f"Writing a batch of {odf.num_rows()} rows")
+        pqwriter.write_table(table)
+finally:
+    # Close the writer even if fetching or writing a batch fails
+    if pqwriter is not None:
+        pqwriter.close()
+
+# -----------------------------------------------------------------------------
+# Check the file was created
+
+print("\nParquet file metadata:")
+print(pq.read_metadata(PARQUET_FILE_NAME))
+
+# -----------------------------------------------------------------------------
+# Read the file
+
+print("\nParquet file data:")
+t = pq.read_table(PARQUET_FILE_NAME, columns=["ID", "NAME"])
+print(t)