diff --git a/.asf.yaml b/.asf.yaml index 4cd9297c8a..70f6e97a28 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -21,16 +21,22 @@ github: collaborators: - krlmlr - nbenn - - ywc88 enabled_merge_buttons: merge: false rebase: false squash: true features: + discussions: true issues: true + labels: + - arrow + - database + protected_branches: + main: {} notifications: commits: commits@arrow.apache.org + discussions: user@arrow.apache.org issues_status: issues@arrow.apache.org issues: github@arrow.apache.org pullrequests: github@arrow.apache.org diff --git a/.codespell-dictionary b/.codespell-dictionary new file mode 100644 index 0000000000..3b5db1cc24 --- /dev/null +++ b/.codespell-dictionary @@ -0,0 +1 @@ +arrpw->arrow diff --git a/.codespell-ignore b/.codespell-ignore new file mode 100644 index 0000000000..b7c9e00a11 --- /dev/null +++ b/.codespell-ignore @@ -0,0 +1,13 @@ +afterall +checkin +collet +copys +errorprone +flate +fpr +ges +gir +hastables +re-use +te +thirdparty diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000000..775b8703ee --- /dev/null +++ b/.codespellrc @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[codespell] +dictionary = .codespell-dictionary,- +ignore-words = .codespell-ignore +skip = go/adbc/go.sum diff --git a/.env b/.env index d7ff7ad64d..f62b9c65b0 100644 --- a/.env +++ b/.env @@ -16,7 +16,7 @@ # under the License. # All of the following environment variables are required to set default values -# for the parameters in docker-compose.yml. +# for the parameters in compose.yaml. # Default repository to pull and push images from REPO=apache/arrow-dev @@ -28,23 +28,23 @@ ARCH_SHORT=amd64 ARCH_CONDA_FORGE=linux_64_ # Default versions for various dependencies -JDK=11 -MANYLINUX=2-28 -MAVEN=3.6.3 +JDK=21 +MANYLINUX=2014 +MAVEN=3.9.10 PLATFORM=linux/amd64 -PYTHON=3.9 -GO=1.22.4 -ARROW_MAJOR_VERSION=14 +PYTHON=3.13 +GO=1.24.1 +ARROW_MAJOR_VERSION=18 DOTNET=8.0 -# Used through docker-compose.yml and serves as the default version for the +# Used through compose.yaml and serves as the default version for the # ci/scripts/install_vcpkg.sh script. Keep in sync with apache/arrow .env. # When updating, also update the docs, which list the version of libpq/SQLite # that vcpkg (and hence our wheels) ship -VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" +VCPKG="4334d8b4c8916018600212ab4dd4bbdc343065d1" # 2025.09.17 Release # These are used to tell tests where to find services for integration testing. -# They are valid if the services are started with the docker-compose config. +# They are valid if the services are started with the compose config. ADBC_DREMIO_FLIGHTSQL_PASS=dremio123 ADBC_DREMIO_FLIGHTSQL_URI=grpc+tcp://localhost:32010 ADBC_DREMIO_FLIGHTSQL_USER=dremio diff --git a/.gitattributes b/.gitattributes index f885677cbf..3ccebae19a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -15,9 +15,13 @@ # specific language governing permissions and limitations # under the License. 
+*.stdout.txt linguist-generated c/vendor/* linguist-vendored -go/adbc/drivermgr/adbc.h linguist-generated +go/adbc/drivermgr/arrow-adbc/adbc.h linguist-generated +go/adbc/drivermgr/arrow-adbc/adbc_driver_manager.h linguist-generated go/adbc/drivermgr/adbc_driver_manager.cc linguist-generated +go/adbc/drivermgr/current_arch.h linguist-generated +go/adbc/pkg/bigquery/* linguist-generated go/adbc/pkg/flightsql/* linguist-generated go/adbc/pkg/panicdummy/* linguist-generated go/adbc/pkg/snowflake/* linguist-generated diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1dd38f0869..402484a42d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -27,7 +27,7 @@ adbc.h @lidavidm ## Implementations /c/ @lidavidm -/csharp/ @lidavidm @CurtHagenlocher +/csharp/ @CurtHagenlocher /glib/ @kou /java/ @lidavidm /go/ @zeroshade diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 65c5410864..4f320f8d12 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -29,6 +29,16 @@ updates: interval: "weekly" commit-message: prefix: "chore(go/adbc): " + groups: + "golang.org/x": + applies-to: version-updates + patterns: + - "golang.org/x/*" + opentelemetry: + applies-to: version-updates + patterns: + - "go.opentelemetry.io/otel" + - "go.opentelemetry.io/otel/*" - package-ecosystem: "maven" directory: "/java/" schedule: @@ -51,3 +61,15 @@ updates: - dependency-name: "System.*" update-types: - "version-update:semver-major" + - package-ecosystem: "cargo" + directory: "/rust/" + schedule: + interval: "weekly" + commit-message: + prefix: "chore(rust): " + groups: + arrow-datafusion: + applies-to: version-updates + patterns: + - "arrow-*" + - "datafusion*" diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index b2f643343c..e86da725e3 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -32,7 +32,7 @@ jobs: if: github.event.comment.body == 'take' runs-on: ubuntu-latest steps: - - uses: 
actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 08d62e7a30..0546a88073 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -36,6 +36,11 @@ concurrency: permissions: contents: read +defaults: + run: + # 'bash' will expand to -eo pipefail + shell: bash + jobs: csharp: name: "C# ${{ matrix.os }} ${{ matrix.dotnet }}" @@ -46,20 +51,17 @@ jobs: fail-fast: false matrix: dotnet: ['8.0.x'] - os: [ubuntu-latest, windows-2019, macos-13, macos-latest] + os: [ubuntu-latest, windows-2022, macos-15-intel, macos-latest] steps: - name: Install C# - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v5 with: dotnet-version: ${{ matrix.dotnet }} - name: Checkout ADBC - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 submodules: recursive - - name: Install Source Link - shell: bash - run: dotnet tool install --global sourcelink - name: Build shell: bash run: ci/scripts/csharp_build.sh $(pwd) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 88bcf119c7..b192fe8d0b 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -28,21 +28,29 @@ concurrency: permissions: contents: read +defaults: + run: + # 'bash' will expand to -eo pipefail + shell: bash + jobs: pre-commit: name: "pre-commit" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - - uses: actions/setup-go@v5 + - name: Get required Go version + run: | + (. 
.env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV + - uses: actions/setup-go@v6 with: - go-version-file: 'go/adbc/go.mod' + go-version: "${{ env.GO_VERSION }}" check-latest: true - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: '3.x' - name: install golangci-lint diff --git a/.github/workflows/dev_adbc.yml b/.github/workflows/dev_adbc.yml new file mode 100644 index 0000000000..8b94610afc --- /dev/null +++ b/.github/workflows/dev_adbc.yml @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Dev ADBC + +on: + pull_request: + branches: + - main + paths: + - "dev/**" + - ".github/workflows/dev_adbc.yml" + push: + paths: + - "dev/**" + - ".github/workflows/dev_adbc.yml" + +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + +permissions: + contents: read + +defaults: + run: + shell: bash -l -eo pipefail {0} + +jobs: + pre-commit: + name: "pre-commit" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Cache Conda + uses: actions/cache@v4 + with: + path: ~/conda_pkgs_dir + key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 + with: + miniforge-version: latest + use-only-tar-bz2: false + use-mamba: true + + - name: Install Dependencies + run: | + mamba install -c conda-forge \ + --file ci/conda_env_dev.txt \ + pytest + + - name: Test + run: | + pytest -vv dev/adbc_dev/ diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index fec7499d74..51f3b8457d 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -31,12 +31,17 @@ permissions: issues: write pull-requests: write +defaults: + run: + # 'bash' will expand to -eo pipefail + shell: bash + jobs: process: name: Process runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: persist-credentials: false @@ -47,7 +52,7 @@ jobs: run: | ./.github/workflows/dev_pr/milestone.sh "${GITHUB_REPOSITORY}" ${{github.event.number}} - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 path: pr_checkout @@ -58,11 +63,15 @@ jobs: env: PR_TITLE: ${{ github.event.pull_request.title }} run: | - python .github/workflows/dev_pr/title_check.py $(pwd)/pr_checkout "$PR_TITLE" + python dev/adbc_dev/title_check.py 
$(pwd)/pr_checkout "$PR_TITLE" # Pings make it into the commit message where they annoy the user every # time the commit gets pushed somewhere - name: Check PR body for pings + # Don't check this if it's dependabot - dependabot doesn't ping users + # and it tends to include things that look like pings (but are + # escaped) + if: github.actor != 'dependabot[bot]' env: PR_BODY: ${{ github.event.pull_request.body }} run: | diff --git a/.github/workflows/dev_pr/body_check.py b/.github/workflows/dev_pr/body_check.py old mode 100644 new mode 100755 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 352f3cca30..01c508647d 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -28,6 +28,8 @@ on: - "python/**" - ".github/workflows/integration.yml" push: + branches-ignore: + - 'dependabot/**' paths: - "c/**" - "ci/**" @@ -46,18 +48,21 @@ env: # Increment this to reset cache manually CACHE_NUMBER: "0" +defaults: + run: + shell: bash -l -eo pipefail {0} + jobs: duckdb: name: "DuckDB Integration Tests" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -65,22 +70,19 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ --file ci/conda_env_cpp.txt - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} run: | sudo sysctl vm.mmap_rnd_bits=28 - name: 
Build DuckDB Integration Tests - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" @@ -92,7 +94,6 @@ jobs: run: | ./ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" - name: Run DuckDB Integration Tests - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_INTEGRATION_DUCKDB: "1" @@ -103,7 +104,7 @@ jobs: name: "FlightSQL Integration Tests (Dremio and SQLite)" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -112,7 +113,6 @@ jobs: (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -120,37 +120,33 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_python.txt pip install pytest-error-for-skips - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" check-latest: true cache: true cache-dependency-path: go/adbc/go.sum - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} run: | sudo sysctl vm.mmap_rnd_bits=28 - name: Start SQLite server and Dremio - shell: bash -l {0} run: | docker compose up --detach flightsql-test flightsql-sqlite-test dremio dremio-init pip install python-dotenv[cli] python -m dotenv -f .env list --format simple | tee -a $GITHUB_ENV - name: Build FlightSQL Driver - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_DRIVER_FLIGHTSQL: "1" @@ -159,7 +155,6 @@ jobs: 
./ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" ./ci/scripts/python_build.sh "$(pwd)" "$(pwd)/build" - name: Test FlightSQL Driver against Dremio and SQLite - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_DRIVER_FLIGHTSQL: "1" @@ -167,7 +162,6 @@ jobs: ./ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" ./ci/scripts/cpp_test.sh "$(pwd)/build" - name: Test Python Flight SQL driver against Dremio - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_DRIVER_FLIGHTSQL: "1" @@ -175,7 +169,6 @@ jobs: run: | ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" - name: Stop SQLite server and Dremio - shell: bash -l {0} run: | docker compose down @@ -183,13 +176,12 @@ jobs: name: "PostgreSQL Integration Tests" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -197,24 +189,21 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_python.txt pip install pytest-error-for-skips - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} run: | sudo sysctl vm.mmap_rnd_bits=28 - name: Build PostgreSQL Driver - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" @@ -223,70 +212,53 @@ jobs: ./ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" ./ci/scripts/python_build.sh "$(pwd)" "$(pwd)/build" - name: Configure Integration Env Vars - shell: bash -l {0} run: | pip install 
python-dotenv[cli] python -m dotenv -f .env list --format simple | tee -a $GITHUB_ENV echo "ADBC_USE_ASAN=ON" >> $GITHUB_ENV echo "ADBC_USE_UBSAN=ON" >> $GITHUB_ENV - - name: Test PostgreSQL Driver - postgres 11 - shell: bash -l {0} - env: - BUILD_ALL: "0" - BUILD_DRIVER_POSTGRESQL: "1" - PYTEST_ADDOPTS: "--error-for-skips" - run: | - env POSTGRES_VERSION=11 docker compose up --wait --detach postgres-test - ./ci/scripts/cpp_test.sh "$(pwd)/build" - ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" - docker compose down - - name: Test PostgreSQL Driver - postgres 12 - shell: bash -l {0} + - name: Test PostgreSQL Driver - postgres 13 env: BUILD_ALL: "0" BUILD_DRIVER_POSTGRESQL: "1" PYTEST_ADDOPTS: "--error-for-skips" run: | - env POSTGRES_VERSION=12 docker compose up --wait --detach postgres-test + env POSTGRES_VERSION=13 docker compose up --wait --detach postgres-test ./ci/scripts/cpp_test.sh "$(pwd)/build" ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" docker compose down - - name: Test PostgreSQL Driver - postgres 13 - shell: bash -l {0} + - name: Test PostgreSQL Driver - postgres 14 env: BUILD_ALL: "0" BUILD_DRIVER_POSTGRESQL: "1" PYTEST_ADDOPTS: "--error-for-skips" run: | - env POSTGRES_VERSION=13 docker compose up --wait --detach postgres-test + env POSTGRES_VERSION=14 docker compose up --wait --detach postgres-test ./ci/scripts/cpp_test.sh "$(pwd)/build" ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" docker compose down - - name: Test PostgreSQL Driver - postgres 14 - shell: bash -l {0} + - name: Test PostgreSQL Driver - postgres 15 env: BUILD_ALL: "0" BUILD_DRIVER_POSTGRESQL: "1" PYTEST_ADDOPTS: "--error-for-skips" run: | - env POSTGRES_VERSION=14 docker compose up --wait --detach postgres-test + env POSTGRES_VERSION=15 docker compose up --wait --detach postgres-test ./ci/scripts/cpp_test.sh "$(pwd)/build" ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" docker compose down - - name: Test PostgreSQL Driver - postgres 15 - shell: bash -l {0} + - 
name: Test PostgreSQL Driver - postgres 16 env: BUILD_ALL: "0" BUILD_DRIVER_POSTGRESQL: "1" PYTEST_ADDOPTS: "--error-for-skips" run: | - env POSTGRES_VERSION=15 docker compose up --wait --detach postgres-test + env POSTGRES_VERSION=16 docker compose up --wait --detach postgres-test ./ci/scripts/cpp_test.sh "$(pwd)/build" ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" docker compose down - - name: Test PostgreSQL Driver - postgres 16 - shell: bash -l {0} + - name: Test PostgreSQL Driver - postgres 17 env: BUILD_ALL: "0" BUILD_DRIVER_POSTGRESQL: "1" @@ -301,7 +273,7 @@ jobs: name: "Snowflake Integration Tests" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -310,7 +282,6 @@ jobs: (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -318,29 +289,26 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_python.txt - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} run: | sudo sysctl vm.mmap_rnd_bits=28 - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" check-latest: true cache: true cache-dependency-path: go/adbc/go.sum - name: Build and Test Snowflake Driver - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_DRIVER_SNOWFLAKE: "1" @@ -349,7 +317,6 @@ jobs: ./ci/scripts/cpp_build.sh "$(pwd)" 
"$(pwd)/build" ./ci/scripts/cpp_test.sh "$(pwd)/build" - name: Build and Test Snowflake Driver (Python) - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" @@ -358,3 +325,36 @@ jobs: run: | ./ci/scripts/python_build.sh "$(pwd)" "$(pwd)/build" env BUILD_DRIVER_MANAGER=0 ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" + + flightsql_interop: + name: "FlightSQL C# Interop" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + persist-credentials: false + - name: Get required Go version + run: | + (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV + - uses: actions/setup-go@v6 + with: + go-version: "${{ env.GO_VERSION }}" + check-latest: true + cache: true + cache-dependency-path: go/adbc/go.sum + - name: Build ADBC Driver + working-directory: go/adbc/pkg + run: | + make libadbc_driver_flightsql.so + - name: Start Test Servers + run: | + docker compose up --wait --detach spiceai-test + - name: Test Driver against Spice.ai OSS + env: + FLIGHTSQL_INTEROP_TEST_CONFIG_FILE: "../../../../../csharp/configs/flightsql-spiceai.json" + run: | + dotnet test ./csharp/test/Drivers/Interop/FlightSql/Apache.Arrow.Adbc.Tests.Drivers.Interop.FlightSql.csproj + - name: Stop Test Servers + run: | + docker compose down diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index e2457f29c2..47c1cf7528 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -25,6 +25,8 @@ on: - "java/**" - ".github/workflows/java.yml" push: + branches-ignore: + - 'dependabot/**' paths: - "java/**" - ".github/workflows/java.yml" @@ -36,26 +38,29 @@ concurrency: permissions: contents: read +defaults: + run: + shell: bash -l -eo pipefail {0} + jobs: java: name: "Java ${{ matrix.java }}/Linux" runs-on: ubuntu-latest strategy: matrix: - java: ['11', '17', '21', '22'] + java: ['11', '17', '21', '24'] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false submodules: 
recursive - - uses: actions/setup-java@v4 + - uses: actions/setup-java@v5 with: cache: "maven" distribution: "temurin" java-version: ${{ matrix.java }} - name: Start test services - shell: bash -l {0} run: | docker compose up --detach --wait flightsql-sqlite-test mssql-test postgres-test cat .env | grep -v -e '^#' | awk NF | tee -a $GITHUB_ENV @@ -71,12 +76,12 @@ jobs: matrix: java: ['17', '21'] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false submodules: recursive - - uses: actions/setup-java@v4 + - uses: actions/setup-java@v5 with: cache: "maven" distribution: "temurin" @@ -99,3 +104,133 @@ jobs: HERE # Our linters are compile-time, no point re-running tests mvn -P errorprone install -DskipTests + + java-jni-artifacts: + name: "JNI Libraries/${{ matrix.os }} ${{ matrix.arch }}" + runs-on: ${{ matrix.runner }} + strategy: + matrix: + include: + - { os: Linux, arch: amd64, vcpkg_arch: x64, runner: ubuntu-latest } + - { os: macOS, arch: arm64v8, vcpkg_arch: arm64, runner: macos-latest } + env: + VCPKG_ROOT: "${{ github.workspace }}/vcpkg" + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + persist-credentials: false + - uses: actions/setup-java@v5 + with: + cache: "maven" + distribution: "temurin" + java-version: 11 + - name: Retrieve Go, VCPKG version from .env + run: | + (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV + (. 
.env && echo "VCPKG_VERSION=${VCPKG}") >> $GITHUB_ENV + - name: Install vcpkg + run: | + ./ci/scripts/install_vcpkg.sh $VCPKG_ROOT $VCPKG_VERSION + - uses: actions/setup-go@v6 + with: + go-version: "${{ env.GO_VERSION }}" + check-latest: true + cache: true + cache-dependency-path: adbc/go/adbc/go.sum + - name: Install Homebrew dependencies + if: matrix.os == 'macOS' + run: brew install autoconf bash pkg-config ninja + - name: Build artifacts + if: matrix.os != 'macOS' + run: | + docker compose run python-wheel-manylinux-build + - name: Build artifacts (macOS) + if: matrix.os == 'macOS' + run: | + # XXX(https://github.com/apache/arrow-adbc/issues/3382) + sudo xcode-select -s "/Applications/Xcode_16.app" + + export ADBC_BUILD_STATIC=ON + export ADBC_BUILD_TESTS=OFF + export ADBC_USE_ASAN=OFF + export ADBC_USE_UBSAN=OFF + ./ci/scripts/python_wheel_unix_build.sh ${{ matrix.arch }} $(pwd) $(pwd)/build + - name: Build JNI artifacts + run: | + export ADBC_BUILD_STATIC=ON + export ADBC_BUILD_TESTS=OFF + export ADBC_USE_ASAN=OFF + export ADBC_USE_UBSAN=OFF + ./ci/scripts/java_build.sh $(pwd) + ./ci/scripts/java_jni_build.sh $(pwd) $(pwd)/build_jni $(pwd)/build/${{ matrix.vcpkg_arch }} + - name: Assemble artifacts + run: | + mkdir artifacts + cp -r java/driver/jni/src/main/resources/ artifacts/jni + cp -r build/${{ matrix.vcpkg_arch }}/lib artifacts/driver + ls -laR artifacts + mv artifacts artifacts-${{ matrix.os }}-${{ matrix.arch }} + tar czf artifacts-${{ matrix.os }}-${{ matrix.arch }}.tgz artifacts-${{ matrix.os }}-${{ matrix.arch }} + - uses: actions/upload-artifact@v4 + with: + name: jni-artifacts-${{ matrix.os }}-${{ matrix.arch }} + retention-days: 7 + path: | + artifacts-${{ matrix.os }}-${{ matrix.arch }}.tgz + - name: Assemble logs + if: failure() + run: | + mkdir ~/logs + find "$VCPKG_ROOT" -name 'build-*.log' -exec cp '{}' ~/logs ';' + - name: Upload failure logs + if: failure() + uses: actions/upload-artifact@v4 + with: + name: jni-artifacts-${{ matrix.os 
}}-${{ matrix.arch }} + retention-days: 7 + path: ~/logs + + java-jni: + name: "Java ${{ matrix.java }} JNI/${{ matrix.os }} ${{ matrix.arch }}" + runs-on: ${{ matrix.runner }} + needs: + - java-jni-artifacts + strategy: + matrix: + java: ['11', '24'] + include: + - { os: Linux, arch: amd64, vcpkg_arch: x64, runner: ubuntu-latest } + - { os: macOS, arch: arm64v8, vcpkg_arch: arm64, runner: macos-latest } + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + persist-credentials: false + submodules: recursive + - uses: actions/download-artifact@v5 + with: + path: artifacts + pattern: jni-artifacts-* + merge-multiple: true + - uses: actions/setup-java@v5 + with: + cache: "maven" + distribution: "temurin" + java-version: 11 + - name: Build/Test + run: | + set -x + pushd artifacts + for archive in artifacts*.tgz; do + tar xvf $archive + done + popd + cp -r artifacts/*/jni/adbc_driver_jni java/driver/jni/src/main/resources + for driver in artifacts/*/driver; do + export LD_LIBRARY_PATH=$(pwd)/$driver:${LD_LIBRARY_PATH:-} + export DYLD_LIBRARY_PATH=$(pwd)/$driver:${DYLD_LIBRARY_PATH:-} + done + env BUILD_JNI=ON ./ci/scripts/java_build.sh $(pwd) + cd java + mvn -B -Pjni test -pl :adbc-driver-jni diff --git a/.github/workflows/native-unix.yml b/.github/workflows/native-unix.yml index 770fd2a689..3c9d83cc30 100644 --- a/.github/workflows/native-unix.yml +++ b/.github/workflows/native-unix.yml @@ -31,6 +31,8 @@ on: - "ruby/**" - ".github/workflows/native-unix.yml" push: + branches-ignore: + - 'dependabot/**' paths: - "c/**" - "ci/**" @@ -49,6 +51,10 @@ concurrency: permissions: contents: read +defaults: + run: + shell: bash -l -eo pipefail {0} + env: # Increment this to reset cache manually CACHE_NUMBER: "1" @@ -62,9 +68,9 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ["macos-13", "macos-latest", "ubuntu-latest"] + os: ["macos-15-intel", "macos-latest", "ubuntu-latest"] include: - - os: macos-13 + - os: macos-15-intel goarch: x64 - os: macos-latest 
goarch: arm64 @@ -75,7 +81,7 @@ jobs: # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk CXXFLAGS: "-D_LIBCPP_DISABLE_AVAILABILITY" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -84,7 +90,6 @@ jobs: (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -92,17 +97,16 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ --file ci/conda_env_cpp.txt - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" check-latest: true @@ -110,7 +114,6 @@ jobs: cache-dependency-path: go/adbc/go.sum - name: Build and Install (No ASan) - shell: bash -l {0} run: | # Python and others need something that don't use the ASAN runtime rm -rf "$(pwd)/build" @@ -122,7 +125,6 @@ jobs: export ADBC_CMAKE_ARGS="-DCMAKE_UNITY_BUILD=ON" ./ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - name: Go Build - shell: bash -l {0} env: CGO_ENABLED: "1" run: | @@ -135,7 +137,6 @@ jobs: # https://github.com/actions/download-artifact/issues/346 - name: tar artifacts - shell: bash -l {0} run: | cd tar czf ~/local.tgz local @@ -155,19 +156,18 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ["macos-13", "macos-latest", "ubuntu-latest"] + os: ["macos-15-intel", "macos-latest", "ubuntu-latest"] env: # Required for macOS # 
https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk CXXFLAGS: "-D_LIBCPP_DISABLE_AVAILABILITY" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -175,34 +175,33 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ --file ci/conda_env_cpp.txt - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: | sudo sysctl vm.mmap_rnd_bits=28 - name: Build - shell: bash -l {0} env: ADBC_CMAKE_ARGS: "-DCMAKE_UNITY_BUILD=ON" BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" BUILD_DRIVER_POSTGRESQL: "1" BUILD_DRIVER_SQLITE: "1" + ADBC_DRIVER_MANAGER_USER_CONFIG_TEST: "1" run: | + # Ensure the CONDA_PREFIX searching in driver manager is tested + export CONDA_BUILD=1 ./ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" - name: Test - shell: bash -l {0} env: BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" @@ -220,7 +219,7 @@ jobs: matrix: os: ["ubuntu-latest"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -231,20 +230,19 @@ jobs: - name: Get required Go version run: | (. 
.env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" check-latest: true cache: true cache-dependency-path: go/adbc/go.sum - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.x' - name: Install Meson via Python run: pip install meson - name: Start SQLite server, Dremio, and postgresql - shell: bash -l {0} run: | env POSTSGRES_VERSION=16 docker compose up --detach --wait \ dremio \ @@ -257,20 +255,26 @@ jobs: - name: Build run: | meson setup \ - -Db_sanitize=address,undefined \ - -Ddriver_manager=true \ - -Dflightsql=true \ - -Dpostgresql=true \ - -Dsnowflake=true \ - -Dsqlite=true \ - -Dtests=true \ + -Dauto_features=enabled \ c c/build meson compile -C c/build - name: Test run: | meson test -C c/build --print-errorlogs + - name: Build with sanitizers + run: | + # skip bigquery and flightsql for now; see GH-2744 + meson setup \ + -Db_sanitize=address,undefined \ + -Dauto_features=enabled \ + -Dbigquery=disabled \ + -Dflightsql=disabled \ + c c/build + meson compile -C c/build + - name: Test with sanitizers + run: | + meson test -C c/build --print-errorlogs - name: Stop SQLite server, Dremio, and postgresql - shell: bash -l {0} run: | docker compose down @@ -278,13 +282,12 @@ jobs: name: "clang-tidy" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -292,20 +295,18 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: 
false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_cpp_lint.txt - name: clang-tidy - shell: bash -l {0} run: | ./ci/scripts/cpp_clang_tidy.sh "$(pwd)" "$(pwd)/build" @@ -321,19 +322,18 @@ jobs: matrix: # N.B. no macos-latest here since conda-forge does not package # arrow-c-glib for M1 - os: ["macos-13", "ubuntu-latest"] + os: ["macos-15-intel", "ubuntu-latest"] env: # Required for macOS # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk CXXFLAGS: "-D_LIBCPP_DISABLE_AVAILABILITY" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -341,44 +341,50 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ 'arrow-c-glib>=10.0.1' \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_glib.txt - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: | sudo sysctl vm.mmap_rnd_bits=28 - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: driver-manager-${{ matrix.os }} path: "~" - name: untar artifacts - shell: bash -l {0} run: | cd tar xvf ~/local.tgz - name: Build GLib Driver Manager - shell: bash -l {0} run: | env BUILD_ALL=0 BUILD_DRIVER_MANAGER=1 
./ci/scripts/glib_build.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - name: Test GLib/Ruby Driver Manager - shell: bash -l {0} run: | env BUILD_ALL=0 BUILD_DRIVER_MANAGER=1 ./ci/scripts/glib_test.sh "$(pwd)" "$(pwd)/build" "$HOME/local" + - name: Search for build logs + if: failure() + run: | + for log in $(find glib -type f | grep mkmf.log); do + echo ============================================================ + echo $log + cat $log + echo ============================================================ + done + + # ------------------------------------------------------------ # Go # ------------------------------------------------------------ @@ -389,27 +395,31 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ["macos-13", "macos-latest", "ubuntu-latest", "windows-latest"] + os: ["macos-15-intel", "macos-latest", "ubuntu-latest", "windows-latest"] permissions: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - - uses: 'google-github-actions/auth@v2' + - uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3.0.0 continue-on-error: true # if auth fails, bigquery driver tests should skip with: workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} - - uses: actions/setup-go@v5 - with: - go-version-file: "go/adbc/go.mod" + # XXX: https://github.com/actions/setup-go/issues/457 + # We can't use go-version-file for setup-go because it doesn't understand + # the 'toolchain' parameter and staticcheck tends to need the latest Go + - id: toolchain + run: echo "GO_VERSION=$(sed -n 's/^toolchain go//p' go/adbc/go.mod)" >> "$GITHUB_OUTPUT" + - uses: actions/setup-go@v6 + with: + go-version: ${{ steps.toolchain.outputs.GO_VERSION }} check-latest: true cache: true cache-dependency-path: go/adbc/go.sum - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: | sudo sysctl vm.mmap_rnd_bits=28 @@ 
-426,7 +436,7 @@ jobs: popd - name: Go Test env: - SNOWFLAKE_DATABASE: ADBC_TESTING + SNOWFLAKE_DATABASE: ARROW_OSS_DB SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} run: | ./ci/scripts/go_test.sh "$(pwd)" "$(pwd)/build" "$HOME/local" @@ -438,9 +448,9 @@ jobs: - drivers-build-conda strategy: matrix: - os: ["macos-13", "macos-latest", "ubuntu-latest"] + os: ["macos-15-intel", "macos-latest", "ubuntu-latest"] include: - - os: macos-13 + - os: macos-15-intel goarch: x64 - os: macos-latest goarch: arm64 @@ -452,11 +462,11 @@ jobs: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - - uses: 'google-github-actions/auth@v2' + - uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3.0.0 continue-on-error: true # if auth fails, bigquery driver tests should skip with: workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} @@ -465,7 +475,6 @@ jobs: (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -473,58 +482,51 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: | sudo sysctl vm.mmap_rnd_bits=28 - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ --file ci/conda_env_cpp.txt - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" check-latest: true cache: true 
cache-dependency-path: go/adbc/go.sum - name: Install staticcheck - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: go install honnef.co/go/tools/cmd/staticcheck@latest - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: driver-manager-${{ matrix.os }} path: "~" - name: untar artifacts - shell: bash -l {0} run: | cd tar xvf ~/local.tgz - name: Go Build - shell: bash -l {0} run: | export PATH=$RUNNER_TOOL_CACHE/go/${GO_VERSION}/${{ matrix.goarch }}/bin:$PATH ./ci/scripts/go_build.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - name: Run Staticcheck if: matrix.os == 'ubuntu-latest' - shell: bash -l {0} run: | pushd go/adbc staticcheck -f stylish ./... popd - name: Go Test - shell: bash -l {0} env: SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} run: | @@ -541,14 +543,14 @@ jobs: - drivers-build-conda strategy: matrix: - os: ["macos-13", "macos-latest", "ubuntu-latest"] - python: ["3.9", "3.12"] + os: ["macos-15-intel", "macos-latest", "ubuntu-latest"] + python: ["3.10", "3.13"] env: # Required for macOS # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk CXXFLAGS: "-D_LIBCPP_DISABLE_AVAILABILITY" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -557,7 +559,6 @@ jobs: (. 
.env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -565,43 +566,39 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ python=${{ matrix.python }} \ --file ci/conda_env_cpp.txt \ --file ci/conda_env_python.txt - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: | sudo sysctl vm.mmap_rnd_bits=28 - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" check-latest: true cache: true cache-dependency-path: go/adbc/go.sum - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: driver-manager-${{ matrix.os }} path: "~" - name: untar artifacts - shell: bash -l {0} run: | cd tar xvf ~/local.tgz - name: Build - shell: bash -l {0} env: BUILD_ALL: "1" ADBC_USE_ASAN: "0" @@ -610,7 +607,6 @@ jobs: export PATH=$RUNNER_TOOL_CACHE/go/${GO_VERSION}/x64/bin:$PATH ./ci/scripts/python_build.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - name: Build Panic Dummy - shell: bash -l {0} run: | export PATH=$RUNNER_TOOL_CACHE/go/${GO_VERSION}/x64/bin:$PATH if [[ $(uname) = "Darwin" ]]; then @@ -619,7 +615,6 @@ jobs: make -C ./go/adbc/pkg libadbc_driver_panicdummy.so fi - name: Test Python Driver Manager - shell: bash -l {0} run: | if [[ $(uname) = "Darwin" ]]; then export PANICDUMMY_LIBRARY_PATH=$(pwd)/go/adbc/pkg/libadbc_driver_panicdummy.dylib @@ -629,7 +624,6 @@ jobs: export 
PATH=$RUNNER_TOOL_CACHE/go/${GO_VERSION}/x64/bin:$PATH env BUILD_ALL=0 BUILD_DRIVER_MANAGER=1 ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - name: Test Python Driver Flight SQL - shell: bash -l {0} run: | export PATH=$RUNNER_TOOL_CACHE/go/${GO_VERSION}/x64/bin:$PATH # Can't use Docker on macOS @@ -647,21 +641,24 @@ jobs: env BUILD_ALL=0 BUILD_DRIVER_FLIGHTSQL=1 ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" "$HOME/local" kill %1 - name: Test Python Driver PostgreSQL - shell: bash -l {0} run: | env BUILD_ALL=0 BUILD_DRIVER_POSTGRESQL=1 ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - name: Test Python Driver SQLite - shell: bash -l {0} run: | env BUILD_ALL=0 BUILD_DRIVER_SQLITE=1 ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - name: Test Python Driver Snowflake - shell: bash -l {0} run: | env BUILD_ALL=0 BUILD_DRIVER_SNOWFLAKE=1 ./ci/scripts/python_test.sh "$(pwd)" "$(pwd)/build" "$HOME/local" - name: Typecheck Python - shell: bash -l {0} run: | ./ci/scripts/python_typecheck.sh "$(pwd)" + - name: Run Python Docker-based integration tests + if: runner.os == 'Linux' + run: | + # Self-contained tests using docker-compose + + # Test that the driver manager can load manifests installed into a venv + docker compose run python-venv python-docs: name: "Documentation ${{ matrix.python }} (Conda/${{ matrix.os }})" @@ -671,15 +668,14 @@ jobs: strategy: matrix: os: ["ubuntu-latest"] - python: ["3.12"] + python: ["3.13"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - name: Get Date id: get-date - shell: bash run: | echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - name: Cache Conda @@ -687,37 +683,33 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: 
conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false use-mamba: true - name: Install Dependencies - shell: bash -l {0} run: | - mamba install -c conda-forge \ + ./ci/scripts/remamba.sh install -c conda-forge \ python=${{ matrix.python }} \ --file ci/conda_env_docs.txt \ --file ci/conda_env_python.txt - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: | sudo sysctl vm.mmap_rnd_bits=28 - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: driver-manager-${{ matrix.os }} path: "~" - name: untar artifacts - shell: bash -l {0} run: | cd tar xvf ~/local.tgz - name: Build Python - shell: bash -l {0} env: BUILD_ALL: "1" ADBC_USE_ASAN: "0" @@ -726,7 +718,6 @@ jobs: ./ci/scripts/python_build.sh "$(pwd)" "$(pwd)/build" "$HOME/local" # Docs requires Python packages since it runs doctests - name: Build Docs - shell: bash -l {0} run: | ./ci/scripts/docs_build.sh "$(pwd)" - name: Archive docs @@ -737,16 +728,25 @@ jobs: path: | docs/build/html - name: Configure Integration Env Vars - shell: bash -l {0} run: | pip install python-dotenv[cli] python -m dotenv -f .env list --format simple | tee -a $GITHUB_ENV - name: Test Recipes (C++) - shell: bash -l {0} run: | ./ci/scripts/cpp_recipe.sh $(pwd) ~/local build/recipe + - name: Ensure recipes are up to date + run: | + pip install -e ./docs/source/ext/sphinx_recipe + # Exits 1 if any recipes were updated + python -m sphinx_recipe.update_output \ + docs/source/cpp/recipe/*.cc \ + docs/source/cpp/recipe_driver/driver_example.py \ + docs/source/python/recipe/*.py - name: Test Recipes (Python) - shell: bash -l {0} run: | - docker compose up --detach --wait dremio dremio-init flightsql-sqlite-test postgres-test + docker compose up --detach --wait dremio flightsql-sqlite-test postgres-test + docker compose run --rm dremio-init + export ADBC_CPP_RECIPE_BIN=~/local/bin + # 
Needed for the combined C++/Python driver example + export LD_LIBRARY_PATH=~/local/lib pytest -vvs docs/source/tests/ diff --git a/.github/workflows/native-windows.yml b/.github/workflows/native-windows.yml index 9fb3caf228..b2bf4d9310 100644 --- a/.github/workflows/native-windows.yml +++ b/.github/workflows/native-windows.yml @@ -30,6 +30,8 @@ on: - "ruby/**" - ".github/workflows/native-windows.yml" push: + branches-ignore: + - 'dependabot/**' paths: - "c/**" - "ci/**" @@ -47,6 +49,10 @@ concurrency: permissions: contents: read +defaults: + run: + shell: pwsh + jobs: # ------------------------------------------------------------ # Common build (builds libraries used in GLib, Python, Ruby) @@ -58,7 +64,7 @@ jobs: matrix: os: ["windows-latest"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -75,12 +81,11 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: true - name: Install Dependencies - shell: pwsh run: | mamba install -c conda-forge ` --file ci\conda_env_cpp.txt @@ -88,13 +93,13 @@ jobs: mamba uninstall gtest - name: Build and Install (No ASan) - shell: pwsh env: BUILD_ALL: "1" # TODO(apache/arrow-adbc#634) BUILD_DRIVER_BIGQUERY: "0" BUILD_DRIVER_FLIGHTSQL: "0" BUILD_DRIVER_SNOWFLAKE: "0" + BUILD_DRIVER_MANAGER_USER_CONFIG_TEST: "1" run: | .\ci\scripts\cpp_build.ps1 $pwd ${{ github.workspace }}\build @@ -115,7 +120,7 @@ jobs: matrix: os: ["windows-latest"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -132,12 +137,11 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ 
hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: true - name: Install Dependencies - shell: pwsh run: | mamba install -c conda-forge ` --file ci\conda_env_cpp.txt @@ -145,31 +149,28 @@ jobs: mamba uninstall gtest - name: Build Driver Manager - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" + BUILD_DRIVER_MANAGER_USER_CONFIG_TEST: "1" run: .\ci\scripts\cpp_build.ps1 $pwd $pwd\build - name: Build Driver PostgreSQL - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_POSTGRESQL: "1" run: .\ci\scripts\cpp_build.ps1 $pwd $pwd\build - name: Build Driver SQLite - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_SQLITE: "1" run: .\ci\scripts\cpp_build.ps1 $pwd $pwd\build - name: Test Driver Manager - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" + BUILD_DRIVER_MANAGER_USER_CONFIG_TEST: "1" run: .\ci\scripts\cpp_test.ps1 $pwd\build - name: Test Driver SQLite - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_SQLITE: "1" @@ -187,7 +188,7 @@ jobs: matrix: os: ["windows-latest"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -208,29 +209,27 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: true - name: Install Dependencies - shell: pwsh run: | mamba install -c conda-forge ` --file ci\conda_env_cpp.txt - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" check-latest: true cache: true cache-dependency-path: go/adbc/go.sum - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: 
driver-manager-${{ matrix.os }} path: ${{ github.workspace }}/build - name: Go Build - shell: pwsh env: CGO_ENABLED: "1" run: | @@ -238,7 +237,6 @@ jobs: .\ci\scripts\go_build.ps1 $pwd $pwd\build # TODO(apache/arrow#358): enable these tests on Windows # - name: Go Test - # shell: pwsh # env: # CGO_ENABLED: "1" # run: | @@ -256,9 +254,9 @@ jobs: strategy: matrix: os: ["windows-latest"] - python: ["3.9", "3.11"] + python: ["3.10", "3.13"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -275,55 +273,48 @@ jobs: with: path: ~/conda_pkgs_dir key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: true - name: Install Dependencies - shell: pwsh run: | mamba install -c conda-forge ` python=${{ matrix.python }} ` --file ci\conda_env_cpp.txt ` --file ci\conda_env_python.txt - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: driver-manager-${{ matrix.os }} path: ${{ github.workspace }}/build - name: Build Python Driver Manager - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" run: .\ci\scripts\python_build.ps1 $pwd $pwd\build - name: Build Python Driver PostgreSQL - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_POSTGRESQL: "1" run: .\ci\scripts\python_build.ps1 $pwd $pwd\build - name: Build Python Driver SQLite - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_SQLITE: "1" run: .\ci\scripts\python_build.ps1 $pwd $pwd\build - name: Test Python Driver Manager - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_MANAGER: "1" run: .\ci\scripts\python_test.ps1 $pwd $pwd\build - name: Test Python Driver PostgreSQL - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_POSTGRESQL: "1" run: .\ci\scripts\python_test.ps1 $pwd $pwd\build - name: Test 
Python Driver SQLite - shell: pwsh env: BUILD_ALL: "0" BUILD_DRIVER_SQLITE: "1" diff --git a/.github/workflows/nightly-verify.yml b/.github/workflows/nightly-verify.yml index ae6d7f87da..33f6fafc4b 100644 --- a/.github/workflows/nightly-verify.yml +++ b/.github/workflows/nightly-verify.yml @@ -23,6 +23,8 @@ on: - main paths: - ".github/workflows/nightly-verify.yml" + - "ci/docker/cpp-clang-latest.dockerfile" + - "ci/docker/cpp-gcc-latest.dockerfile" - "dev/release/verify-release-candidate.sh" - "dev/release/verify-release-candidate.ps1" schedule: @@ -32,27 +34,34 @@ on: permissions: contents: read +defaults: + run: + # 'bash' will expand to -eo pipefail + shell: bash + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: source: # For cron: only run on the main repo, not forks if: github.event_name != 'schedule' || github.repository == 'apache/arrow-adbc' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false submodules: recursive - name: Prepare version - shell: bash run: | VERSION=$(grep 'set(ADBC_VERSION' c/cmake_modules/AdbcVersion.cmake | \ grep -E -o '[0-9]+\.[0-9]+\.[0-9]+') echo "VERSION=${VERSION}" >> $GITHUB_ENV - name: Create archive - shell: bash run: | git config --global user.name 'github-actions[bot]' git config --global user.email 'github-actions[bot]@users.noreply.github.com' @@ -64,7 +73,6 @@ jobs: apache-arrow-adbc-${VERSION}-rc0 - name: Create fake GPG key - shell: bash run: | gpg \ --quick-gen-key \ @@ -80,7 +88,6 @@ jobs: --export >> KEYS - name: Create sum/signature - shell: bash run: | gpg \ --armor \ @@ -109,32 +116,30 @@ jobs: strategy: fail-fast: false matrix: - os: ["macos-13", "macos-latest", "ubuntu-latest", "windows-latest"] + os: ["macos-15-intel", "macos-latest", "ubuntu-latest", "windows-latest"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 path: arrow-adbc 
persist-credentials: false - name: Prepare version - shell: bash run: | VERSION=$(grep 'set(ADBC_VERSION' arrow-adbc/c/cmake_modules/AdbcVersion.cmake | \ grep -E -o '[0-9]+\.[0-9]+\.[0-9]+') echo "VERSION=${VERSION}" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source path: ${{ github.workspace }}/apache-arrow-adbc-${{ env.VERSION }}-rc0/ - name: Setup directory structure - shell: bash run: | mv apache-arrow-adbc-${{ env.VERSION }}-rc0/KEYS . - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 # The Unix script will set up conda itself if: matrix.os == 'windows-latest' with: @@ -142,7 +147,6 @@ jobs: use-mamba: true - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: | sudo sysctl vm.mmap_rnd_bits=28 @@ -161,6 +165,9 @@ jobs: # Rust uses a lot of disk space, free up some space # https://github.com/actions/runner-images/issues/2840 sudo rm -rf "$AGENT_TOOLSDIRECTORY" + # Required for macOS + export CXXFLAGS="${CXXFLAGS} -D_LIBCPP_DISABLE_AVAILABILITY" + echo "CXXFLAGS=${CXXFLAGS}" ./arrow-adbc/dev/release/verify-release-candidate.sh $VERSION 0 - name: Verify @@ -181,7 +188,7 @@ jobs: name: "Run Docker Tests" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 path: arrow-adbc @@ -192,7 +199,58 @@ jobs: pushd arrow-adbc docker compose run --rm cpp-clang-latest + - name: cpp-gcc-latest + run: | + pushd arrow-adbc + docker compose run --rm cpp-gcc-latest + + - name: cpp-shared-no-common-entrypoints-test + run: | + pushd arrow-adbc + docker compose run --rm cpp-shared-no-common-entrypoints-test + + - name: cpp-static-test + run: | + pushd arrow-adbc + docker compose run --rm cpp-static-test + - name: python-debug + run: | + # Need to set this or ASAN inside the container gets stuck + # printing a loop of DEADLYSIGNAL + 
sudo sysctl vm.mmap_rnd_bits=28 + pushd arrow-adbc + docker compose run -e PYTHON=3.13 --rm python-debug + + source-verify-docker: + name: "Verify Source (OS)/${{ matrix.os }} ${{ matrix.version }}" + runs-on: ubuntu-latest + strategy: + max-parallel: 2 + matrix: + include: + - os: ubuntu + version: "22.04" + - os: ubuntu + version: "24.04" + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + path: arrow-adbc + persist-credentials: false + submodules: recursive + + - name: Clean up disk space + run: | + # Rust uses a lot of disk space, free up some space + # https://github.com/actions/runner-images/issues/2840 + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Verify + env: + OS: ${{ matrix.os }} + OS_VERSION: ${{ matrix.version }} run: | pushd arrow-adbc - docker compose run -e PYTHON=3.12 --rm python-debug + env ${OS@U}=${OS_VERSION} docker compose run --rm verify-all-${OS} diff --git a/.github/workflows/nightly-website.yml b/.github/workflows/nightly-website.yml index c4a8193ddf..414be6e034 100644 --- a/.github/workflows/nightly-website.yml +++ b/.github/workflows/nightly-website.yml @@ -31,17 +31,21 @@ concurrency: group: ${{ github.repository }}-${{ github.workflow }} cancel-in-progress: false +defaults: + run: + # 'bash' will expand to -eo pipefail + shell: bash + jobs: build: name: "Build Website" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false - name: Build - shell: bash run: | docker compose run docs - name: Archive docs @@ -57,30 +61,29 @@ jobs: runs-on: ubuntu-latest needs: [build] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 path: site # NOTE: needed to push at the end persist-credentials: true ref: asf-site - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 path: scripts persist-credentials: false - name: Download docs - uses: actions/download-artifact@v4 + uses: 
actions/download-artifact@v5 with: name: docs path: temp # To use pip below, we need to install our own Python; the system Python's # pip won't let us install packages without a scary flag. - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: '3.x' - name: Build - shell: bash run: | pip install sphobjinv ./scripts/ci/scripts/website_build.sh "$(pwd)/scripts" "$(pwd)/site" "$(pwd)/temp" diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml index fc5cd5edb0..0cb59b87af 100644 --- a/.github/workflows/packaging.yml +++ b/.github/workflows/packaging.yml @@ -25,6 +25,8 @@ on: - ".env" - "c/**" - "ci/**" + - "dev/release/verify-apt.sh" + - "dev/release/verify-yum.sh" - "glib/**" - "python/**" - "ruby/**" @@ -61,7 +63,7 @@ jobs: # For cron: only run on the main repo, not forks if: github.event_name != 'schedule' || github.repository == 'apache/arrow-adbc' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false @@ -98,11 +100,10 @@ jobs: strategy: matrix: - os: ["ubuntu-latest", "windows-latest", "macos-13"] - go-version: [1.21.8] # Customize Go versions as needed + os: ["ubuntu-latest", "windows-latest", "macos-15-intel"] steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -116,10 +117,13 @@ jobs: tar xf apache-arrow-adbc-${VERSION}.tar.gz mv apache-arrow-adbc-${VERSION} adbc + - name: Get required Go version + run: | + (. 
adbc/.env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: - go-version: ${{ matrix.go-version }} + go-version: "${{ env.GO_VERSION }}" check-latest: true cache: true cache-dependency-path: adbc/go/adbc/go.sum @@ -132,8 +136,9 @@ jobs: popd - name: Upload Go binaries - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: go-${{ matrix.os }} retention-days: 7 path: | adbc/go/adbc/pkg/libadbc_driver_flightsql.* @@ -147,7 +152,7 @@ jobs: - go-binaries steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -167,13 +172,15 @@ jobs: echo "schedule: ${{ github.event.schedule }}" >> $GITHUB_STEP_SUMMARY echo "ref: ${{ github.ref }}" >> $GITHUB_STEP_SUMMARY - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v5 with: + pattern: go-* path: adbc/go/adbc/pkg + merge-multiple: true - name: Copy Go binaries run: | - pushd adbc/go/adbc/pkg/artifact + pushd adbc/go/adbc/pkg/ cp *.dll ../ cp *.so ../ cp *.dylib ../ @@ -200,7 +207,7 @@ jobs: needs: - source steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -242,16 +249,77 @@ jobs: path: | docs.tgz + java-jni: + name: "JNI Libraries/${{ matrix.os }} ${{ matrix.arch }}" + needs: + - source + runs-on: ${{ matrix.runner }} + strategy: + matrix: + include: + - { os: Linux, arch: amd64, runner: ubuntu-latest } + - { os: Linux, arch: arm64, runner: ubuntu-24.04-arm } + - { os: macOS, arch: arm64v8, runner: macos-latest } + - { os: Windows, arch: amd64, runner: windows-latest } + + steps: + - uses: actions/download-artifact@v5 + with: + name: source + - name: Extract source archive + run: | + source_archive=$(echo apache-arrow-adbc-*.tar.gz) + VERSION=${source_archive#apache-arrow-adbc-} + VERSION=${VERSION%.tar.gz} + echo "VERSION=${VERSION}" >> $GITHUB_ENV + + tar xf apache-arrow-adbc-${VERSION}.tar.gz + mv 
apache-arrow-adbc-${VERSION} adbc + - name: Build JNI artifacts + working-directory: adbc + env: + ADBC_BUILD_STATIC: ON + ADBC_BUILD_TESTS: OFF + ADBC_USE_ASAN: OFF + ADBC_USE_UBSAN: OFF + BUILD_ALL: 0 + BUILD_DRIVER_MANAGER: 1 + CMAKE_BUILD_TYPE: release + run: | + ./ci/scripts/cpp_build.sh $(pwd) $(pwd)/build_driver_manager + ./ci/scripts/java_build.sh $(pwd) + ./ci/scripts/java_jni_build.sh $(pwd) $(pwd)/build_jni $(pwd)/build_driver_manager/local + - name: Assemble artifacts + run: | + mkdir artifacts + cp -r adbc/java/driver/jni/src/main/resources/ artifacts/jni + ls -laR artifacts + mv artifacts artifacts-${{ matrix.os }}-${{ matrix.arch }} + tar czf artifacts-${{ matrix.os }}-${{ matrix.arch }}.tgz artifacts-${{ matrix.os }}-${{ matrix.arch }} + - uses: actions/upload-artifact@v4 + with: + name: jni-artifacts-${{ matrix.os }}-${{ matrix.arch }} + retention-days: 7 + path: | + artifacts-${{ matrix.os }}-${{ matrix.arch }}.tgz + java: name: "Java 11" runs-on: ubuntu-latest needs: - source + - java-jni steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source + - uses: actions/download-artifact@v5 + with: + path: artifacts + pattern: jni-artifacts-* + merge-multiple: true + - name: Extract source archive run: | source_archive=$(echo apache-arrow-adbc-*.tar.gz) @@ -270,8 +338,16 @@ jobs: - name: Build and test run: | + set -x + pushd artifacts + for archive in artifacts*.tgz; do + tar xvf $archive + done + popd + cp -r artifacts/*/jni/adbc_driver_jni adbc/java/driver/jni/src/main/resources + pushd adbc/ - docker compose run java-dist + docker compose run -e BUILD_JNI=ON java-dist popd cp -a adbc/dist/ ./ @@ -295,14 +371,17 @@ jobs: target: - almalinux-8 - almalinux-9 + - almalinux-10 - debian-bookworm + - debian-trixie - ubuntu-jammy + - ubuntu-noble steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 
with: repository: apache/arrow path: arrow @@ -338,7 +417,7 @@ jobs: mv apache-arrow-adbc-$VERSION.tar.gz adbc/ci/linux-packages/ - name: Set up Ruby - uses: ruby/setup-ruby@v1 + uses: ruby/setup-ruby@ab177d40ee5483edb974554986f56b33477e21d0 # v1.265.0 with: ruby-version: ruby @@ -350,7 +429,7 @@ jobs: restore-keys: linux-${{ env.TASK_NAMESPACE }}-ccache-${{ matrix.target }}- - name: Login to GitHub Container registry - uses: docker/login-action@v3 + uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0 with: registry: ghcr.io username: ${{ github.actor }} @@ -428,7 +507,7 @@ jobs: # TODO: "linux_aarch64_" arch: ["linux_64_"] steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -479,7 +558,7 @@ jobs: python-conda-macos: name: "Python ${{ matrix.arch }} Conda" - runs-on: macos-13 + runs-on: macos-15-intel # No need for Conda packages during release # TODO(apache/arrow-adbc#468): re-enable if: false @@ -492,9 +571,9 @@ jobs: arch: ["osx_64_"] defaults: run: - shell: bash -l {0} + shell: bash -l -eo pipefail {0} steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -515,7 +594,7 @@ jobs: echo "schedule: ${{ github.event.schedule }}" >> $GITHUB_STEP_SUMMARY echo "ref: ${{ github.ref }}" >> $GITHUB_STEP_SUMMARY - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false @@ -549,25 +628,23 @@ jobs: ./adbc/ci/scripts/python_conda_test.sh $(pwd)/adbc $(pwd)/adbc/build python-manylinux: - name: "Python ${{ matrix.arch }} manylinux${{ matrix.manylinux_version }}" - runs-on: ubuntu-latest + name: "Python ${{ matrix.arch }} manylinux" + runs-on: ${{ matrix.runs_on }} needs: - source strategy: fail-fast: false matrix: arch: ["amd64", "arm64v8"] - is_pr: - - ${{ startsWith(github.ref, 'refs/pull/') }} - exclude: - # Don't 
run arm64v8 build on PRs since the build is excessively slow - - arch: arm64v8 - is_pr: true include: - - {arch: amd64, platform: linux/amd64} - - {arch: arm64v8, platform: linux/arm64} + - arch: amd64 + platform: "linux/amd64" + runs_on: "ubuntu-latest" + - arch: arm64v8 + platform: "linux/arm64/v8" + runs_on: "ubuntu-24.04-arm" steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -590,17 +667,19 @@ jobs: echo "schedule: ${{ github.event.schedule }}" >> $GITHUB_STEP_SUMMARY echo "ref: ${{ github.ref }}" >> $GITHUB_STEP_SUMMARY - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Build wheel env: ARCH: ${{ matrix.arch }} + PLATFORM: ${{ matrix.platform }} run: | pushd adbc docker compose run \ -e SETUPTOOLS_SCM_PRETEND_VERSION=$VERSION \ - python-wheel-manylinux + python-wheel-manylinux-build + + docker compose run \ + -e SETUPTOOLS_SCM_PRETEND_VERSION=$VERSION \ + python-wheel-manylinux-relocate popd - name: Archive wheels @@ -616,14 +695,6 @@ jobs: adbc/python/adbc_driver_sqlite/repaired_wheels/*.whl adbc/python/adbc_driver_snowflake/repaired_wheels/*.whl - - name: Test wheel 3.9 - env: - ARCH: ${{ matrix.arch }} - PLATFORM: ${{ matrix.platform }} - run: | - pushd adbc - env PYTHON=3.9 docker compose run python-wheel-manylinux-test - - name: Test wheel 3.10 env: ARCH: ${{ matrix.arch }} @@ -648,6 +719,31 @@ jobs: pushd adbc env PYTHON=3.12 docker compose run python-wheel-manylinux-test + - name: Test wheel 3.13 + env: + ARCH: ${{ matrix.arch }} + PLATFORM: ${{ matrix.platform }} + run: | + pushd adbc + env PYTHON=3.13 docker compose run python-wheel-manylinux-test + + # - name: Test wheel 3.14 + # env: + # ARCH: ${{ matrix.arch }} + # PLATFORM: ${{ matrix.platform }} + # run: | + # pushd adbc + # env PYTHON=3.14 docker compose run python-wheel-manylinux-test + + # TODO(lidavidm): once we support 3.14, only test 3.14t + - name: Test wheel 3.13t + env: + ARCH: ${{ matrix.arch }} + PLATFORM: ${{ 
matrix.platform }} + run: | + pushd adbc + env PYTHON=3.13t docker compose run python-wheel-manylinux-freethreaded-test + python-macos: name: "Python ${{ matrix.arch }} macOS" runs-on: ${{ matrix.os }} @@ -656,19 +752,19 @@ jobs: strategy: fail-fast: false matrix: - os: ["macos-13", "macos-latest"] + os: ["macos-15-intel", "macos-latest"] include: - - os: macos-13 + - os: macos-15-intel arch: amd64 - os: macos-latest arch: arm64v8 env: MACOSX_DEPLOYMENT_TARGET: "10.15" - PYTHON: "/Library/Frameworks/Python.framework/Versions/3.10/bin/python3.10" + PYTHON: "/Library/Frameworks/Python.framework/Versions/3.13/bin/python3.13" # Where to install vcpkg VCPKG_ROOT: "${{ github.workspace }}/vcpkg" steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -709,28 +805,33 @@ jobs: ci/scripts/install_vcpkg.sh $VCPKG_ROOT $VCPKG_VERSION popd - - uses: actions/setup-go@v5 + - name: Get required Go version + run: | + (. adbc/.env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV + + - uses: actions/setup-go@v6 with: - go-version: 1.21.8 + go-version: "${{ env.GO_VERSION }}" check-latest: true cache: true cache-dependency-path: adbc/go/adbc/go.sum - - name: Install Python (AMD64 only) - if: matrix.arch == 'amd64' - run: | - pushd adbc - sudo ci/scripts/install_python.sh macos 3.9 - popd - - name: Install Python run: | pushd adbc sudo ci/scripts/install_python.sh macos 3.10 sudo ci/scripts/install_python.sh macos 3.11 sudo ci/scripts/install_python.sh macos 3.12 + sudo ci/scripts/install_python.sh macos 3.13 + # sudo ci/scripts/install_python.sh macos 3.14 popd + - name: Downgrade XCode + if: matrix.os == 'macos-latest' + run: | + # XXX(https://github.com/apache/arrow-adbc/issues/3382) + sudo xcode-select -s "/Applications/Xcode_16.app" + - name: Build wheel env: ARCH: ${{ matrix.arch }} @@ -739,6 +840,7 @@ jobs: $PYTHON -m venv build-env source build-env/bin/activate ./ci/scripts/python_wheel_unix_build.sh $ARCH $(pwd) $(pwd)/build + 
./ci/scripts/python_wheel_unix_relocate.sh $ARCH $(pwd) $(pwd)/build popd - name: Archive wheels @@ -754,17 +856,6 @@ jobs: adbc/python/adbc_driver_sqlite/repaired_wheels/*.whl adbc/python/adbc_driver_snowflake/repaired_wheels/*.whl - - name: Test wheel 3.9 - if: matrix.arch == 'amd64' - run: | - pushd adbc - - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3.9 -m venv test-env-39 - source test-env-39/bin/activate - export PYTHON_VERSION=3.9 - ./ci/scripts/python_wheel_unix_test.sh $(pwd) - deactivate - - name: Test wheel 3.10 run: | pushd adbc @@ -795,6 +886,45 @@ jobs: ./ci/scripts/python_wheel_unix_test.sh $(pwd) deactivate + - name: Test wheel 3.13 + run: | + pushd adbc + + /Library/Frameworks/Python.framework/Versions/3.13/bin/python3.13 -m venv test-env-313 + source test-env-313/bin/activate + export PYTHON_VERSION=3.13 + ./ci/scripts/python_wheel_unix_test.sh $(pwd) + deactivate + + # - name: Test wheel 3.14 + # run: | + # pushd adbc + + # /Library/Frameworks/Python.framework/Versions/3.14/bin/python3.14 -m venv test-env-314 + # source test-env-314/bin/activate + # export PYTHON_VERSION=3.14 + # ./ci/scripts/python_wheel_unix_test.sh $(pwd) + # deactivate + + - name: Assemble logs + if: failure() + run: | + mkdir ~/logs + counter=0 + for log in $(find "$VCPKG_ROOT" -name 'build-*.log' -or -name 'config-*.log'); do + echo cp "$log" ~/logs/$counter-$(basename "$log") + cp "$log" ~/logs/$counter-$(basename "$log") + counter=$((counter+1)) + done + + - name: Upload failure logs + if: failure() + uses: actions/upload-artifact@v4 + with: + name: vcpkg-logs-${{ matrix.os }}-${{ matrix.arch }} + retention-days: 7 + path: ~/logs + python-windows: name: "Python ${{ matrix.python_version }} Windows" runs-on: windows-latest @@ -803,13 +933,13 @@ jobs: strategy: fail-fast: false matrix: - python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python_version: ["3.10", "3.11", "3.12", "3.13"] env: PYTHON_VERSION: "${{ matrix.python_version }}" # Where to 
install vcpkg VCPKG_ROOT: "${{ github.workspace }}\\vcpkg" steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -858,7 +988,7 @@ jobs: .\bootstrap-vcpkg.bat -disableMetrics popd - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" check-latest: true @@ -866,15 +996,16 @@ jobs: cache-dependency-path: adbc/go/adbc/go.sum - name: Install Python ${{ matrix.python_version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: + check-latest: true python-version: ${{ matrix.python_version }} - name: Build wheel shell: cmd run: | where python.exe - CALL "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + CALL "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat" pushd adbc set SETUPTOOLS_SCM_PRETEND_VERSION=%VERSION% .\ci\scripts\python_wheel_windows_build.bat %cd% %cd%\build @@ -910,7 +1041,7 @@ jobs: needs: - source steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v5 with: name: source @@ -975,7 +1106,7 @@ jobs: - python-sdist steps: - name: Get All Artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v5 with: path: release-artifacts - name: Release @@ -1026,18 +1157,18 @@ jobs: # - python-conda-macos defaults: run: - shell: bash -l {0} + shell: bash -l -eo pipefail {0} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: true submodules: recursive - name: Get All Artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v5 with: path: conda-packages - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-only-tar-bz2: false @@ -1060,24 +1191,44 @@ jobs: env: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + # 
This takes a while due to sleeping to avoid rate limits, so run it first + # and in parallel with the builds + clean-gemfury: + name: "Remove old Gemfury packages" + runs-on: ubuntu-latest + if: github.ref_name == 'main' && (github.event.schedule || inputs.upload_artifacts) + steps: + - uses: actions/checkout@v5 + - name: Install Python + uses: actions/setup-python@v6 + with: + python-version: '3.13' + - name: Clean + run: | + pip install requests + ./ci/scripts/gemfury_clean.py + env: + GEMFURY_API_TOKEN: ${{ secrets.GEMFURY_API_TOKEN }} + upload-gemfury: name: "Upload packages to Gemfury" runs-on: ubuntu-latest if: github.ref == 'refs/heads/main' && (github.event.schedule || inputs.upload_artifacts) needs: + - clean-gemfury - java - python-manylinux - python-macos - python-windows - python-sdist steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: true submodules: recursive - name: Get All Artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v5 with: path: nightly-artifacts - name: Upload diff --git a/.github/workflows/r-basic.yml b/.github/workflows/r-basic.yml index 5320e794e6..8b5c31bdd0 100644 --- a/.github/workflows/r-basic.yml +++ b/.github/workflows/r-basic.yml @@ -38,15 +38,28 @@ permissions: contents: read jobs: - check: + check-c: strategy: matrix: os: [ubuntu] - pkg: [adbcdrivermanager, adbcsqlite, adbcpostgresql, adbcflightsql, adbcsnowflake] + pkg: [adbcdrivermanager, adbcsqlite, adbcpostgresql] uses: ./.github/workflows/r-check.yml with: os: ${{ matrix.os }} pkg: ${{ matrix.pkg }} + + check-go: + strategy: + matrix: + os: [ubuntu] + pkg: [adbcflightsql, adbcsnowflake, adbcbigquery] + + uses: ./.github/workflows/r-check.yml + with: + os: ${{ matrix.os }} + pkg: ${{ matrix.pkg }} + # If update this to `error`, warnings will not fail the job + error-on: warning secrets: SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} diff --git a/.github/workflows/r-check.yml 
b/.github/workflows/r-check.yml index cc4cd9b7c6..29a87de4f0 100644 --- a/.github/workflows/r-check.yml +++ b/.github/workflows/r-check.yml @@ -31,6 +31,10 @@ on: required: false default: release type: string + error-on: + required: false + default: warning + type: string secrets: SNOWFLAKE_URI: required: false @@ -40,14 +44,14 @@ jobs: runs-on: ${{ inputs.os }}-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Get required Go version run: | (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: "${{ env.GO_VERSION }}" - - uses: r-lib/actions/setup-r@v2 + - uses: r-lib/actions/setup-r@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 # v2 with: r-version: release use-public-rspm: true @@ -72,7 +76,7 @@ jobs: popd shell: bash - - uses: r-lib/actions/setup-r-dependencies@f4937e0dc26f9b99c969cd3e4ca943b576e7f991 + - uses: r-lib/actions/setup-r-dependencies@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 with: extra-packages: any::rcmdcheck needs: check @@ -94,12 +98,13 @@ jobs: ADBC_FLIGHTSQL_TEST_URI="grpc://localhost:8080" echo "ADBC_FLIGHTSQL_TEST_URI=${ADBC_FLIGHTSQL_TEST_URI}" >> $GITHUB_ENV - - uses: r-lib/actions/check-r-package@v2 + - uses: r-lib/actions/check-r-package@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 # v2 env: ADBC_SNOWFLAKE_TEST_URI: ${{ secrets.SNOWFLAKE_URI }} R_KEEP_PKG_SOURCE: yes with: working-directory: r/${{ inputs.pkg }} + error-on: '"${{ inputs.error-on }}"' - name: Shutdown docker compose services if: runner.os == 'Linux' diff --git a/.github/workflows/r-extended.yml b/.github/workflows/r-extended.yml index 1ca2e821c2..4471ce691c 100644 --- a/.github/workflows/r-extended.yml +++ b/.github/workflows/r-extended.yml @@ -39,12 +39,27 @@ permissions: jobs: # Runs R CMD check on the same platforms/R versions CRAN does - cran: + cran-c: strategy: matrix: rversion: [oldrel, release, devel] os: [macOS, windows, ubuntu] - pkg: [adbcdrivermanager, adbcsqlite, 
adbcpostgresql, adbcflightsql, adbcsnowflake, adbcbigquery] + pkg: [adbcdrivermanager, adbcsqlite, adbcpostgresql] + fail-fast: false + + uses: ./.github/workflows/r-check.yml + with: + os: ${{ matrix.os }} + pkg: ${{ matrix.pkg }} + rversion: ${{ matrix.rversion }} + + # Go-based drivers are not distributed on CRAN and are checked with error-on error only + cran-go: + strategy: + matrix: + rversion: [oldrel, release, devel] + os: [macOS, windows, ubuntu] + pkg: [adbcflightsql, adbcsnowflake, adbcbigquery] fail-fast: false uses: ./.github/workflows/r-check.yml @@ -52,16 +67,20 @@ jobs: os: ${{ matrix.os }} pkg: ${{ matrix.pkg }} rversion: ${{ matrix.rversion }} + # Go based drivers generate WARNINGs because some symbols + # are linked in that R CMD check does not allow. + error-on: error secrets: SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} # Check older versions of R on Linux. This catches accidental use of newer R functions. + # We don't check Go drivers here because they don't have much R API surface area. rversions: strategy: matrix: rversion: ["3.6", "4.0", "4.1"] os: [ubuntu] - pkg: [adbcdrivermanager, adbcsqlite, adbcpostgresql, adbcflightsql, adbcsnowflake, adbcbigquery] + pkg: [adbcdrivermanager, adbcsqlite, adbcpostgresql] fail-fast: false uses: ./.github/workflows/r-check.yml @@ -69,18 +88,16 @@ jobs: os: ${{ matrix.os }} pkg: ${{ matrix.pkg }} rversion: ${{ matrix.rversion }} - secrets: - SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} - # Checks on older verions of R on Windows. The Windows build system changed + # Checks on older versions of R on Windows. The Windows build system changed # several times so we need to check packages on every version. Go-based # drivers aren't supported before 4.2, so we don't check them here. - # We don't need to check R 4.1 because the build system for R 4.0 and R 4.1 - # are the same. 
+ # The test dependencies support the released version and the four previous + # versions (e.g., 4.1 is the minimum supported version as of R 4.5). winrversions: strategy: matrix: - rversion: ["3.6", "4.0"] + rversion: ["4.1"] os: [windows] pkg: [adbcdrivermanager, adbcsqlite, adbcpostgresql] fail-fast: false @@ -90,8 +107,6 @@ jobs: os: ${{ matrix.os }} pkg: ${{ matrix.pkg }} rversion: ${{ matrix.rversion }} - secrets: - SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} # Runs tests with valgrind. Go does not support valgrind, so we don't run # those tests here. @@ -107,8 +122,8 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-r@v2 + - uses: actions/checkout@v5 + - uses: r-lib/actions/setup-r@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 # v2 with: rversion: ${{ matrix.rversion }} use-public-rspm: true @@ -131,7 +146,7 @@ jobs: popd shell: bash - - uses: r-lib/actions/setup-r-dependencies@f4937e0dc26f9b99c969cd3e4ca943b576e7f991 + - uses: r-lib/actions/setup-r-dependencies@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 with: working-directory: r/${{ matrix.pkg }} diff --git a/.github/workflows/r-standard.yml b/.github/workflows/r-standard.yml index 907d779844..d5a49169f6 100644 --- a/.github/workflows/r-standard.yml +++ b/.github/workflows/r-standard.yml @@ -46,15 +46,30 @@ permissions: contents: read jobs: - check: + check-c: strategy: matrix: os: [ubuntu, macOS, windows] - pkg: [adbcdrivermanager, adbcsqlite, adbcpostgresql, adbcflightsql, adbcsnowflake, adbcbigquery] + pkg: [adbcdrivermanager, adbcsqlite, adbcpostgresql] uses: ./.github/workflows/r-check.yml with: os: ${{ matrix.os }} pkg: ${{ matrix.pkg }} + + # Go-based drivers are not distributed on CRAN and are checked with error-on error only + check-go: + strategy: + matrix: + os: [ubuntu, macOS, windows] + pkg: [adbcflightsql, adbcsnowflake, adbcbigquery] + + uses: ./.github/workflows/r-check.yml + with: + os: ${{ matrix.os }} + pkg: ${{ matrix.pkg }} + # Go based drivers 
generate WARNINGs because some symbols + # are linked in that R CMD check does not allow. + error-on: error secrets: SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 63150469db..03c524d164 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -38,38 +38,55 @@ permissions: defaults: run: - shell: bash - working-directory: rust + shell: bash -l -eo pipefail {0} jobs: - native-unix: - uses: ./.github/workflows/native-unix.yml - permissions: - contents: read - id-token: write - rust: - needs: [native-unix] strategy: matrix: - # TODO(alexandreyc): add `windows-latest` # See: https://github.com/apache/arrow-adbc/pull/1803#issuecomment-2117669300 - os: [macos-13, macos-latest, ubuntu-latest] - name: "Rust ${{ matrix.os }}" + os: + - macos-15-intel + - macos-latest + - ubuntu-latest + - windows-latest + minimal-versions: + - false + include: + - os: ubuntu-latest + minimal-versions: true # Test can be built with older arrow except adbc_datafusion + - os: windows-latest + minimal-versions: true # Test can be built with older windows specific deps + name: Rust ${{ matrix.os }} ${{ matrix.minimal-versions && '(minimal versions for adbc_core)' || '' }} runs-on: ${{ matrix.os }} env: CARGO_INCREMENTAL: 0 RUSTFLAGS: "-Adead_code" # TODO(alexandreyc): remove this line when implementation is complete steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false + - name: Cargo update for minimal versions + if: ${{ matrix.minimal-versions }} + working-directory: rust + run: | + rustup toolchain install nightly + CARGO_RESOLVER_INCOMPATIBLE_RUST_VERSIONS=fallback cargo +nightly generate-lockfile -Z direct-minimal-versions - name: Use stable Rust id: rust run: | rustup toolchain install stable --no-self-update rustup default stable + - name: Get required Go version + run: | + (. 
./.env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV + - uses: actions/setup-go@v6 + with: + go-version: "${{ env.GO_VERSION }}" + check-latest: true + cache: true + cache-dependency-path: go/adbc/go.sum - name: Install Protoc if: runner.os == 'Linux' run: | @@ -82,33 +99,145 @@ jobs: curl -L "https://github.com/protocolbuffers/protobuf/releases/download/v28.3/protoc-28.3-osx-universal_binary.zip" -o protoc.zip unzip "protoc.zip" -d $HOME/.local echo "$HOME/.local/bin" >> "$GITHUB_PATH" - - uses: actions/download-artifact@v4 + - name: Install Protoc + if: runner.os == 'Windows' + run: | + curl -L "https://github.com/protocolbuffers/protobuf/releases/download/v28.3/protoc-28.3-win64.zip" -o protoc.zip + unzip "protoc.zip" -d $HOME/.local + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + - name: Install sqlite3 + if: runner.os == 'Linux' + run: sudo apt-get install libsqlite3-dev + - name: Setup conda + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 + if: runner.os == 'Windows' with: - name: driver-manager-${{ matrix.os }} - path: "~" - - name: Untar artifacts - shell: bash -l {0} - run: | - cd - mkdir -p ${{ github.workspace }}/build - tar xvf ~/local.tgz -C ${{ github.workspace }}/build --strip-components=1 + miniforge-version: latest + - name: Install Dependencies + if: runner.os == 'Windows' + run: | + conda install -c conda-forge --file ci/conda_env_cpp.txt + - name: Build C++/Go drivers + run: | + mkdir -p build + mkdir -p local + pushd build + cmake \ + -DADBC_BUILD_TESTS=OFF \ + -DADBC_DRIVER_BIGQUERY=ON \ + -DADBC_DRIVER_FLIGHTSQL=ON \ + -DADBC_DRIVER_MANAGER=ON \ + -DADBC_DRIVER_SQLITE=ON \ + -DADBC_DRIVER_SNOWFLAKE=ON \ + -DADBC_USE_ASAN=OFF \ + -DADBC_USE_UBSAN=OFF \ + -DADBC_SQLITE_COMPILE_DEFINES=-DADBC_SQLITE_WITH_NO_LOAD_EXTENSION \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=../local \ + ../c + cmake --build . 
--target install + popd - name: Set dynamic linker path - if: matrix.os == 'ubuntu-latest' + if: runner.os == 'Linux' run: | - echo "LD_LIBRARY_PATH=${{ github.workspace }}/build/lib:$LD_LIBRARY_PATH" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${{ github.workspace }}/local/lib:$LD_LIBRARY_PATH" >> "$GITHUB_ENV" + echo "ADBC_DRIVER_MANAGER_TEST_LIB=${{ github.workspace }}/local/lib/libadbc_driver_sqlite.so" >> "$GITHUB_ENV" - name: Set dynamic linker path if: matrix.os == 'macos-latest' run: | - echo "DYLD_LIBRARY_PATH=/opt/homebrew/opt/sqlite/lib:${{ github.workspace }}/build/lib:$DYLD_LIBRARY_PATH" >> "$GITHUB_ENV" + echo "DYLD_LIBRARY_PATH=/opt/homebrew/opt/sqlite/lib:${{ github.workspace }}/local/lib:$DYLD_LIBRARY_PATH" >> "$GITHUB_ENV" + echo "ADBC_DRIVER_MANAGER_TEST_LIB=${{ github.workspace }}/local/lib/libadbc_driver_sqlite.dylib" >> "$GITHUB_ENV" - name: Set dynamic linker path - if: matrix.os == 'macos-13' + if: matrix.os == 'macos-15-intel' run: | - echo "DYLD_LIBRARY_PATH=/usr/local/opt/sqlite/lib:${{ github.workspace }}/build/lib:$DYLD_LIBRARY_PATH" >> "$GITHUB_ENV" + echo "DYLD_LIBRARY_PATH=/usr/local/opt/sqlite/lib:${{ github.workspace }}/local/lib:$DYLD_LIBRARY_PATH" >> "$GITHUB_ENV" + echo "ADBC_DRIVER_MANAGER_TEST_LIB=${{ github.workspace }}/local/lib/libadbc_driver_sqlite.dylib" >> "$GITHUB_ENV" + - name: Set dynamic linker path + if: runner.os == 'Windows' + run: | + echo "PATH=${{ github.workspace }}/local/bin;${{ github.workspace }}/local/lib;$PATH" >> "$GITHUB_ENV" + echo "ADBC_DRIVER_MANAGER_TEST_LIB=${{ github.workspace }}/local/bin/adbc_driver_sqlite.dll" >> "$GITHUB_ENV" + - name: Set search dir for Snowflake Go lib + run: echo "ADBC_SNOWFLAKE_GO_LIB_DIR=${{ github.workspace }}/local/lib" >> "$GITHUB_ENV" - name: Clippy - run: cargo clippy --workspace --all-targets --all-features -- -Dwarnings + if: runner.os != 'Linux' && ! 
matrix.minimal-versions + working-directory: rust + run: | + rustup component add clippy + cargo clippy --workspace --all-targets --all-features --locked -- -Dwarnings + - name: Clippy (nightly) + if: runner.os == 'Linux' && ! matrix.minimal-versions + working-directory: rust + run: | + rustup toolchain install nightly --component clippy + cargo +nightly clippy --workspace --all-targets --all-features --locked -- -Dwarnings -Zcrate-attr='feature(non_exhaustive_omitted_patterns_lint)' - name: Test - run: cargo test --workspace --all-targets --all-features + working-directory: rust + # TODO: enable snowflake tests on windows + run: > + cargo test --all-targets --all-features --workspace + ${{ matrix.minimal-versions && '--exclude adbc_datafusion' || '' }} + ${{ runner.os == 'Windows' && '--exclude adbc_snowflake' || '' }} + # env: + # ADBC_SNOWFLAKE_TESTS: 1 + # ADBC_SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} + # ADBC_SNOWFLAKE_SQL_DB: ARROW_OSS_DB - name: Doctests - run: cargo test --workspace --doc --all-features + working-directory: rust + # TODO: enable snowflake tests on windows + run: > + cargo test --doc --all-features --workspace + ${{ matrix.minimal-versions && '--exclude adbc_datafusion' || '' }} + ${{ runner.os == 'Windows' && '--exclude adbc_snowflake' || '' }} - name: Check docs - run: cargo doc --workspace --all-features + working-directory: rust + # TODO: enable snowflake tests on windows + run: > + cargo doc --all-features --workspace + ${{ matrix.minimal-versions && '--exclude adbc_datafusion' || '' }} + ${{ runner.os == 'Windows' && '--exclude adbc_snowflake' || '' }} + - name: Install cargo-msrv + if: matrix.os == 'ubuntu-latest' + run: | + # Install cargo-msrv + cargo_msrv_version="v0.18.4" + cargo_msrv_hash="367a48e4ad014b119bf728a9e95e863575e02fdf6cf95ae24f44ca73b445ee14" + cargo_msrv_archive="cargo-msrv.tgz" + + curl -L 
"https://github.com/foresterre/cargo-msrv/releases/download/${cargo_msrv_version}/cargo-msrv-x86_64-unknown-linux-gnu-${cargo_msrv_version}.tgz" -o "${cargo_msrv_archive}" + echo "${cargo_msrv_hash} ${cargo_msrv_archive}" | sha256sum -c - + tar xz -f "${cargo_msrv_archive}" --wildcards --strip-components=1 -C "$HOME/.local/bin" "*/cargo-msrv" + rm "${cargo_msrv_archive}" + + - name: Verify MSRV (Minimum Supported Rust Version) except adbc_datafusion + if: matrix.os == 'ubuntu-latest' && matrix.minimal-versions + working-directory: rust + run: | + # Verify MSRV for each package except adbc_datafusion which requires a newer Rust version + find . -mindepth 2 -name Cargo.toml -not -path "*/datafusion/*" | while read -r dir + do + echo "Checking package '$dir'" + cargo msrv verify --manifest-path "$dir" || exit 1 + done + + - name: Verify MSRV (Minimum Supported Rust Version) of adbc_datafusion + if: matrix.os == 'ubuntu-latest' && ! matrix.minimal-versions + working-directory: rust + run: | + cargo msrv verify --manifest-path "./driver/datafusion/Cargo.toml" || exit 1 + + - name: Verify supports the latest arrow + if: matrix.os == 'ubuntu-latest' && ! matrix.minimal-versions + working-directory: rust + run: | + # Update arrow to the latest version in the lock file + cargo update \ + -p arrow-array \ + -p arrow-buffer \ + -p arrow-schema \ + -p arrow-select + + # If the lock file was updated, run the tests except for adbc_datafusion + if ! 
git diff --quiet Cargo.lock; then + cargo test --all-targets --all-features --workspace --exclude adbc_datafusion + fi diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml index bb546eb913..4dc4aefecb 100644 --- a/.github/workflows/verify.yml +++ b/.github/workflows/verify.yml @@ -38,6 +38,11 @@ on: permissions: contents: read +defaults: + run: + # 'bash' will expand to -eo pipefail + shell: bash + jobs: binary-unix: name: "Verify Binaries/${{ matrix.os }}" @@ -45,21 +50,19 @@ jobs: strategy: fail-fast: false matrix: - os: ["macos-13", "macos-latest", "ubuntu-latest"] + os: ["macos-15-intel", "macos-latest", "ubuntu-latest"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false # No Docker on M1 - name: Skip Binary Verifiction if: matrix.os == 'macos-latest' - shell: bash run: | echo "TEST_APT=0" | tee -a $GITHUB_ENV echo "TEST_YUM=0" | tee -a $GITHUB_ENV - name: Verify - shell: bash env: CI: "false" REPOSITORY: ${{ github.repository }} @@ -77,21 +80,20 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ["macos-13", "macos-latest", "ubuntu-latest", "windows-latest"] + os: ["macos-15-intel", "macos-latest", "ubuntu-latest", "windows-latest"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 persist-credentials: false submodules: recursive - - uses: conda-incubator/setup-miniconda@v3 + - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 # The Unix script will set up conda itself if: matrix.os == 'windows-latest' with: miniforge-version: latest use-mamba: true - name: Work around ASAN issue (GH-1617) - shell: bash -l {0} if: matrix.os == 'ubuntu-latest' run: | sudo sysctl vm.mmap_rnd_bits=28 @@ -108,6 +110,9 @@ jobs: # Rust uses a lot of disk space, free up some space # https://github.com/actions/runner-images/issues/2840 sudo rm -rf "$AGENT_TOOLSDIRECTORY" + # Required for macOS + export 
CXXFLAGS="${CXXFLAGS} -D_LIBCPP_DISABLE_AVAILABILITY" + echo "CXXFLAGS=${CXXFLAGS}" ./dev/release/verify-release-candidate.sh ${{ inputs.version }} ${{ inputs.rc }} - name: Verify if: matrix.os == 'windows-latest' diff --git a/.gitignore b/.gitignore index 8716f5d7b3..68ffd90405 100644 --- a/.gitignore +++ b/.gitignore @@ -61,6 +61,7 @@ c/apidoc/objects.inv docs/example.gz docs/example1.dat docs/example3.dat +docs/source/cpp/recipe_driver/driver_example.toml python/.eggs/ python/doc/ # Egg metadata @@ -78,6 +79,7 @@ docker_cache .*.swo CMakeUserPresets.json build/ +local/ site/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6d8bf873cb..9d6a351357 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,78 +22,88 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: cef0300fd0fc4d2a87a85fa2093c6b283ea36f4b # v5.0.0 hooks: + - id: check-shebang-scripts-are-executable - id: check-xml - id: check-yaml exclude: ci/conda/meta.yaml - id: end-of-file-fixer - exclude: "^r/.*?/_snaps/.*?.md$" + exclude: "^(r/.*?/_snaps/.*?.md)|(.*\\.patch)$" - id: mixed-line-ending name: "Mixed line endings (LF)" args: [--fix=lf] - exclude: '\.(bat|sln)$' + exclude: '\.(bat|sln|patch)$' - id: mixed-line-ending name: "Mixed line endings (CRLF)" args: [--fix=crlf] files: '\.(bat|sln)$' - id: trailing-whitespace - exclude: "^r/.*?/_snaps/.*?.md$" + exclude: "^(r/.*?/_snaps/.*?.md)|(.*\\.patch)$" - repo: https://github.com/pre-commit/mirrors-clang-format - rev: "v18.1.7" + rev: deec0115cd2847f837ac9180c4b7d1edb423fe02 # v18.1.7 hooks: - id: clang-format types_or: [c, c++] - repo: https://github.com/cheshirekow/cmake-format-precommit - rev: v0.6.13 + rev: e2c2116d86a80e72e7146a06e68b7c228afc6319 # v0.6.13 hooks: - id: cmake-format args: [--in-place] - repo: https://github.com/cpplint/cpplint - rev: 1.6.1 + rev: 350702a4d39395571bd118c9a3cff0a573488d61 # 2.0.2 hooks: - id: cpplint + types_or: [c++] args: # From Arrow's config - 
- "--filter=-whitespace/comments,-whitespace/indent,-readability/braces,-readability/casting,-readability/todo,-readability/alt_tokens,-build/header_guard,-build/c++11,-build/include_order,-build/include_subdir,-runtime/references" + - "--filter=-whitespace/comments,-whitespace/indent,-readability/braces,-readability/casting,-readability/todo,-readability/alt_tokens,-build/header_guard,-build/c++11,-build/include_order,-build/include_subdir,-runtime/references,-build/c++17" - "--linelength=90" - "--verbose=2" - repo: https://github.com/golangci/golangci-lint - rev: v1.61.0 + rev: 2b224c2cf4c9f261c22a16af7f8ca6408467f338 # v2.0.2 hooks: - id: golangci-lint entry: bash -c 'cd go/adbc && golangci-lint run --fix --timeout 5m' types_or: [go, go-mod] - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.13.0 + rev: a6273196190bb0f68caf1dc68073cf62c719f725 # v2.14.0 hooks: - id: pretty-format-golang + args: [--autofix] + types_or: [go] - id: pretty-format-java args: [--autofix] types_or: [java] - repo: https://github.com/psf/black - rev: 24.4.2 + rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # 25.1.0 hooks: - id: black types_or: [pyi, python] - repo: https://github.com/PyCQA/flake8 - rev: 7.1.0 + rev: bddd87797f8dfc07d2a10c894776018d9bec590b # 7.1.2 hooks: - id: flake8 types_or: [python] - repo: https://github.com/PyCQA/isort - rev: 5.13.2 + rev: 0a0b7a830386ba6a31c2ec8316849ae4d1b8240d # 6.0.0 hooks: - id: isort types_or: [python] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.16.2 + rev: 9247866fce7128f2c0eaf4a09f437880397d4689 # v0.16.2 hooks: - id: cython-lint - repo: https://github.com/vala-lang/vala-lint rev: 8ae2bb65fe66458263d94711ae4ddd978faece00 hooks: - id: vala-lint + - repo: https://github.com/trim21/pre-commit-mirror-meson + rev: 3fcd193f7ae6a5ae62cfcd21a1ac66347db7cb6b # v1.7.2 + hooks: + - id: meson-fmt + files: 'meson.build' + args: ['--inplace'] - repo: local hooks: - id: apache-rat @@ -105,13 +115,32 @@ 
repos: name: Ensure CGO adbc.h is syncd language: script pass_filenames: true - files: '^c/include/arrow-adbc/.*\.h$' + files: '^c/include/arrow-adbc/[^/]*\.h$' entry: "./ci/scripts/run_cgo_drivermgr_check.sh" + - id: check-cgo-adbc-impl + name: Ensure CGO adbc_driver_manager.cc is syncd + language: script + pass_filenames: true + files: '^c/driver_manager/adbc_driver_manager\.cc$' + entry: "./ci/scripts/run_cgo_drivermgr_check.sh" + # https://infra.apache.org/github-actions-policy.html + - id: check-pin + name: Ensure GitHub Actions and pre-commit hooks are pinned to a specific SHA + language: python + additional_dependencies: + - "ruamel.yaml==0.18.7" + pass_filenames: true + files: '(^\.pre-commit-config\.yaml$)|(^\.github/workflows/.*\.(yml|yaml)$)' + entry: "./ci/scripts/run_pre_commit_pin.py" - repo: https://github.com/doublify/pre-commit-rust - rev: v1.0 + rev: eeee35a89e69d5772bdee97db1a6a898467b686e # v1.0 hooks: - id: fmt name: rustfmt args: ["--all", "--manifest-path", "rust/Cargo.toml", "--"] + - repo: https://github.com/codespell-project/codespell + rev: 63c8f8312b7559622c0d82815639671ae42132ac # v2.4.1 + hooks: + - id: codespell -exclude: "^c/vendor/.*" +exclude: "^(c/vendor/.*)|(go/adbc/drivermgr/vendored/.*)" diff --git a/CHANGELOG.md b/CHANGELOG.md index 2805d519f6..0069576605 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -347,7 +347,7 @@ - **c/driver/postgresql,c/driver/sqlite**: Implement FOREIGN KEY constraints (#1099) - **go/adbc/driver/flightsql**: log new connections (#1146) - **c/driver/postgresql**: add integral COPY writers (#1130) -- **c/driver/postgresql**: Inital COPY Writer design (#1110) +- **c/driver/postgresql**: Initial COPY Writer design (#1110) - **c/driver/postgresql,c/driver/sqlite**: implement BOOL support in drivers (#1091) ### Fix @@ -690,7 +690,7 @@ - **ci**: Pin r-lib actions as a workaround for latest action updates (#2051) - **csharp/src/Drivers/BigQuery**: update BigQuery documents (#2047) - 
**go/adbc/driver/snowflake**: split files properly after reaching targetSize on ingestion (#2026) -- **c/driver/postgresql**: Ensure schema ordering is consisent and respects case sensitivity of table names (#2028) +- **c/driver/postgresql**: Ensure schema ordering is consistent and respects case sensitivity of table names (#2028) - **docs**: update broken link (#2016) - **docs**: correct snowflake options for bulk ingest (#2004) - **go/adbc/driver/flightsql**: propagate headers in GetObjects (#1996) @@ -699,3 +699,468 @@ - **ci**: update website_build.sh for new versioning scheme (#1972) - **dev/release**: update C# tag (#1973) - **c/vendor/nanoarrow**: Fix -Wreorder warning (#1966) + +## ADBC Libraries 15 (2024-11-08) + +### Versions + +- C/C++/GLib/Go/Python/Ruby: 1.3.0 +- C#: 0.15.0 +- Java: 0.15.0 +- R: 0.15.0 +- Rust: 0.15.0 + +### Feat + +- **c/driver/postgresql**: Enable basic connect/query workflow for Redshift (#2219) +- **rust/drivers/datafusion**: add support for bulk ingest (#2279) +- **csharp/src/Drivers/Apache**: convert Double to Float for Apache Spark on scalar conversion (#2296) +- **go/adbc/driver/snowflake**: update to the latest 1.12.0 gosnowflake driver (#2298) +- **csharp/src/Drivers/BigQuery**: support max stream count setting when creating read session (#2289) +- **rust/drivers**: adbc driver for datafusion (#2267) +- **go/adbc/driver/snowflake**: improve GetObjects performance and semantics (#2254) +- **c**: Implement ingestion and testing for float16, string_view, and binary_view (#2234) +- **r**: Add R BigQuery driver wrapper (#2235) +- **csharp/src/Drivers/Apache/Spark**: add request_timeout_ms option to allow longer HTTP request length (#2218) +- **go/adbc/driver/snowflake**: add support for a client config file (#2197) +- **csharp/src/Client**: Additional parameter support for DbCommand (#2195) +- **csharp/src/Drivers/Apache/Spark**: add option to ignore TLS/SSL certificate exceptions (#2188) +- **csharp/src/Drivers/Apache/Spark**: 
Perform scalar data type conversion for Spark over HTTP (#2152) +- **csharp/src/Drivers/Apache/Spark**: Azure HDInsight Spark Documentation (#2164) +- **c/driver/postgresql**: Implement ingestion of list types for PostgreSQL (#2153) +- **csharp/src/Drivers/Apache/Spark**: poc - Support for Apache Spark over HTTP (non-Arrow) (#2018) +- **c/driver/postgresql**: add `arrow.opaque` type metadata (#2122) + +### Fix + +- **csharp/src/Drivers/Apache**: fix float data type handling for tests on Databricks Spark (#2283) +- **go/adbc/driver/internal/driverbase**: proper unmarshalling for ConstraintColumnNames (#2285) +- **csharp/src/Drivers/Apache**: fix to workaround concurrency issue (#2282) +- **csharp/src/Drivers/Apache**: correctly handle empty response and add Client tests (#2275) +- **csharp/src/Drivers/Apache**: remove interleaved async look-ahead code (#2273) +- **c/driver_manager**: More robust error reporting for errors that occur before AdbcDatabaseInit() (#2266) +- **rust**: implement database/connection constructors without options (#2242) +- **csharp/src/Drivers**: update System.Text.Json to version 8.0.5 because of known vulnerability (#2238) +- **csharp/src/Drivers/Apache/Spark**: correct batch handling for the HiveServer2Reader (#2215) +- **go/adbc/driver/snowflake**: call GetObjects with null catalog at catalog depth (#2194) +- **csharp/src/Drivers/Apache/Spark**: correct BatchSize implementation for base reader (#2199) +- **csharp/src/Drivers/Apache/Spark**: correct precision/scale handling with zeros in fractional portion (#2198) +- **csharp/src/Drivers/BigQuery**: Fixed GBQ driver issue when results.TableReference is null (#2165) +- **go/adbc/driver/snowflake**: fix setting database and schema context after initial connection (#2169) +- **csharp/src/Drivers/Interop/Snowflake**: add test to demonstrate DEFAULT_ROLE behavior (#2151) +- **c/driver/postgresql**: Improve error reporting for queries that error before the COPY header is sent (#2134) + +### 
Refactor
+
+- **c/driver/postgresql**: cleanups for result_helper signatures (#2261)
+- **c/driver/postgresql**: Use GetObjectsHelper from framework to build objects (#2189)
+- **csharp/src/Drivers/Apache/Spark**: use UTF8 string for data conversion, instead of .NET String (#2192)
+- **c/driver/postgresql**: Use Status for error handling in BindStream (#2187)
+- **c/driver/postgresql**: Use Status instead of AdbcStatusCode/AdbcError in result helper (#2178)
+- **c/driver**: Use non-objects framework components in Postgres driver (#2166)
+- **c/driver/postgresql**: Use copy writer in BindStream for parameter binding (#2157)
+
+## ADBC Libraries 16 (2025-01-17)
+
+### Versions
+
+- C/C++/GLib/Go/Python/Ruby: 1.4.0
+- C#: 0.16.0
+- Java: 0.16.0
+- R: 0.16.0
+- Rust: 0.16.0
+
+### Breaking Changes
+
+- ⚠️ **rust/driver/snowflake**: return a `Result` from `Builder::from_env` when parsing fails (#2334)
+
+### New Features
+
+- **csharp/src/Client**: parse custom properties from connection string (#2352)
+- **csharp/src/Drivers**: introduce Interop.FlightSql driver (#2214)
+- **csharp/src/Drivers/Apache**: add connect and query timeout options (#2312)
+- **csharp/src/Drivers/Apache**: make Apache driver tests inheritable (#2341)
+- **rust/driver/snowflake**: add `adbc_snowflake` crate with Go driver wrapper (#2207)
+- ⚠️ **rust/driver/snowflake**: return a `Result` from `Builder::from_env` when parsing fails (#2334)
+
+### Bugfixes
+
+- **c/driver/postgresql**: don't unnecessarily COMMIT (#2412)
+- **c/driver/postgresql**: return unknown OIDs as opaque (#2450)
+- **ci**: ensure wheels are built with older manylinux (#2351)
+- **csharp/src/Apache.Arrow.Adbc/C**: export statement_execute_schema correctly (#2409)
+- **csharp/src/Drivers/Apache**: detect server error when polling for response (#2355)
+- **csharp/src/Drivers/BigQuery**: Use job reference instead of job id to get job to avoid interference between different locations (#2433)
+- **csharp/src/Drivers/BigQuery**: 
ensure BigQuery DATE type is Date32 Arrow type (#2446) +- **csharp/src/Drivers/BigQuery**: remove details to have type names match ODBC (#2431) +- **go/adbc/driver/bigquery**: set default project and dataset for new statements (#2342) +- **go/adbc/driver/snowflake**: update default values for fetch params (#2325) +- **java/driver-manager**: typo (#2336) + +### Documentation Improvements + +- add related work (#2333) +- change Flight SQL driver usage to executable example (#2395) +- remove crosslinking to Arrow Javadocs (#2455) + +## ADBC Libraries 17 (2025-03-03) + +### Versions + +- C/C++/GLib/Go/Python/Ruby: 1.5.0 +- C#: 0.17.0 +- Java: 0.17.0 +- R: 0.17.0 +- Rust: 0.17.0 + +### New Features + +- **c/driver**: add support for CMake packages of Go based drivers (#2561) +- **ci/linux-packages/apt**: add support for Ubuntu 24.04 packages (#2482) +- **csharp/src/Apache.Arrow.Adbc**: improved performance of ValueAt helper and AdbcDataReader (#2534) +- **csharp/src/Drivers/Apache**: Add support for Impala ADBC Driver with Refactoring and Unit Tests (#2365) +- **csharp/src/Drivers/BigQuery**: add support for net472 (#2527) +- **csharp/src/Drivers/BigQuery**: use a default project ID if one is not specified (#2471) +- **go/adbc/driver/flightsql**: allow passing arbitrary grpc dial options in NewDatabase (#2563) +- **go/adbc/driver/snowflake**: add query tag option (#2484) +- **go/adbc/driver/snowflake**: implement WithTransporter driver option (#2558) + +### Bugfixes + +- **c/driver/sqlite**: don't rely on double-quoted strings feature (#2555) +- **go/adbc/driver/flightsql**: Parsing column metadata in FlightSQL driver (#2481) +- **go/adbc/driver/snowflake**: fix GetObjects for VECTOR cols (#2564) +- **go/adbc/driver/snowflake**: use one session for connection (#2494) + +### Documentation Improvements + +- add SQLite cookbook example for batch size/inference (#2523) +- add stdout/stderr and index support to recipe directive (#2495) +- crosslink to Arrow Javadocs again 
(#2483) +- fix references to root CONTRIBUTING.md file (#2521) +- update java quickstart to use the PARAM_URI instead of the legacy PARAM_URL (#2530) + +## ADBC Libraries 18 (2025-05-02) + +### Versions + +- C/C++/GLib/Go/Python/Ruby: 1.6.0 +- C#: 0.18.0 +- Java: 0.18.0 +- R: 0.18.0 +- Rust: 0.18.0 + +### New Features + +- **c**: Declare dependencies for drivers in Meson configuration (#2746) +- **c/driver/postgresql**: avoid commit/rollback when idle (#2685) +- **csharp**: Add retry-after behavior for 503 responses in Spark ADBC driver (#2664) +- **csharp**: Add support for Prepare to ImportedStatement and to ADO.NET wrapper (#2628) +- **csharp**: Implement CloudFetch for Databricks Spark driver (#2634) +- **csharp**: fix powerbi hang when reading cloudfetch result in Databricks driver (#2747) +- **csharp**: improve handling of StructArrays (#2587) +- **csharp/src/Drivers**: Add Databricks driver (#2672) +- **csharp/src/Drivers/Apache**: Add prefetch functionality to CloudFetch in Spark ADBC driver (#2678) +- **csharp/src/Drivers/Apache**: Add support for Hive ADBC Driver with unit tests (#2540) +- **csharp/src/Drivers/Apache**: Add support for native metadata queries using statement options (#2665) +- **csharp/src/Drivers/Apache**: Custom ssl server certificate validation for Spark, Impala & Hive (#2610) +- **csharp/src/Drivers/Apache**: Performance improvement - Replace TSocketTransport with TBufferedTransport (#2742) +- **csharp/src/Drivers/Apache**: Regenerate Thrift classes based on a newer TCLIService.thrift (#2611) +- **csharp/src/Drivers/Apache**: enhance GetColumns with BASE_TYPE_NAME column (#2695) +- **csharp/src/Drivers/Apache/Spark**: Add Lz4 compression support to arrow batch reader (#2669) +- **csharp/src/Drivers/Apache/Spark**: Add OAuth access token auth type to Csharp Spark Driver (#2579) +- **csharp/src/Drivers/Apache/Spark**: add user agent entry + thrift version for spark http connections (#2711) +- **csharp/src/Drivers/BigQuery**: Add support 
for AAD/Entra authentication (#2655) +- **csharp/src/Drivers/BigQuery**: add additional billing and timeout properties and test settings (#2566) +- **csharp/src/Drivers/BigQuery**: choose the first project ID if not specified (#2541) +- **csharp/src/Drivers/BigQuery**: support evaluation kind and statement type setting (#2698) +- **csharp/src/Drivers/Databricks**: Add option to enable using direct results for statements (#2737) +- **csharp/src/Drivers/Databricks**: Implement ClientCredentialsProvider (#2743) +- **csharp/src/Drivers/Databricks**: Make Cloud Fetch options configurable at the connection level (#2691) +- **csharp/src/Drivers/Databricks**: Support server side property passthrough (#2692) +- **go/adbc/driver/bigquery**: Return data about table/view partitioning (#2697) +- **go/adbc/driver/flightsql**: Add OAuth Support to Flight Client (#2651) +- **go/adbc/sqldriver**: read from union types (#2637) +- **java/driver/jni**: add JNI bindings to native driver manager (#2401) +- **python/adbc_driver_manager**: add cursor() arg to set options (#2589) +- **python/adbc_driver_manager**: enable DB-API without PyArrow (#2609) + +### Bugfixes + +- **c**: Add libdl as dependency of driver manager in Meson (#2735) +- **c/driver/postgresql**: avoid crash if closing invalidated result (#2653) +- **c/driver/postgresql**: handle connection options before Init (#2701) +- **ci**: Skip flaky ASAN failures in Meson (#2604) +- **ci**: add missing trigger paths for Linux packages (#2761) +- **ci**: fix MacOS builds for C# (#2606) +- **csharp/src**: Add missing override to ImportedAdbcConnection (#2577) +- **csharp/src/Drivers/Apache**: Fix setting foreign schema/table in GetCrossReference (#2765) +- **csharp/src/Drivers/Apache**: Improve handling of authentication and server type enumeration parsing (#2574) +- **csharp/src/Drivers/Apache**: Set tls enabled to true all HTTP-based drivers, by default (#2667) +- **csharp/src/Drivers/Apache/Thrift**: Generated Thrift-based code 
should not be exposed publicly (#2710) +- **csharp/src/Drivers/Databricks**: Fix Lz4 compression logic for DatabricksReader (#2690) +- **dev/release**: remove incorrect `-f` from `mamba create` (#2755) +- **dev/release**: use packages.apache.org instead of apache.jfrog.io (#2756) +- **glib**: use -fPIE explicitly for g-ir-scanner (#2758) +- **go**: Use arrow-go in templates instead of arrow/go (#2712) +- **go/adbc/driver/bigquery**: Avoid creating arrow iterator when schema is empty (#2614) +- **go/adbc/driver/bigquery**: Use number of rows (rather than schema) to check if we need an empty arrow iterator (#2674) +- **go/adbc/driver/snowflake**: implement ability to set database options after initialization (#2728) +- **go/adbc/driver/snowflake**: try to suppress stray logs (#2608) +- **python/adbc_driver_postgresql**: handle kwargs in dbapi connect (#2700) +- **rust/core**: remove the Mutex around the FFI driver object (#2736) + +### Documentation Improvements + +- rework "What exactly is ADBC?" 
in FAQ (#2763) +- update implementation status table (#2580) +- **rust**: show driver_manager features on docs.rs (#2699) + +## ADBC Libraries 19 (2025-07-02) + +### Versions + +- C/C++/GLib/Go/Python/Ruby: 1.7.0 +- C#: 0.19.0 +- Java: 0.19.0 +- R: 0.19.0 +- Rust: 0.19.0 + +### Breaking Changes + +- ⚠️ **rust**: Let immutable drivers create connections (#2788) + +### New Features + +- **c/driver/postgresql**: add read support for int2vector (#2919) +- **c/driver_manager**: add new function to allow loading by manifest (#2918) +- **csharp**: Sanitize thrift protocol generated code for Databricks driver (#2787) +- **csharp/src/Apache.Arrow.Adbc**: OpenTelemetry tracing baseline (#2847) +- **csharp/src/Drivers/Apache**: Add escape underscore parameter to metadata command (#2920) +- **csharp/src/Drivers/Apache**: Add support for Sasl transport in Hive and Impala ADBC Driver (#2822) +- **csharp/src/Drivers/Apache**: Format HiveServer2Exception messages (#2934) +- **csharp/src/Drivers/Apache**: Implement GetColumnsExtended metadata for Databricks (#2766) +- **csharp/src/Drivers/Apache**: Implement Standard SSL mode for Impala (#2745) +- **csharp/src/Drivers/Databricks**: Add cloud fetch heartbeat polling tests (#2898) +- **csharp/src/Drivers/Databricks**: Add configurable multiple catalog support (#2845) +- **csharp/src/Drivers/Databricks**: Allow configurable auth scope for client-credentials flow (#2803) +- **csharp/src/Drivers/Databricks**: BaseDatabricksReader (#2842) +- **csharp/src/Drivers/Databricks**: Databricks Proxy Configurator (#2789) +- **csharp/src/Drivers/Databricks**: Default catalog + schema support (#2806) +- **csharp/src/Drivers/Databricks**: Default catalogs edge cases (#2896) +- **csharp/src/Drivers/Databricks**: Fix for older DBR versions incorrect ResultFormat (#3020) +- **csharp/src/Drivers/Databricks**: Fix initial catalog typo (#3057) +- **csharp/src/Drivers/Databricks**: Fix status polling test (#2838) +- **csharp/src/Drivers/Databricks**: 
Fixes to heartbeat polling (#2851) +- **csharp/src/Drivers/Databricks**: Handle legacy SPARK catalog (#2884) +- **csharp/src/Drivers/Databricks**: Implement CloudFetchUrlManager to handle presigned URL expiration in CloudFetch (#2855) +- **csharp/src/Drivers/Databricks**: Integrate OAuthClientCredentialsProvider with Databricks Driver (#2762) +- **csharp/src/Drivers/Databricks**: Integrate ProxyConfigurations with Drivers (#2794) +- **csharp/src/Drivers/Databricks**: Multiple catalogs with default database (#2921) +- **csharp/src/Drivers/Databricks**: OAuthClientCredentialsProvider test improvements (#2799) +- **csharp/src/Drivers/Databricks**: Optimize GetColumnsExtendedAsync via DESC TABLE EXTENDED (#2953) +- **csharp/src/Drivers/Databricks**: Poll status to keep query alive (#2820) +- **csharp/src/Drivers/Databricks**: Primary Key and Foreign Key Metadata Optimization (#2886) +- **csharp/src/Drivers/Databricks**: Protocol feature negotiator (#2985) +- **glib**: Add gadbc_database_set_load_flags() (#3041) +- **go/adbc**: prototype OpenTelemetry trace file exporter in go driver (#2729) +- **go/adbc/driver**: initial tracing instrumentation for Snowflake driver (#2825) +- **go/adbc/driver/flightsql**: add SSL root certs to oauth (#2829) +- **go/adbc/driver/snowflake**: New setting to set the maximum timestamp precision to microseconds (#2917) +- **go/adbc/drivermgr**: Set default load flags for drivermgr to load manifests (#3021) +- **python/adbc_driver_manager**: Update python driver_manager to load manifests (#3018) +- **python/adbc_driver_manager**: accept pathlib.Path in Database (#3035) +- **python/adbc_driver_manager**: simplify autocommit=True (#2990) +- **python/adbc_driver_manager**: support more APIs sans PyArrow (#2839) +- **r/adbcdrivermanager**: Add load by manifest to adbcdrivermanager (#3036) + +### Bugfixes + +- **.github/workflows**: update windows os version for csharp workflow (#2950) +- **c**: Ignore dl dependency on Windows with Meson (#2848) 
+- **c**: enable linking to static builds (#2738) +- **c/driver/postgresql**: ingest zoned timestamp as WITH TIME ZONE (#2904) +- **c/validation**: Use disabler pattern for validation_dep in Meson (#2849) +- **csharp/src/Drivers**: Add FK_NAME and KEQ_SEQ fields to GetColumnsExtended and improve type handling (#2959) +- **csharp/src/Drivers**: Enhance pattern wildcard escaping in metadata operations (#2960) +- **csharp/src/Drivers/Apache**: improve metadata query handling (#2926) +- **csharp/src/Drivers/Apache/Hive2**: Add ServerProtocolVersion in Connection (#2948) +- **csharp/src/Drivers/Apache/Hive2**: improve foreign key handling in GetColumnsExtended (#2894) +- **csharp/src/Drivers/BigQuery**: Fix the bug about QueryResultsOptions and script statement (#2796) +- **csharp/src/Drivers/BigQuery**: Fix the bug about large result and timeout (#2810) +- **csharp/src/Drivers/BigQuery**: Prevent callers from attempting to use the public project ID to create query jobs (#2966) +- **csharp/src/Drivers/BigQuery**: TIME should be Time64Type.Microsecond (#2741) +- **csharp/src/Drivers/Databricks**: Align ConnectionTimeout with TemporarilyUnavailableRetryTimeout (#3073) +- **csharp/src/Drivers/Databricks**: Fix parsing of lowercase server side properties (#2885) +- **csharp/src/Drivers/Databricks**: Remove redundant statement close operation (#2952) +- **csharp/src/Drivers/Databricks**: increase default retry timeout (#2925) +- **docs**: Add `cmake --build` step (#2840) +- **docs**: update go install command in flight sql recipe (#2798) +- **go/adbc**: adding back compatibility for function FlightSQLDriverInit (#3056) +- **go/adbc/driver**: inject version to built Go drivers (#2916) +- **go/adbc/driver/internal/driverbase**: Ensure to propagate the traceParent from the driver/database to connection (#2951) +- **go/adbc/driver/internal/driverbase**: fix missing interface func (#3009) +- **go/adbc/driver/snowflake**: Adjust the precision of the Timestamp values in the 
JSON-only path (#2965) +- **go/adbc/driver/snowflake**: Boolean columns return as string types in an empty recordset schema (#2854) +- **go/adbc/driver/snowflake**: fix copy concurrency 0 (#2805) +- **go/adbc/driver/snowflake**: set log level to not spam console (#2807) +- **python/adbc_driver_manager**: don't leak array streams (#2922) +- **r/adbcbigquery, r/adbcflightsql, r/adbcsnowflake**: fix warnings on R CMD check for Go-based drivers (#3061) +- **r/adbcsnowflake**: Fix configuration of adbcsnowflake when configure is run from a git shell (#2771) +- **r/adbcsqlite**: Don't print results of compilation failure when checking for extension support (#3003) +- ⚠️ **rust**: Let immutable drivers create connections (#2788) +- **rust**: fix `package.rust-version` fields to match to MSRV (#2997) +- **rust/core**: URLs pointing to `adbc.h` in docs (#2883) +- **rust/core**: use $crate so driver export works outside crate (#2808) + +### Documentation Improvements + +- acknowledge Rust's existence (#3083) +- add USE_COPY recipe and update PostgreSQL docs (#2859) +- add installation of basic Python dependencies to CONTRIBUTING.md (#3053) +- add missing mention of Python in supported languages (#2853) +- describe Snowflake URI creation (#3062) +- fix target name for c/c++ quickstart (#2906) +- fix typo (#2862) +- fix typo (#2888) +- fix typo in docs/source/driver/jdbc.rst (#3081) +- show a warning banner when viewing old/dev docs (#2860) +- update CONTRIBUTING to use mamba consistently (#2995) +- update installation commands for R (#3060) +- update jdbc.rst to fix dependency artifact ID mismatch (#2976) +- **rust**: add ADBC_SNOWFLAKE_GO_LIB_DIR requirement (#2984) +- **rust**: add protobuf requirement (#2964) + +## ADBC Libraries 20 (2025-09-09) + +### Versions + +- C/C++/GLib/Go/Python/Ruby: 1.8.0 +- C#: 0.20.0 +- Java: 0.20.0 +- R: 0.20.0 +- Rust: 0.20.0 + +### Breaking Changes + +- ⚠️ **rust**: not to mark some enums as `non_exhaustive` (#3245) +- ⚠️ **rust/core**: move 
the driver_manager feature to the new adbc_driver_manager package (#3197) +- ⚠️ **rust/core**: move the ffi related stuff to the new adbc_ffi package (#3381) +- ⚠️ **rust/driver/datafusion**: update to datafusion 48 (#3167) + +### New Features + +- **c/driver/postgresql**: bind arrow.json to JSON (#3333) +- **c/driver/sqlite, python/adbc_driver_manager**: bind params by name (#3362) +- **c/driver_manager**: don't ignore invalid manifests (#3399) +- **c/driver_manager**: improve error reporting for manifests (#3386) +- **c/driver_manager, rust/driver_manager**: add manifest version check (#3393) +- **c/driver_manager, rust/driver_manager**: handle virtual environments in driver manager (#3320) +- **csharp/src**: Add support for adding and configuring OTel exporters (#2949) +- **csharp/src/Apache.Arrow.Adbc/Tracing**: allow ActivitySource tags to be set from TracingConnection (#3218) +- **csharp/src/Drivers**: update drivers to .NET 8 (#3120) +- **csharp/src/Drivers/Apache**: Add compression support for Spark, Impala & Hive Http Connection (#3127) +- **csharp/src/Drivers/Apache**: Enabled Standard protocol for Spark and used SASL transport with basic auth (#3380) +- **csharp/src/Drivers/Apache**: Implement protocol fallback mechanism to support old server version of Spark & Hive (#3312) +- **csharp/src/Drivers/Apache**: Implement self signed ssl certificate validation for Spark, Impala & Hive (#3224) +- **csharp/src/Drivers/Apache**: add env variable config override for databricks (#3304) +- **csharp/src/Drivers/Apache**: add support for Statement.Cancel (#3302) +- **csharp/src/Drivers/BigQuery**: Enhanced tracing and large resultset improvements (#3022) +- **csharp/src/Drivers/Databricks**: Add W3C trace context (#3082) +- **csharp/src/Drivers/Databricks**: Fix EnablePkFk (#3098) +- **csharp/src/Drivers/Databricks**: Fix StatementTimeoutTest (#3133) +- **csharp/src/Drivers/Databricks**: Move DescribeTableExtended to version negotiator (#3137) +- 
**csharp/src/Drivers/Databricks**: Remove redundant CloseOperation for GetColumnsAsync (#3132) +- **csharp/src/Drivers/Databricks**: Remove redundant closeoperation (#3093) +- **csharp/src/Drivers/Databricks**: Use ArrowSchema for Response Schema (#3140) +- **csharp/test/Drivers/Databricks**: Add mandatory token exchange (#3192) +- **csharp/test/Drivers/Databricks**: Enable RunAsync option in TExecuteStatementReq (#3171) +- **csharp/test/Drivers/Databricks**: Support token refresh to extend connection lifetime (#3177) +- **glib**: add AdbcStatementGetParameterSchema() bindings (#3118) +- **go/adbc**: add GetDriverInfo helper (#3239) +- **go/adbc**: add IngestStream helper for one-call ingestion and add TestIngestStream (#3150) +- **go/adbc/driver/bigquery**: Add "adbc.bigquery.sql.location" param (#3280) +- **go/adbc/driver/bigquery**: error if we lack readSessionUser (#3297) +- **go/adbc/driver/bigquery**: support service account impersonation (#3174) +- **go/adbc/driver/snowflake**: Enable PAT and WIF auth (#3366) +- **go/adbc/sqldriver**: handle timestamp/time.Time values for input (#3109) +- **java/driver/jni**: enable new load flags (#3373) +- **java/driver/jni**: implement parameter binding (#3370) +- **java/driver/jni**: pass through all initial params (#3372) +- **ruby**: don't use adbc-arrow-glib (#3221) +- **rust/core**: add function to load driver manifests (#3099) +- **rust/driver/snowflake**: add `pat` and `wif` auth types (#3376) + +### Bugfixes + +- **c/driver_manager**: add `drivers` subdir in search paths (#3375) +- **c/driver_manager**: fix expected `;` for musl arch (#3105) +- **c/driver_manager**: modify SYSTEM path behavior on macOS (#3250) +- **c/driver_manager**: rename `ADBC_CONFIG_PATH` to `ADBC_DRIVER_PATH` (#3379) +- **c/driver_manager**: use Driver.entrypoint as per docs (#3242) +- **c/driver_manager, rust/driver_manager**: establish standard platform tuples (#3313) +- **csharp/src/Apache.Arrow.Adbc/C**: Stop trying to unload dynamic 
libraries (#3291) +- **csharp/src/Drivers**: Fix cloud fetch cancel/timeout mechanism (#3285) +- **csharp/src/Drivers/Apache**: generate type-consistent empty result for GetColumnsExtended query (#3096) +- **csharp/src/Drivers/Apache/Hive2**: Remove unnecessary CloseOperation in Statement.Dispose when query is metadata query (#3189) +- **csharp/src/Drivers/Apache/Hive2**: add check to see if operation is already closed (#3301) +- **csharp/src/Drivers/Apache/Spark**: fix column metadata index offset for Spark standard (#3392) +- **csharp/src/Drivers/BigQuery**: Adjust default dataset id (#3187) +- **csharp/src/Drivers/BigQuery**: Include try/catch for InvalidOperationException in ReadRowsStream (#3361) +- **csharp/src/Drivers/BigQuery**: Modify ReadChunk behavior (#3323) +- **csharp/src/Drivers/BigQuery**: add details for retried error message (#3244) +- **csharp/src/Drivers/Databricks**: Add another fallback check of GetColumnsExtendedAsync (#3219) +- **csharp/src/Drivers/Databricks**: Add instructions about driver config setup (#3367) +- **csharp/src/Drivers/Databricks**: Change fallback check of Databricks.GetColumnsExtendedAsync (#3121) +- **csharp/src/Drivers/Databricks**: Correct DatabricksCompositeReader and StatusPoller to Stop/Dispose Appropriately (#3217) +- **csharp/src/Drivers/Databricks**: DatabricksCompositeReader unit tests (#3265) +- **csharp/src/Drivers/Databricks**: Fix Databricks readme (#3365) +- **csharp/src/Drivers/Databricks**: Fix null pointer exception (#3261) +- **csharp/src/Drivers/Databricks**: PECO-2562 Use "default" schema in open session request (#3359) +- **csharp/src/Drivers/Databricks**: Reader Refactors (#3254) +- **csharp/src/Drivers/Databricks**: Set GetObjectsPatternsRequireLowerCase true (#3131) +- **csharp/src/Drivers/Databricks**: Set enable_run_async_thrift default true (#3232) +- **csharp/src/Drivers/Databricks**: Set the SqlState of the exception in RetryHttpHandler (#3092) +- **csharp/src/Drivers/Databricks**: Use default 
result persistence mode (#3203) +- **csharp/src/Drivers/Databricks**: [PECO-2396] Fix timestamp for dbr 6.6 - Set timestamp configuration on OpenSessionReq (#3327) +- **csharp/src/Drivers/Databricks**: correct tracing instrumentation for assembly name and version (#3170) +- **csharp/src/Drivers/Databricks**: fix CloudFetchResultFetcher initial results processing logic (#3097) +- **csharp/test/Drivers**: Fix databricks tests (#3358) +- **csharp/test/Drivers/Databricks**: Change the default QueryTimeoutSeconds to 3 hours (#3175) +- **csharp/test/Drivers/Databricks**: Enrich RetryHttpHandler with other status codes (#3186) +- **csharp/test/Drivers/Databricks**: Fix Pkfk Testcase (#3193) +- **csharp/test/Drivers/Databricks**: Run token exchange in a background task (#3188) +- **go/adbc**: Forward SQLSTATE and vendor code (#2801) +- **go/adbc**: changing the location of FlightSQLDriverInit function (#3079) +- **go/adbc/driver/bigquery**: accept old auth option value (#3317) +- **go/adbc/driver/bigquery**: fix parsing repeated records with nested fields (#3240) +- **go/adbc/driver/bigquery**: fix timestamp arrow type to use micro seconds (#3364) +- **go/adbc/driver/snowflake**: fix unit tests (#3377) +- **go/adbc/drivermgr**: properly vendor toml++ (#3138) +- **go/adbc/pkg**: Run make regenerate to keep generated code in sync with templates (#3202) +- **go/adbc/pkg**: add PowerShell option to run when executing in a Windows-based ADO pipeline (#3124) +- **java/driver/jni**: update AdbcDriverFactory metadata (#3348) +- **python/adbc_driver_bigquery**: correct string value of credential enum (#3091) +- **python/adbc_driver_manager**: handle empty params in executemany (#3332) +- **python/adbc_driver_manager**: mark calls with nogil (#3321) +- **rust/core**: fix build error on windows and enable ci for windows (#3148) +- **rust/driver_manager**: modify SYSTEM path behavior on macOS (#3252) + +### Documentation Improvements + +- Fix pip install command for 
arrow-adbc-nightlies (#3222) +- add Snowflake and BigQuery drivers to Python API reference (#3088) +- add docs for driver manifests (#3176) +- clarify relationship specification.rst to adbc.h (#3226) +- consistent use of `pushd` instead of `cd` in the contributing guide (#3089) +- fix invalid link in snowflake docs (#3246) +- fix safari rendering in manifest_load.mmd diagram (#3391) +- fix typo in python/adbc_driver_postgresql/README.md (#3194) +- generate driver status from README badges (#2890) +- improve go docs by adding a readme (#3204) +- link to AdbcDriverInitFunc in how_manager.rst (#3227) +- minor edits for first version of driver manager docs (#3180) +- minor improvements to driver_manifests.rst (#3394) +- organize Documentation steps of CONTRIBUTING.md (#3100) +- rework driver manager references across docs (#3388) +- **rust/core**: add simple usage of Driver Manager (#3086) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c9cbd0afb0..f177e04fcc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,6 +40,13 @@ $ mamba create -n adbc --file ci/conda_env_cpp.txt $ mamba activate adbc ``` +Also you can add dependencies to an existing environment as follows: + +```shell +$ mamba activate adbc +$ mamba install --file ci/conda_env_cpp.txt +``` + (For other Conda distributions, you will likely need `create ... -c conda-forge --file ...`). @@ -90,10 +97,11 @@ You can use CMake presets to build and test: ```shell $ mkdir build -$ cd build +$ pushd build $ cmake ../c --preset debug +$ cmake --build . 
# ctest reads presets from PWD -$ cd ../c +$ pushd ../c $ ctest --preset debug --test-dir ../build ``` @@ -102,7 +110,7 @@ postgres driver may be built together as follows: ```shell $ mkdir build -$ cd build +$ pushd build $ export CMAKE_EXPORT_COMPILE_COMMANDS=ON $ cmake ../c -DADBC_DRIVER_POSTGRESQL=ON -DADBC_DRIVER_MANAGER=ON $ make -j @@ -132,7 +140,7 @@ For example, to build and run tests for the SQLite3 driver: ```shell $ mkdir build -$ cd build +$ pushd build # You may need to set -DCMAKE_PREFIX_PATH such that googletest can be found $ cmake ../c -DADBC_BUILD_TESTS=ON -DADBC_DRIVER_SQLITE=ON $ make -j @@ -168,7 +176,7 @@ the form ``-D_option_:_value_``. For example, to build the a debug version of the SQLite3 driver along with tests, you would run: ```shell -$ meson configure -Dbuildtype=debug -Dsqlite=true -Dtests=true build +$ meson configure -Dbuildtype=debug -Dsqlite=enabled -Dtests=enabled build ``` With the options set, you can then compile the project. For most dependencies, @@ -190,7 +198,7 @@ $ meson test -C build Make sure [.NET Core is installed](https://dotnet.microsoft.com/en-us/download). ```shell -$ cd csharp +$ pushd csharp $ dotnet build ``` @@ -202,8 +210,8 @@ A list of dependencies for Conda (conda-forge) is included, and can be used as follows: ```shell -$ conda create -n adbc -c conda-forge --file ci/conda_env_docs.txt -$ conda activate adbc +$ mamba create -n adbc --file ci/conda_env_docs.txt +$ mamba activate adbc # Mermaid must be installed separately # While "global", it will end up in your Conda environment $ npm install -g @mermaid-js/mermaid-cli @@ -212,33 +220,58 @@ $ npm install -g @mermaid-js/mermaid-cli To build the HTML documentation: ```shell -$ pushd c/apidoc -$ doxygen -$ popd - -# Optionally: to also build the Python documentation -$ pushd python/adbc_driver_manager -$ pip install -e .[test] -$ popd - -$ cd docs +$ pushd docs $ make html ``` -The output can be found in `build/`. +The output can be found in `build/`. 
This does not generate API references +and results in some warnings, but it is not a problem if you're not working +with the API documentation. Some documentations are maintained as [Mermaid][mermaid] diagrams, which must be rendered and checked in. This can be done as follows: ```shell -cd docs -make -f mermaid.makefile -j all +$ pushd docs +$ make -f mermaid.makefile -j all # Check in the updated files ``` [mermaid]: https://mermaid.js.org/ [sphinx]: https://www.sphinx-doc.org/en/master/ +#### Building more complete documentation + +You can remove the warnings of `make html` and generate the Python API +reference as follows: + +```shell +$ mamba create -n adbc \ + --file ci/conda_env_docs.txt \ + --file ci/conda_env_cpp.txt \ + --file ci/conda_env_python.txt \ + --file ci/conda_env_java.txt +$ mamba activate adbc +$ env ADBC_USE_ASAN=0 ADBC_USE_UBSAN=0 ./ci/scripts/python_build.sh $(pwd) $(pwd)/build +$ pushd docs +$ make html +``` + +For a more complete build, you can use the following script: + +```shell +$ ./ci/scripts/docs_build.sh "$(pwd)" +``` + +This generates all available API references, and also runs doctests. + +To generate the R API reference, you need to run the following additionally: + +```shell +$ mamba install --file ci/conda_env_r.txt +$ ./ci/scripts/r_build.sh $(pwd) +``` + ### GLib The GLib bindings use the [Meson][meson] build system. @@ -260,8 +293,8 @@ A list of dependencies for Conda (conda-forge) is included, and can be used as follows: ```shell -$ conda create -n adbc -c conda-forge --file ci/conda_env_glib.txt -$ conda activate adbc +$ mamba create -n adbc --file ci/conda_env_glib.txt +$ mamba activate adbc ``` @@ -272,7 +305,7 @@ $ conda activate adbc Go libraries are a standard Go project. ```shell -$ cd go/adbc +$ pushd go/adbc $ go build -v ./... $ go test -v ./... ``` @@ -282,7 +315,7 @@ $ go test -v ./... The Java components are a standard Maven project. 
```shell -$ cd java/ +$ pushd java/ # Build and run tests $ mvn clean install ``` @@ -316,6 +349,47 @@ mvn install -Perrorprone [checker-framework]: https://checkerframework.org/ [errorprone]: https://errorprone.info/ +#### JNI + +To build the JNI bridge, the native components must be built. + +``` +# Build the driver manager +export ADBC_BUILD_STATIC=ON +export ADBC_BUILD_TESTS=OFF +export ADBC_USE_ASAN=OFF +export ADBC_USE_UBSAN=OFF +export BUILD_ALL=OFF +export BUILD_DRIVER_MANAGER=ON +export BUILD_DRIVER_SQLITE=ON +./ci/scripts/cpp_build.sh $(pwd) $(pwd)/build $(pwd)/local + +# Ensure JAVA_HOME is set. +# If it's not set, you can find it like so: +# java -XshowSettings:properties -version 2>&1 >/dev/null | grep java.home + +# Build the JNI libraries +./ci/scripts/java_jni_build.sh $(pwd) $(pwd)/java/build $(pwd)/local +``` + +Now build the Java code with the `jni` Maven profile enabled. To run tests, +the SQLite driver must also be present in (DY)LD_LIBRARY_PATH. + +``` +export LD_LIBRARY_PATH=$(pwd)/local/lib +pushd java +mvn install -Pjni +popd +``` + +This will build a JAR with native libraries for a single platform. If the +native libraries are built for multiple platforms, they can all be copied to +appropriate paths in the resources directory to build a single JAR that works +across multiple platforms. + +You can also build and test in IntelliJ; simply edit the run/test +configuration to add `LD_LIBRARY_PATH` to the environment. + ### Python Python libraries are managed with [setuptools][setuptools]. See @@ -323,7 +397,8 @@ individual READMEs for additional dependencies. In general, that means all projects can be built as follows: ```shell -$ cd python/adbc_driver_manager +$ mamba install --file ci/conda_env_python.txt +$ pushd python/adbc_driver_manager $ pip install -e . ``` @@ -363,7 +438,7 @@ The Ruby libraries are bindings around the GLib libraries. The Rust components are a standard Rust project. 
```shell -$ cd rust +$ pushd rust # Build and run tests $ cargo test ``` @@ -387,22 +462,34 @@ linters, formatters, and other analysis. For example: # Install pre-commit $ pip install pre-commit # or alternatively -$ conda install -c conda-forge --file ci/conda_env_dev.txt +$ mamba install --file ci/conda_env_dev.txt # Set up hooks $ pre-commit install # Run manually $ pre-commit run -Check Xml............................................(no files to check)Skipped -Check Yaml...........................................(no files to check)Skipped -Fix End of Files.....................................(no files to check)Skipped -Trim Trailing Whitespace.............................(no files to check)Skipped -clang-format.........................................(no files to check)Skipped -cmake-format.........................................(no files to check)Skipped -cpplint..............................................(no files to check)Skipped -Google Java Formatter................................(no files to check)Skipped -black................................................(no files to check)Skipped -flake8...............................................(no files to check)Skipped -isort................................................(no files to check)Skipped +check xml.................................................................(no files to check)Skipped +check yaml................................................................(no files to check)Skipped +fix end of files..........................................................(no files to check)Skipped +Mixed line endings (LF)...................................................(no files to check)Skipped +Mixed line endings (CRLF).................................................(no files to check)Skipped +trim trailing whitespace..................................................(no files to check)Skipped +clang-format..............................................................(no files to check)Skipped 
+cmake-format..............................................................(no files to check)Skipped +cpplint...................................................................(no files to check)Skipped +golangci-lint.............................................................(no files to check)Skipped +Go Formatter..............................................................(no files to check)Skipped +Google (or Palantir) Java Formatter.......................................(no files to check)Skipped +black.....................................................................(no files to check)Skipped +flake8....................................................................(no files to check)Skipped +isort.....................................................................(no files to check)Skipped +cython-lint...............................................................(no files to check)Skipped +Vala-Lint.................................................................(no files to check)Skipped +meson.....................................................................(no files to check)Skipped +Check for unapproved licenses.............................................(no files to check)Skipped +Ensure CGO adbc.h is syncd................................................(no files to check)Skipped +Ensure GitHub Actions and pre-commit hooks are pinned to a specific SHA...(no files to check)Skipped +rustfmt...................................................................(no files to check)Skipped +codespell.................................................................(no files to check)Skipped # Hooks automatically run on commit $ git commit ``` @@ -447,7 +534,7 @@ $ go install github.com/google/go-licenses@latest You can generate the LICENSE.txt with the following command: ```shell -$ cd go/adbc && go-licenses report ./... \ +$ pushd go/adbc && go-licenses report ./... 
\ --ignore github.com/apache/arrow-adbc/go/adbc \ --ignore github.com/apache/arrow/go/v11 \ --ignore github.com/apache/arrow/go/v12 \ diff --git a/LICENSE.txt b/LICENSE.txt index 0ef6328723..5022888af7 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -229,6 +229,31 @@ public domain, released using the CC0 1.0 Universal dedication (*). -------------------------------------------------------------------------------- +The files in c/vendor/toml++ contain code from + +https://github.com/marzer/tomlplusplus + +and have the following copyright notice: + +MIT License + +Copyright (c) Mark Gillard + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + The files python/*/*/_version.py and python/*/*/_static_version.py contain code from @@ -379,6 +404,42 @@ https://www.openssl.org/source/license.html distributions, like the Python wheels. SQLite is public domain. 
+-------------------------------------------------------------------------------- + +3rdparty dependency cloud.google.com/go +is statically linked in certain binary distributions, like the Python wheels. +cloud.google.com/go is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency cloud.google.com/go/auth +is statically linked in certain binary distributions, like the Python wheels. +cloud.google.com/go/auth is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency cloud.google.com/go/auth/oauth2adapt +is statically linked in certain binary distributions, like the Python wheels. +cloud.google.com/go/auth/oauth2adapt is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency cloud.google.com/go/bigquery +is statically linked in certain binary distributions, like the Python wheels. +cloud.google.com/go/bigquery is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency cloud.google.com/go/compute/metadata +is statically linked in certain binary distributions, like the Python wheels. +cloud.google.com/go/compute/metadata is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency cloud.google.com/go/iam +is statically linked in certain binary distributions, like the Python wheels. +cloud.google.com/go/iam is under the Apache-2.0 license. + -------------------------------------------------------------------------------- 3rdparty dependency github.com/99designs/keyring @@ -489,13 +550,12 @@ github.com/Azure/azure-sdk-for-go/sdk/storage/azblob is under the MIT license. 
SOFTWARE -------------------------------------------------------------------------------- -3rdparty dependency github.com/JohnCGriffin/overflow +3rdparty dependency github.com/BurntSushi/toml is statically linked in certain binary distributions, like the Python wheels. -github.com/JohnCGriffin/overflow is under the MIT license. - -MIT License +github.com/BurntSushi/toml is under the MIT license. +The MIT License (MIT) -Copyright (c) 2017 John C. Griffin, +Copyright (c) 2013 TOML authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -504,16 +564,16 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. -------------------------------------------------------------------------------- @@ -542,12 +602,6 @@ THE SOFTWARE. 
-------------------------------------------------------------------------------- -3rdparty dependency github.com/apache/thrift/lib/go/thrift -is statically linked in certain binary distributions, like the Python wheels. -github.com/apache/thrift/lib/go/thrift is under the Apache-2.0 license. - --------------------------------------------------------------------------------- - 3rdparty dependency github.com/aws/aws-sdk-go-v2 is statically linked in certain binary distributions, like the Python wheels. github.com/aws/aws-sdk-go-v2 is under the Apache-2.0 license. @@ -560,12 +614,24 @@ github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream is under the Apache-2.0 li -------------------------------------------------------------------------------- +3rdparty dependency github.com/aws/aws-sdk-go-v2/config +is statically linked in certain binary distributions, like the Python wheels. +github.com/aws/aws-sdk-go-v2/config is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + 3rdparty dependency github.com/aws/aws-sdk-go-v2/credentials is statically linked in certain binary distributions, like the Python wheels. github.com/aws/aws-sdk-go-v2/credentials is under the Apache-2.0 license. -------------------------------------------------------------------------------- +3rdparty dependency github.com/aws/aws-sdk-go-v2/feature/ec2/imds +is statically linked in certain binary distributions, like the Python wheels. +github.com/aws/aws-sdk-go-v2/feature/ec2/imds is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + 3rdparty dependency github.com/aws/aws-sdk-go-v2/feature/s3/manager is statically linked in certain binary distributions, like the Python wheels. github.com/aws/aws-sdk-go-v2/feature/s3/manager is under the Apache-2.0 license. 
@@ -584,6 +650,12 @@ github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 is under the Apache-2.0 licen -------------------------------------------------------------------------------- +3rdparty dependency github.com/aws/aws-sdk-go-v2/internal/ini +is statically linked in certain binary distributions, like the Python wheels. +github.com/aws/aws-sdk-go-v2/internal/ini is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + 3rdparty dependency github.com/aws/aws-sdk-go-v2/internal/sync/singleflight is statically linked in certain binary distributions, like the Python wheels. github.com/aws/aws-sdk-go-v2/internal/sync/singleflight is under the BSD-3-Clause license. @@ -654,6 +726,24 @@ github.com/aws/aws-sdk-go-v2/service/s3 is under the Apache-2.0 license. -------------------------------------------------------------------------------- +3rdparty dependency github.com/aws/aws-sdk-go-v2/service/sso +is statically linked in certain binary distributions, like the Python wheels. +github.com/aws/aws-sdk-go-v2/service/sso is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/aws/aws-sdk-go-v2/service/ssooidc +is statically linked in certain binary distributions, like the Python wheels. +github.com/aws/aws-sdk-go-v2/service/ssooidc is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/aws/aws-sdk-go-v2/service/sts +is statically linked in certain binary distributions, like the Python wheels. +github.com/aws/aws-sdk-go-v2/service/sts is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + 3rdparty dependency github.com/aws/smithy-go is statically linked in certain binary distributions, like the Python wheels. 
github.com/aws/smithy-go is under the Apache-2.0 license. @@ -721,6 +811,60 @@ THE SOFTWARE. -------------------------------------------------------------------------------- +3rdparty dependency github.com/cenkalti/backoff/v5 +is statically linked in certain binary distributions, like the Python wheels. +github.com/cenkalti/backoff/v5 is under the MIT license. +The MIT License (MIT) + +Copyright (c) 2014 Cenk Altı + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/cespare/xxhash/v2 +is statically linked in certain binary distributions, like the Python wheels. +github.com/cespare/xxhash/v2 is under the MIT license. 
+Copyright (c) 2016 Caleb Spare + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + 3rdparty dependency github.com/davecgh/go-spew/spew is statically linked in certain binary distributions, like the Python wheels. github.com/davecgh/go-spew/spew is under the ISC license. @@ -769,17 +913,28 @@ SOFTWARE. -------------------------------------------------------------------------------- -3rdparty dependency github.com/form3tech-oss/jwt-go +3rdparty dependency github.com/felixge/httpsnoop is statically linked in certain binary distributions, like the Python wheels. -github.com/form3tech-oss/jwt-go is under the MIT license. -Copyright (c) 2012 Dave Grijalva +github.com/felixge/httpsnoop is under the MIT license. 
+Copyright (c) 2016 Felix Geisendörfer (felix@debuggable.com) -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. -------------------------------------------------------------------------------- @@ -788,7 +943,7 @@ is statically linked in certain binary distributions, like the Python wheels. github.com/gabriel-vasile/mimetype is under the MIT license. MIT License -Copyright (c) 2018-2020 Gabriel Vasile +Copyright (c) 2018 Gabriel Vasile Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -810,6 +965,18 @@ SOFTWARE. -------------------------------------------------------------------------------- +3rdparty dependency github.com/go-logr/logr +is statically linked in certain binary distributions, like the Python wheels. +github.com/go-logr/logr is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/go-logr/stdr +is statically linked in certain binary distributions, like the Python wheels. +github.com/go-logr/stdr is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + 3rdparty dependency github.com/goccy/go-json is statically linked in certain binary distributions, like the Python wheels. github.com/goccy/go-json is under the MIT license. @@ -868,22 +1035,37 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- -3rdparty dependency github.com/golang/protobuf +3rdparty dependency github.com/golang-jwt/jwt/v5 is statically linked in certain binary distributions, like the Python wheels. -github.com/golang/protobuf is under the BSD-3-Clause license. 
-Copyright 2010 The Go Authors. All rights reserved. +github.com/golang-jwt/jwt/v5 is under the MIT license. +Copyright (c) 2012 Dave Grijalva +Copyright (c) 2021 golang-jwt maintainers + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/golang/snappy +is statically linked in certain binary distributions, like the Python wheels. +github.com/golang/snappy is under the BSD-3-Clause license. +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -899,13 +1081,24 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/google/flatbuffers/go +is statically linked in certain binary distributions, like the Python wheels. +github.com/google/flatbuffers/go is under the Apache-2.0 license. -------------------------------------------------------------------------------- -3rdparty dependency github.com/golang/snappy +3rdparty dependency github.com/google/s2a-go is statically linked in certain binary distributions, like the Python wheels. -github.com/golang/snappy is under the BSD-3-Clause license. -Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. +github.com/google/s2a-go is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/google/uuid +is statically linked in certain binary distributions, like the Python wheels. +github.com/google/uuid is under the BSD-3-Clause license. +Copyright (c) 2009,2014 Google Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -935,9 +1128,75 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- -3rdparty dependency github.com/google/flatbuffers/go +3rdparty dependency github.com/googleapis/enterprise-certificate-proxy/client is statically linked in certain binary distributions, like the Python wheels. -github.com/google/flatbuffers/go is under the Apache-2.0 license. +github.com/googleapis/enterprise-certificate-proxy/client is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/googleapis/gax-go/v2 +is statically linked in certain binary distributions, like the Python wheels. +github.com/googleapis/gax-go/v2 is under the BSD-3-Clause license. +Copyright 2016, Google Inc. +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency github.com/grpc-ecosystem/grpc-gateway/v2 +is statically linked in certain binary distributions, like the Python wheels. +github.com/grpc-ecosystem/grpc-gateway/v2 is under the BSD-3-Clause license. +Copyright (c) 2015, Gengo, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of Gengo, Inc. nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- @@ -966,13 +1225,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -3rdparty dependency github.com/jmespath/go-jmespath -is statically linked in certain binary distributions, like the Python wheels. -github.com/jmespath/go-jmespath is under the Apache-2.0 license. - -------------------------------------------------------------------------------- 3rdparty dependency github.com/klauspost/compress @@ -1310,10 +1562,82 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- +3rdparty dependency go.opentelemetry.io/auto/sdk +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/auto/sdk is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc is under the Apache-2.0 license. 
+ +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/otel +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/otel is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/otel/exporters/otlp/otlptrace +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/otel/exporters/otlp/otlptrace is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/otel/exporters/stdout/stdouttrace +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/otel/exporters/stdout/stdouttrace is under the Apache-2.0 license. 
+ +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/otel/metric +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/otel/metric is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/otel/sdk +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/otel/sdk is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/otel/trace +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/otel/trace is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency go.opentelemetry.io/proto/otlp +is statically linked in certain binary distributions, like the Python wheels. +go.opentelemetry.io/proto/otlp is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + 3rdparty dependency golang.org/x/crypto is statically linked in certain binary distributions, like the Python wheels. golang.org/x/crypto is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1325,7 +1649,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. 
nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -1346,7 +1670,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3rdparty dependency golang.org/x/exp is statically linked in certain binary distributions, like the Python wheels. golang.org/x/exp is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1358,7 +1682,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -1379,7 +1703,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3rdparty dependency golang.org/x/mod/semver is statically linked in certain binary distributions, like the Python wheels. golang.org/x/mod/semver is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1391,7 +1715,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. 
nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -1412,7 +1736,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3rdparty dependency golang.org/x/net is statically linked in certain binary distributions, like the Python wheels. golang.org/x/net is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1424,7 +1748,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -1442,10 +1766,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- -3rdparty dependency golang.org/x/sync/errgroup +3rdparty dependency golang.org/x/oauth2 is statically linked in certain binary distributions, like the Python wheels. -golang.org/x/sync/errgroup is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +golang.org/x/oauth2 is under the BSD-3-Clause license. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1457,7 +1781,40 @@ notice, this list of conditions and the following disclaimer. 
copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency golang.org/x/sync +is statically linked in certain binary distributions, like the Python wheels. +golang.org/x/sync is under the BSD-3-Clause license. +Copyright 2009 The Go Authors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. 
+ * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -1478,7 +1835,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3rdparty dependency golang.org/x/sys is statically linked in certain binary distributions, like the Python wheels. golang.org/x/sys is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1490,7 +1847,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -1511,7 +1868,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3rdparty dependency golang.org/x/term is statically linked in certain binary distributions, like the Python wheels. golang.org/x/term is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1523,7 +1880,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. 
nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -1544,7 +1901,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3rdparty dependency golang.org/x/text is statically linked in certain binary distributions, like the Python wheels. golang.org/x/text is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1556,7 +1913,40 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +-------------------------------------------------------------------------------- + +3rdparty dependency golang.org/x/time/rate +is statically linked in certain binary distributions, like the Python wheels. +golang.org/x/time/rate is under the BSD-3-Clause license. +Copyright 2009 The Go Authors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -1577,7 +1967,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3rdparty dependency golang.org/x/tools is statically linked in certain binary distributions, like the Python wheels. golang.org/x/tools is under the BSD-3-Clause license. -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1589,7 +1979,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
@@ -1610,7 +2000,40 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3rdparty dependency golang.org/x/xerrors is statically linked in certain binary distributions, like the Python wheels. golang.org/x/xerrors is under the BSD-3-Clause license. -Copyright (c) 2019 The Go Authors. All rights reserved. +Copyright 2019 The Go Authors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +-------------------------------------------------------------------------------- + +3rdparty dependency google.golang.org/api +is statically linked in certain binary distributions, like the Python wheels. +google.golang.org/api is under the BSD-3-Clause license. +Copyright (c) 2011 Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1640,9 +2063,54 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- -3rdparty dependency google.golang.org/genproto/googleapis/rpc/status +3rdparty dependency google.golang.org/api/internal/third_party/uritemplates +is statically linked in certain binary distributions, like the Python wheels. +google.golang.org/api/internal/third_party/uritemplates is under the BSD-3-Clause license. +Copyright (c) 2013 Joshua Tacoma. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency google.golang.org/genproto/googleapis/api +is statically linked in certain binary distributions, like the Python wheels. +google.golang.org/genproto/googleapis/api is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency google.golang.org/genproto/googleapis/rpc +is statically linked in certain binary distributions, like the Python wheels. +google.golang.org/genproto/googleapis/rpc is under the Apache-2.0 license. + +-------------------------------------------------------------------------------- + +3rdparty dependency google.golang.org/genproto/googleapis/type/expr is statically linked in certain binary distributions, like the Python wheels. -google.golang.org/genproto/googleapis/rpc/status is under the Apache-2.0 license. +google.golang.org/genproto/googleapis/type/expr is under the Apache-2.0 license. -------------------------------------------------------------------------------- diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index be69103d06..bc6a8305d8 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -18,20 +18,35 @@ cmake_minimum_required(VERSION 3.18) get_filename_component(REPOSITORY_ROOT ".." 
ABSOLUTE) list(APPEND CMAKE_MODULE_PATH "${REPOSITORY_ROOT}/c/cmake_modules/") -include(AdbcDefines) -include(BuildUtils) +include(AdbcVersion) project(adbc VERSION "${ADBC_BASE_VERSION}" LANGUAGES C CXX) set(CMAKE_C_STANDARD 99) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +include(AdbcDefines) +include(BuildUtils) include(CTest) -add_subdirectory(vendor/fmt EXCLUDE_FROM_ALL) -set_target_properties(fmt PROPERTIES POSITION_INDEPENDENT_CODE ON) -add_subdirectory(vendor/nanoarrow) +set(ADBC_TARGET_COMPILE_DEFINITIONS) +if(NOT ADBC_DEFINE_COMMON_ENTRYPOINTS) + message(STATUS "Defining ADBC_NO_COMMON_ENTRYPOINTS") + set(ADBC_TARGET_COMPILE_DEFINITIONS "ADBC_NO_COMMON_ENTRYPOINTS") +endif() + +if(ADBC_WITH_VENDORED_FMT) + add_subdirectory(vendor/fmt EXCLUDE_FROM_ALL) + set_target_properties(fmt PROPERTIES POSITION_INDEPENDENT_CODE ON) +else() + find_package(fmt REQUIRED) +endif() +if(ADBC_WITH_VENDORED_NANOARROW) + add_subdirectory(vendor/nanoarrow) +else() + find_package(nanoarrow REQUIRED) +endif() add_subdirectory(driver/common) add_subdirectory(driver/framework) @@ -50,6 +65,8 @@ if(ADBC_INTEGRATION_DUCKDB) endif() if(ADBC_DRIVER_FLIGHTSQL) + install(FILES "${REPOSITORY_ROOT}/c/include/arrow-adbc/driver/flightsql.h" + DESTINATION include/arrow-adbc/driver) add_subdirectory(driver/flightsql) endif() @@ -61,18 +78,26 @@ if(ADBC_DRIVER_MANAGER) endif() if(ADBC_DRIVER_POSTGRESQL) + install(FILES "${REPOSITORY_ROOT}/c/include/arrow-adbc/driver/postgresql.h" + DESTINATION include/arrow-adbc/driver) add_subdirectory(driver/postgresql) endif() if(ADBC_DRIVER_SQLITE) + install(FILES "${REPOSITORY_ROOT}/c/include/arrow-adbc/driver/sqlite.h" + DESTINATION include/arrow-adbc/driver) add_subdirectory(driver/sqlite) endif() if(ADBC_DRIVER_SNOWFLAKE) + install(FILES "${REPOSITORY_ROOT}/c/include/arrow-adbc/driver/snowflake.h" + DESTINATION include/arrow-adbc/driver) add_subdirectory(driver/snowflake) endif() if(ADBC_DRIVER_BIGQUERY) + install(FILES 
"${REPOSITORY_ROOT}/c/include/arrow-adbc/driver/bigquery.h" + DESTINATION include/arrow-adbc/driver) add_subdirectory(driver/bigquery) endif() diff --git a/c/cmake_modules/AdbcDefines.cmake b/c/cmake_modules/AdbcDefines.cmake index 6c83cca54c..11fe1a5d2a 100644 --- a/c/cmake_modules/AdbcDefines.cmake +++ b/c/cmake_modules/AdbcDefines.cmake @@ -20,9 +20,12 @@ enable_language(C CXX) +if(${CMAKE_VERSION} VERSION_GREATER "3.24") + cmake_policy(SET CMP0135 NEW) +endif() + set(BUILD_SUPPORT_DIR "${REPOSITORY_ROOT}/ci/build_support") -include(AdbcVersion) include(CheckLinkerFlag) include(DefineOptions) include(GNUInstallDirs) # Populates CMAKE_INSTALL_INCLUDEDIR @@ -92,8 +95,12 @@ if(MSVC) add_compile_options(/wd4711) # Don't warn about padding added after members add_compile_options(/wd4820) + # Don't warn about enforcing left-to-right evaluation order for operator[] + add_compile_options(/wd4866) add_compile_options(/wd5027) + add_compile_options(/wd5039) add_compile_options(/wd5045) + add_compile_options(/wd5246) elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") @@ -134,8 +141,8 @@ else() endif() macro(adbc_configure_target TARGET) - target_compile_options(${TARGET} - PRIVATE ${ADBC_C_CXX_FLAGS_${ADBC_BUILD_WARNING_LEVEL}}) + target_compile_options(${TARGET} PRIVATE ${ADBC_C_CXX_FLAGS_${ADBC_BUILD_WARNING_LEVEL}} + ${ADBC_CXXFLAGS}) endmacro() # Common testing setup diff --git a/c/cmake_modules/AdbcVersion.cmake b/c/cmake_modules/AdbcVersion.cmake index 4b0c3147ed..91d64f4aa9 100644 --- a/c/cmake_modules/AdbcVersion.cmake +++ b/c/cmake_modules/AdbcVersion.cmake @@ -21,7 +21,7 @@ # ------------------------------------------------------------ # Version definitions -set(ADBC_VERSION "1.3.0-SNAPSHOT") +set(ADBC_VERSION "1.9.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ADBC_BASE_VERSION "${ADBC_VERSION}") string(REPLACE "." 
";" _adbc_version_list "${ADBC_BASE_VERSION}") list(GET _adbc_version_list 0 ADBC_VERSION_MAJOR) diff --git a/c/cmake_modules/BuildUtils.cmake b/c/cmake_modules/BuildUtils.cmake index 88209ac5e6..293b18d161 100644 --- a/c/cmake_modules/BuildUtils.cmake +++ b/c/cmake_modules/BuildUtils.cmake @@ -67,12 +67,6 @@ function(add_thirdparty_lib LIB_NAME LIB_TYPE LIB) endif() endfunction() -function(REUSE_PRECOMPILED_HEADER_LIB TARGET_NAME LIB_NAME) - if(ADBC_USE_PRECOMPILED_HEADERS) - target_precompile_headers(${TARGET_NAME} REUSE_FROM ${LIB_NAME}) - endif() -endfunction() - function(arrow_install_cmake_package PACKAGE_NAME EXPORT_NAME) set(CONFIG_CMAKE "${PACKAGE_NAME}Config.cmake") set(BUILT_CONFIG_CMAKE "${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_CMAKE}") @@ -85,11 +79,13 @@ function(arrow_install_cmake_package PACKAGE_NAME EXPORT_NAME) set(INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PACKAGE_NAME}") install(FILES "${BUILT_CONFIG_CMAKE}" "${BUILT_CONFIG_VERSION_CMAKE}" DESTINATION "${INSTALL_CMAKEDIR}") - set(TARGETS_CMAKE "${PACKAGE_NAME}Targets.cmake") - install(EXPORT ${EXPORT_NAME} - DESTINATION "${INSTALL_CMAKEDIR}" - NAMESPACE "${PACKAGE_NAME}::" - FILE "${TARGETS_CMAKE}") + if(EXPORT_NAME) + set(TARGETS_CMAKE "${PACKAGE_NAME}Targets.cmake") + install(EXPORT ${EXPORT_NAME} + DESTINATION "${INSTALL_CMAKEDIR}" + NAMESPACE "${PACKAGE_NAME}::" + FILE "${TARGETS_CMAKE}") + endif() endfunction() # \arg OUTPUTS list to append built targets to @@ -100,11 +96,9 @@ function(ADD_ARROW_LIB LIB_NAME) BUILD_STATIC CMAKE_PACKAGE_NAME PKG_CONFIG_NAME - SHARED_LINK_FLAGS - PRECOMPILED_HEADER_LIB) + SHARED_LINK_FLAGS) set(multi_value_args SOURCES - PRECOMPILED_HEADERS OUTPUTS STATIC_LINK_LIBS SHARED_LINK_LIBS @@ -170,12 +164,6 @@ function(ADD_ARROW_LIB LIB_NAME) if(ARG_DEPENDENCIES) add_dependencies(${LIB_NAME}_objlib ${ARG_DEPENDENCIES}) endif() - if(ARG_PRECOMPILED_HEADER_LIB) - reuse_precompiled_header_lib(${LIB_NAME}_objlib ${ARG_PRECOMPILED_HEADER_LIB}) - endif() - 
if(ARG_PRECOMPILED_HEADERS AND ADBC_USE_PRECOMPILED_HEADERS) - target_precompile_headers(${LIB_NAME}_objlib PRIVATE ${ARG_PRECOMPILED_HEADERS}) - endif() set(LIB_DEPS $) set(LIB_INCLUDES) set(EXTRA_DEPS) @@ -211,17 +199,13 @@ function(ADD_ARROW_LIB LIB_NAME) if(BUILD_SHARED) add_library(${LIB_NAME}_shared SHARED ${LIB_DEPS}) - target_compile_features(${LIB_NAME}_shared PRIVATE cxx_std_11) + target_compile_features(${LIB_NAME}_shared PRIVATE cxx_std_17) set_property(TARGET ${LIB_NAME}_shared PROPERTY CXX_STANDARD_REQUIRED ON) adbc_configure_target(${LIB_NAME}_shared) if(EXTRA_DEPS) add_dependencies(${LIB_NAME}_shared ${EXTRA_DEPS}) endif() - if(ARG_PRECOMPILED_HEADER_LIB) - reuse_precompiled_header_lib(${LIB_NAME}_shared ${ARG_PRECOMPILED_HEADER_LIB}) - endif() - if(ARG_OUTPUTS) list(APPEND ${ARG_OUTPUTS} ${LIB_NAME}_shared) endif() @@ -310,16 +294,12 @@ function(ADD_ARROW_LIB LIB_NAME) if(BUILD_STATIC) add_library(${LIB_NAME}_static STATIC ${LIB_DEPS}) target_compile_features(${LIB_NAME}_static PRIVATE cxx_std_11) - set_property(TARGET ${LIB_NAME}_shared PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET ${LIB_NAME}_static PROPERTY CXX_STANDARD_REQUIRED ON) adbc_configure_target(${LIB_NAME}_static) if(EXTRA_DEPS) add_dependencies(${LIB_NAME}_static ${EXTRA_DEPS}) endif() - if(ARG_PRECOMPILED_HEADER_LIB) - reuse_precompiled_header_lib(${LIB_NAME}_static ${ARG_PRECOMPILED_HEADER_LIB}) - endif() - if(ARG_OUTPUTS) list(APPEND ${ARG_OUTPUTS} ${LIB_NAME}_static) endif() @@ -548,10 +528,8 @@ endfunction() # names must exist function(ADD_TEST_CASE REL_TEST_NAME) set(options NO_VALGRIND ENABLED) - set(one_value_args PRECOMPILED_HEADER_LIB) set(multi_value_args SOURCES - PRECOMPILED_HEADERS STATIC_LINK_LIBS EXTRA_LINK_LIBS EXTRA_INCLUDES @@ -611,14 +589,6 @@ function(ADD_TEST_CASE REL_TEST_NAME) target_link_libraries(${TEST_NAME} PRIVATE ${ADBC_TEST_LINK_LIBS}) endif() - if(ARG_PRECOMPILED_HEADER_LIB) - reuse_precompiled_header_lib(${TEST_NAME} 
${ARG_PRECOMPILED_HEADER_LIB}) - endif() - - if(ARG_PRECOMPILED_HEADERS AND ADBC_USE_PRECOMPILED_HEADERS) - target_precompile_headers(${TEST_NAME} PRIVATE ${ARG_PRECOMPILED_HEADERS}) - endif() - if(ARG_EXTRA_LINK_LIBS) target_link_libraries(${TEST_NAME} PRIVATE ${ARG_EXTRA_LINK_LIBS}) endif() diff --git a/c/cmake_modules/DefineOptions.cmake b/c/cmake_modules/DefineOptions.cmake index 13e6757347..759a8be407 100644 --- a/c/cmake_modules/DefineOptions.cmake +++ b/c/cmake_modules/DefineOptions.cmake @@ -98,34 +98,13 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ADBC_BUILD_SHARED "Build shared libraries" ON) - define_option_string(ADBC_PACKAGE_KIND - "Arbitrary string that identifies the kind of package;\ -(for informational purposes)" "") - define_option_string(ADBC_GIT_ID "The Arrow git commit id (if any)" "") define_option_string(ADBC_GIT_DESCRIPTION "The Arrow git commit description (if any)" "") - define_option(ADBC_NO_DEPRECATED_API "Exclude deprecated APIs from build" OFF) - define_option(ADBC_USE_CCACHE "Use ccache when compiling (if available)" ON) - define_option(ADBC_USE_PRECOMPILED_HEADERS "Use precompiled headers when compiling" OFF) - - # Arm64 architectures and extensions can lead to exploding combinations. - # So set it directly through cmake command line. - # - # If you change this, you need to change the definition in - # python/CMakeLists.txt too. 
- define_option_string(ADBC_ARMV8_ARCH - "Arm64 arch and extensions" - "armv8-a" # Default - "armv8-a" - "armv8-a+crc+crypto") - - define_option(ADBC_ALTIVEC "Build with Altivec if compiler has support" ON) - define_option(ADBC_RPATH_ORIGIN "Build Arrow libraries with RATH set to \$ORIGIN" OFF) define_option(ADBC_INSTALL_NAME_RPATH @@ -133,6 +112,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ADBC_GGDB_DEBUG "Pass -ggdb flag to debug builds" ON) + define_option(ADBC_WITH_VENDORED_FMT "Use vendored copy of fmt" ON) + + define_option(ADBC_WITH_VENDORED_NANOARROW "Use vendored copy of nanoarrow" ON) + #---------------------------------------------------------------------- set_option_category("Test and benchmark") @@ -156,6 +139,12 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "shared" "static") + define_option(ADBC_DRIVER_MANAGER_TEST_MANIFEST_USER_LEVEL + "Build driver manager manifest user-level tests" OFF) + + define_option(ADBC_DRIVER_MANAGER_TEST_MANIFEST_SYSTEM_LEVEL + "Build driver manager manifest system-level tests" OFF) + #---------------------------------------------------------------------- set_option_category("Lint") @@ -226,7 +215,11 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") #---------------------------------------------------------------------- set_option_category("Advanced developer") - option(ADBC_BUILD_CONFIG_SUMMARY_JSON "Summarize build configuration in a JSON file" ON) + define_option(ADBC_BUILD_CONFIG_SUMMARY_JSON + "Summarize build configuration in a JSON file" ON) + + define_option(ADBC_DEFINE_COMMON_ENTRYPOINTS + "Define the Adbc functions in static/shared driver libraries" ON) #---------------------------------------------------------------------- set_option_category("Project components") diff --git a/c/cmake_modules/GoUtils.cmake b/c/cmake_modules/GoUtils.cmake index 085d46fefd..a0e2ccef75 100644 --- a/c/cmake_modules/GoUtils.cmake +++ 
b/c/cmake_modules/GoUtils.cmake @@ -18,6 +18,53 @@ find_program(GO_BIN "go" REQUIRED) message(STATUS "Detecting Go executable: Found ${GO_BIN}") +set(ADBC_GO_PACKAGE_INIT + [=[ +get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if(_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif() + +function(adbc_add_shared_library target_name base_name) + set(shared_base_name + "${CMAKE_SHARED_LIBRARY_PREFIX}${base_name}${CMAKE_SHARED_LIBRARY_SUFFIX}") + set(prefix "${_IMPORT_PREFIX}/${ADBC_INSTALL_LIBDIR}") + add_library(${target_name} SHARED IMPORTED) + if(WINDOWS) + set(import_base_name + "${CMAKE_IMPORT_LIBRARY_PREFIX}${base_name}${CMAKE_IMPORT_LIBRARY_SUFFIX}") + set_target_properties(${target_name} + PROPERTIES + IMPORTED_IMPLIB "${prefix}/${import_base_name}" + IMPORTED_LOCATION "${_IMPORT_PREFIX}/bin/${shared_base_name}") + else() + set_target_properties(${target_name} + PROPERTIES + IMPORTED_LOCATION "${prefix}/${shared_base_name}.${ADBC_FULL_SO_VERSION}" + IMPORTED_SONAME "${prefix}/${shared_base_name}.${ADBC_SO_VERSION}") + endif() +endfunction() + +function(adbc_add_static_library target_name base_name) + set(static_base_name + "${CMAKE_STATIC_LIBRARY_PREFIX}${base_name}${CMAKE_STATIC_LIBRARY_SUFFIX}") + add_library(${target_name} STATIC IMPORTED) + if(WINDOWS) + set_target_properties(${target_name} + PROPERTIES + IMPORTED_LOCATION "${_IMPORT_PREFIX}/bin/${static_base_name}") + else() + set(prefix "${_IMPORT_PREFIX}/${ADBC_INSTALL_LIBDIR}") + set_target_properties(${target_name} + PROPERTIES + IMPORTED_LOCATION "${prefix}/${static_base_name}") + endif() +endfunction() +]=]) + function(add_go_lib GO_MOD_DIR GO_LIBNAME) set(options) set(one_value_args @@ -27,7 +74,7 @@ function(add_go_lib GO_MOD_DIR GO_LIBNAME) PKG_CONFIG_NAME BUILD_STATIC 
BUILD_SHARED) - set(multi_value_args SOURCES OUTPUTS) + set(multi_value_args SOURCES DEFINES OUTPUTS) cmake_parse_arguments(ARG "${options}" @@ -84,14 +131,21 @@ function(add_go_lib GO_MOD_DIR GO_LIBNAME) endif() # Go gcflags for disabling optimizations and inlining if debug - separate_arguments(GO_BUILD_FLAGS NATIVE_COMMAND - "${GO_BUILD_FLAGS} $<$:-gcflags=\"-N -l\">") + separate_arguments(GO_BUILD_FLAGS + NATIVE_COMMAND + "${GO_BUILD_FLAGS} -buildvcs=true $<$:-gcflags=\"-N -l\">" + ) # if we're building debug mode then change the default CGO_CFLAGS and CGO_CXXFLAGS from "-g O2" to "-g3" - set(GO_ENV_VARS - "CGO_ENABLED=1 $<$:CGO_CFLAGS=-g3> $<$:CGO_CXXFLAGS=-g3>" - ) - separate_arguments(GO_ENV_VARS NATIVE_COMMAND "${GO_ENV_VARS}") + set(GO_FLAGS "$<$:-g3>") + foreach(DEFINE ${ARG_DEFINES}) + string(APPEND GO_FLAGS " -D${DEFINE}") + endforeach() + + set(GO_ENV_VARS) + list(APPEND GO_ENV_VARS "CGO_ENABLED=1") + list(APPEND GO_ENV_VARS "CGO_CFLAGS=${GO_FLAGS}") + list(APPEND GO_ENV_VARS "CGO_CXXFLAGS=${GO_FLAGS}") if(BUILD_SHARED) set(LIB_NAME_SHARED @@ -118,7 +172,9 @@ function(add_go_lib GO_MOD_DIR GO_LIBNAME) separate_arguments(ARG_SHARED_LINK_FLAGS NATIVE_COMMAND "${ARG_SHARED_LINK_FLAGS}") endif() - set(GO_LDFLAGS "-ldflags;\"${ARG_SHARED_LINK_FLAGS};-a;${EXTLDFLAGS}\"") + set(GO_LDFLAGS + "-ldflags;\"${ARG_SHARED_LINK_FLAGS};-X;github.com/apache/arrow-adbc/go/adbc/driver/internal/driverbase.infoDriverVersion=v${ADBC_VERSION};-a;${EXTLDFLAGS}\"" + ) set(LIBOUT_SHARED "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME_SHARED}") @@ -245,6 +301,10 @@ function(add_go_lib GO_MOD_DIR GO_LIBNAME) install(FILES "${LIBOUT_STATIC}" TYPE LIB) endif() + if(ARG_CMAKE_PACKAGE_NAME) + arrow_install_cmake_package(${ARG_CMAKE_PACKAGE_NAME} "") + endif() + if(ARG_PKG_CONFIG_NAME) arrow_add_pkg_config("${ARG_PKG_CONFIG_NAME}") endif() diff --git a/r/adbcbigquery/src/Makevars.win b/c/driver/bigquery/AdbcDriverBigQueryConfig.cmake.in similarity index 55% rename from 
r/adbcbigquery/src/Makevars.win rename to c/driver/bigquery/AdbcDriverBigQueryConfig.cmake.in index df9fb94b96..9573ecf0a4 100644 --- a/r/adbcbigquery/src/Makevars.win +++ b/c/driver/bigquery/AdbcDriverBigQueryConfig.cmake.in @@ -15,20 +15,27 @@ # specific language governing permissions and limitations # under the License. -PKG_CPPFLAGS=-I$(CURDIR) -DADBC_EXPORT="" -PKG_LIBS=-L$(CURDIR)/go -ladbc_driver_bigquery +@PACKAGE_INIT@ -CGO_CC = `"${R_HOME}/bin${R_ARCH_BIN}/R.exe" CMD config CC` -CGO_CXX = `"${R_HOME}/bin${R_ARCH_BIN}/R.exe" CMD config CXX` -CGO_CFLAGS = $(ALL_CPPFLAGS) -GO_BIN = $(CURDIR)/go/tmp/go/bin/go.exe +set(ADBC_BUILD_SHARED @ADBC_BUILD_SHARED@) +set(ADBC_BUILD_STATIC @ADBC_BUILD_STATIC@) +set(ADBC_FULL_SO_VERSION "@ADBC_FULL_SO_VERSION@") +set(ADBC_INSTALL_LIBDIR "@CMAKE_INSTALL_LIBDIR@") +set(ADBC_SO_VERSION "@ADBC_SO_VERSION@") +set(ADBC_VERSION "@ADBC_VERSION@") -.PHONY: all gostatic gobin -all: $(SHLIB) -$(SHLIB): gostatic +@ADBC_GO_PACKAGE_INIT@ -gostatic: gobin - (cd "$(CURDIR)/go/adbc"; CC="$(CGO_CC)" CXX="$(CGO_CXX)" CGO_CFLAGS="$(CGO_CFLAGS)" "$(GO_BIN)" build -v -tags driverlib -o $(CURDIR)/go/libadbc_driver_bigquery.a -buildmode=c-archive "./pkg/bigquery") +if(ADBC_BUILD_SHARED) + adbc_add_shared_library( + AdbcDriverBigQuery::adbc_driver_bigquery_shared + adbc_driver_bigquery) +endif() -gobin: - (cd ..; "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "tools/download-go.R") +if(ADBC_BUILD_STATIC) + adbc_add_static_library( + AdbcDriverBigQuery::adbc_driver_bigquery_static + adbc_driver_bigquery) +endif() + +check_required_components(AdbcDriverBigQuery) diff --git a/c/driver/bigquery/CMakeLists.txt b/c/driver/bigquery/CMakeLists.txt index fe3937878a..7dc63b90bc 100644 --- a/c/driver/bigquery/CMakeLists.txt +++ b/c/driver/bigquery/CMakeLists.txt @@ -26,17 +26,20 @@ add_go_lib("${REPOSITORY_ROOT}/go/adbc/pkg/bigquery/" utils.c BUILD_TAGS driverlib + CMAKE_PACKAGE_NAME + AdbcDriverBigQuery PKG_CONFIG_NAME adbc-driver-bigquery SHARED_LINK_FLAGS 
${LDFLAGS} + DEFINES + ${ADBC_TARGET_COMPILE_DEFINITIONS} OUTPUTS ADBC_LIBRARIES) foreach(LIB_TARGET ${ADBC_LIBRARIES}) target_include_directories(${LIB_TARGET} SYSTEM INTERFACE ${REPOSITORY_ROOT} ${REPOSITORY_ROOT}/c/ - ${REPOSITORY_ROOT}/c/vendor ${REPOSITORY_ROOT}/c/driver) endforeach() @@ -57,13 +60,10 @@ if(ADBC_BUILD_TESTS) EXTRA_LINK_LIBS adbc_driver_common adbc_validation - nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-bigquery-test PRIVATE cxx_std_17) target_include_directories(adbc-driver-bigquery-test SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/ - ${REPOSITORY_ROOT}/c/include/ - ${REPOSITORY_ROOT}/c/vendor + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ ${REPOSITORY_ROOT}/c/driver ${REPOSITORY_ROOT}/c/driver/common) adbc_configure_target(adbc-driver-bigquery-test) diff --git a/c/driver/bigquery/README.md b/c/driver/bigquery/README.md index 6784787367..e41090d8ad 100644 --- a/c/driver/bigquery/README.md +++ b/c/driver/bigquery/README.md @@ -17,14 +17,23 @@ under the License. 
--> -# ADBC Snowflake Driver +# ADBC BigQuery Driver + +![Vendor: Google BigQuery](https://img.shields.io/badge/vendor-Google%20BigQuery-blue?style=flat-square) +![Implementation: Go](https://img.shields.io/badge/language-Go-violet?style=flat-square) +![Status: Experimental](https://img.shields.io/badge/status-experimental-red?style=flat-square) + +[![conda-forge: adbc-driver-bigquery](https://img.shields.io/conda/vn/conda-forge/adbc-driver-bigquery?label=conda-forge%3A%20adbc-driver-bigquery&style=flat-square)](https://anaconda.org/conda-forge/adbc-driver-bigquery) +[![conda-forge: libadbc-driver-bigquery](https://img.shields.io/conda/vn/conda-forge/libadbc-driver-bigquery?label=conda-forge%3A%20libadbc-driver-bigquery&style=flat-square)](https://anaconda.org/conda-forge/libadbc-driver-bigquery) +[![PyPI: adbc-driver-bigquery](https://img.shields.io/pypi/v/adbc-driver-bigquery?style=flat-square)](https://pypi.org/project/adbc-driver-bigquery/) +[![R-multiverse: adbcbigquery](https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fcommunity.r-multiverse.org%2Fapi%2Fpackages%2Fadbcbigquery&query=%24.Version&label=r-multiverse%3A%20adbcbigquery&style=flat-square)](https://community.r-multiverse.org/adbcbigquery/) This driver provides an interface to [BigQuery](https://cloud.google.com/bigquery) using ADBC. ## Building -See [CONTRIBUTING.md](../../CONTRIBUTING.md) for details. +See [CONTRIBUTING.md](../../../CONTRIBUTING.md) for details. 
## Testing diff --git a/c/driver/bigquery/bigquery_test.cc b/c/driver/bigquery/bigquery_test.cc index b80f36336a..e526b511b8 100644 --- a/c/driver/bigquery/bigquery_test.cc +++ b/c/driver/bigquery/bigquery_test.cc @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/c/driver/bigquery/meson.build b/c/driver/bigquery/meson.build index 7eee6bf784..1bad9fa472 100644 --- a/c/driver/bigquery/meson.build +++ b/c/driver/bigquery/meson.build @@ -19,21 +19,21 @@ golang = find_program('go') if build_machine.system() == 'windows' - prefix = '' - suffix = '.lib' + prefix = '' + suffix = '.lib' elif build_machine.system() == 'darwin' - prefix = 'lib' - suffix = '.dylib' + prefix = 'lib' + suffix = '.dylib' else - prefix = 'lib' - suffix = '.so' + prefix = 'lib' + suffix = '.so' endif adbc_driver_bigquery_name = prefix + 'adbc_driver_bigquery' + suffix adbc_driver_bigquery_lib = custom_target( 'adbc_driver_bigquery', output: adbc_driver_bigquery_name, - command : [ + command: [ golang, 'build', '-C', @@ -43,8 +43,8 @@ adbc_driver_bigquery_lib = custom_target( '-o', meson.current_build_dir() + '/' + adbc_driver_bigquery_name, ], - install : true, - install_dir : '.', + install: true, + install_dir: '.', ) pkg.generate( @@ -55,16 +55,16 @@ pkg.generate( filebase: 'adbc-driver-bigquery', ) -if get_option('tests') - exc = executable( - 'adbc-driver-bigquery-test', - 'bigquery_test.cc', - include_directories: [root_dir, driver_dir], - link_with: [ - adbc_common_lib, - adbc_driver_bigquery_lib - ], - dependencies: [adbc_validation_dep], - ) - test('adbc-driver-bigquery', exc) -endif +adbc_driver_bigquery_dep = declare_dependency( + include_directories: include_dir, + link_with: adbc_driver_bigquery_lib, +) + +exc = executable( + 'adbc-driver-bigquery-test', + 'bigquery_test.cc', + include_directories: [c_dir, driver_dir], + link_with: [adbc_common_lib, adbc_driver_bigquery_lib], + dependencies: [adbc_validation_dep], +) 
+test('adbc-driver-bigquery', exc) diff --git a/c/driver/common/AdbcDriverCommonConfig.cmake.in b/c/driver/common/AdbcDriverCommonConfig.cmake.in new file mode 100644 index 0000000000..165e0b6aaf --- /dev/null +++ b/c/driver/common/AdbcDriverCommonConfig.cmake.in @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +@PACKAGE_INIT@ + +set(ADBC_VERSION "@ADBC_VERSION@") + +include("${CMAKE_CURRENT_LIST_DIR}/AdbcDriverCommonTargets.cmake") + +check_required_components(AdbcDriverCommon) diff --git a/c/driver/common/CMakeLists.txt b/c/driver/common/CMakeLists.txt index 751eda3632..07b9a50ad6 100644 --- a/c/driver/common/CMakeLists.txt +++ b/c/driver/common/CMakeLists.txt @@ -18,8 +18,24 @@ add_library(adbc_driver_common STATIC utils.c) adbc_configure_target(adbc_driver_common) set_target_properties(adbc_driver_common PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(adbc_driver_common PRIVATE "${REPOSITORY_ROOT}/c/include" - "${REPOSITORY_ROOT}/c/vendor") +target_include_directories(adbc_driver_common PRIVATE "${REPOSITORY_ROOT}/c/include") +target_link_libraries(adbc_driver_common PUBLIC nanoarrow::nanoarrow) + +# For static builds, we need to install the static library here so downstream +# applications can link to it +if(ADBC_BUILD_STATIC) + if(ADBC_WITH_VENDORED_NANOARROW) + message(WARNING "adbc_driver_common is not installed when ADBC_WITH_VENDORED_NANOARROW is ON. 
To use the static libraries, for now you must provide nanoarrow instead of using the vendored copy" + ) + else() + install(TARGETS adbc_driver_common ${INSTALL_IS_OPTIONAL} + EXPORT adbc_driver_common_targets + RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + arrow_install_cmake_package(AdbcDriverCommon adbc_driver_common_targets) + endif() +endif() if(ADBC_BUILD_TESTS) add_test_case(driver_common_test @@ -30,11 +46,9 @@ if(ADBC_BUILD_TESTS) SOURCES utils_test.cc EXTRA_LINK_LIBS - adbc_driver_common - nanoarrow) + adbc_driver_common) target_compile_features(adbc-driver-common-test PRIVATE cxx_std_17) target_include_directories(adbc-driver-common-test - PRIVATE "${REPOSITORY_ROOT}/c/include" - "${REPOSITORY_ROOT}/c/vendor") + PRIVATE "${REPOSITORY_ROOT}/c/include") adbc_configure_target(adbc-driver-common-test) endif() diff --git a/c/driver/common/meson.build b/c/driver/common/meson.build index b1423f0e58..eedfd499d1 100644 --- a/c/driver/common/meson.build +++ b/c/driver/common/meson.build @@ -23,13 +23,11 @@ adbc_common_lib = library( install: true, ) -if get_option('tests') - exc = executable( - 'adbc-driver-common-test', - 'utils_test.cc', - include_directories: [include_dir], - link_with: [adbc_common_lib], - dependencies: [nanoarrow_dep, gtest_main_dep, gmock_dep], - ) - test('adbc-driver-common', exc) -endif +exc = executable( + 'adbc-driver-common-test', + 'utils_test.cc', + include_directories: [include_dir], + link_with: [adbc_common_lib], + dependencies: [nanoarrow_dep, gtest_main_dep, gmock_dep], +) +test('adbc-driver-common', exc) diff --git a/c/driver/common/utils.c b/c/driver/common/utils.c index 00ebd51939..4dc8214ef5 100644 --- a/c/driver/common/utils.c +++ b/c/driver/common/utils.c @@ -27,7 +27,7 @@ static size_t kErrorBufferSize = 1024; -int AdbcStatusCodeToErrno(AdbcStatusCode code) { +int InternalAdbcStatusCodeToErrno(AdbcStatusCode code) { switch (code) { 
case ADBC_STATUS_OK: return 0; @@ -104,14 +104,15 @@ static void ReleaseError(struct AdbcError* error) { error->release = NULL; } -void SetError(struct AdbcError* error, const char* format, ...) { +void InternalAdbcSetError(struct AdbcError* error, const char* format, ...) { va_list args; va_start(args, format); - SetErrorVariadic(error, format, args); + InternalAdbcSetErrorVariadic(error, format, args); va_end(args); } -void SetErrorVariadic(struct AdbcError* error, const char* format, va_list args) { +void InternalAdbcSetErrorVariadic(struct AdbcError* error, const char* format, + va_list args) { if (!error) return; if (error->release) { // TODO: combine the errors if possible @@ -147,8 +148,8 @@ void SetErrorVariadic(struct AdbcError* error, const char* format, va_list args) vsnprintf(error->message, kErrorBufferSize, format, args); } -void AppendErrorDetail(struct AdbcError* error, const char* key, const uint8_t* detail, - size_t detail_length) { +void InternalAdbcAppendErrorDetail(struct AdbcError* error, const char* key, + const uint8_t* detail, size_t detail_length) { if (error->release != ReleaseErrorWithDetails) return; struct AdbcErrorDetails* details = (struct AdbcErrorDetails*)error->private_data; @@ -212,7 +213,7 @@ void AppendErrorDetail(struct AdbcError* error, const char* key, const uint8_t* details->count++; } -int CommonErrorGetDetailCount(const struct AdbcError* error) { +int InternalAdbcCommonErrorGetDetailCount(const struct AdbcError* error) { if (error->release != ReleaseErrorWithDetails) { return 0; } @@ -220,7 +221,8 @@ int CommonErrorGetDetailCount(const struct AdbcError* error) { return details->count; } -struct AdbcErrorDetail CommonErrorGetDetail(const struct AdbcError* error, int index) { +struct AdbcErrorDetail InternalAdbcCommonErrorGetDetail(const struct AdbcError* error, + int index) { if (error->release != ReleaseErrorWithDetails) { return (struct AdbcErrorDetail){NULL, NULL, 0}; } @@ -235,11 +237,12 @@ struct AdbcErrorDetail 
CommonErrorGetDetail(const struct AdbcError* error, int i }; } -bool IsCommonError(const struct AdbcError* error) { +bool InternalAdbcIsCommonError(const struct AdbcError* error) { return error->release == ReleaseErrorWithDetails || error->release == ReleaseError; } -int StringBuilderInit(struct StringBuilder* builder, size_t initial_size) { +int InternalAdbcStringBuilderInit(struct InternalAdbcStringBuilder* builder, + size_t initial_size) { builder->buffer = (char*)malloc(initial_size); if (builder->buffer == NULL) return errno; @@ -248,7 +251,8 @@ int StringBuilderInit(struct StringBuilder* builder, size_t initial_size) { return 0; } -int StringBuilderAppend(struct StringBuilder* builder, const char* fmt, ...) { +int InternalAdbcStringBuilderAppend(struct InternalAdbcStringBuilder* builder, + const char* fmt, ...) { va_list argptr; int bytes_available = (int)builder->capacity - (int)builder->size; @@ -278,16 +282,16 @@ int StringBuilderAppend(struct StringBuilder* builder, const char* fmt, ...) 
{ return 0; } -void StringBuilderReset(struct StringBuilder* builder) { +void InternalAdbcStringBuilderReset(struct InternalAdbcStringBuilder* builder) { if (builder->buffer) { free(builder->buffer); } memset(builder, 0, sizeof(*builder)); } -AdbcStatusCode AdbcInitConnectionGetInfoSchema(struct ArrowSchema* schema, - struct ArrowArray* array, - struct AdbcError* error) { +AdbcStatusCode InternalAdbcInitConnectionGetInfoSchema(struct ArrowSchema* schema, + struct ArrowArray* array, + struct AdbcError* error) { // TODO: use C equivalent of UniqueSchema to avoid incomplete schema // on error ArrowSchemaInit(schema); @@ -351,10 +355,10 @@ AdbcStatusCode AdbcInitConnectionGetInfoSchema(struct ArrowSchema* schema, return ADBC_STATUS_OK; } // NOLINT(whitespace/indent) -AdbcStatusCode AdbcConnectionGetInfoAppendString(struct ArrowArray* array, - uint32_t info_code, - const char* info_value, - struct AdbcError* error) { +AdbcStatusCode InternalAdbcConnectionGetInfoAppendString(struct ArrowArray* array, + uint32_t info_code, + const char* info_value, + struct AdbcError* error) { CHECK_NA(INTERNAL, ArrowArrayAppendUInt(array->children[0], info_code), error); // Append to type variant struct ArrowStringView value = ArrowCharView(info_value); @@ -366,9 +370,10 @@ AdbcStatusCode AdbcConnectionGetInfoAppendString(struct ArrowArray* array, return ADBC_STATUS_OK; } -AdbcStatusCode AdbcConnectionGetInfoAppendInt(struct ArrowArray* array, - uint32_t info_code, int64_t info_value, - struct AdbcError* error) { +AdbcStatusCode InternalAdbcConnectionGetInfoAppendInt(struct ArrowArray* array, + uint32_t info_code, + int64_t info_value, + struct AdbcError* error) { CHECK_NA(INTERNAL, ArrowArrayAppendUInt(array->children[0], info_code), error); // Append to type variant CHECK_NA(INTERNAL, ArrowArrayAppendInt(array->children[1]->children[2], info_value), @@ -379,8 +384,8 @@ AdbcStatusCode AdbcConnectionGetInfoAppendInt(struct ArrowArray* array, return ADBC_STATUS_OK; } -AdbcStatusCode 
AdbcInitConnectionObjectsSchema(struct ArrowSchema* schema, - struct AdbcError* error) { +AdbcStatusCode InternalAdbcInitConnectionObjectsSchema(struct ArrowSchema* schema, + struct AdbcError* error) { ArrowSchemaInit(schema); CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema, /*num_columns=*/2), error); CHECK_NA(INTERNAL, ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_STRING), @@ -563,7 +568,8 @@ AdbcStatusCode AdbcInitConnectionObjectsSchema(struct ArrowSchema* schema, return ADBC_STATUS_OK; } -struct AdbcGetObjectsData* AdbcGetObjectsDataInit(struct ArrowArrayView* array_view) { +struct AdbcGetObjectsData* InternalAdbcGetObjectsDataInit( + struct ArrowArrayView* array_view) { struct AdbcGetObjectsData* get_objects_data = (struct AdbcGetObjectsData*)calloc(1, sizeof(struct AdbcGetObjectsData)); if (get_objects_data == NULL) { @@ -895,11 +901,11 @@ struct AdbcGetObjectsData* AdbcGetObjectsDataInit(struct ArrowArrayView* array_v return get_objects_data; error_handler: - AdbcGetObjectsDataDelete(get_objects_data); + InternalAdbcGetObjectsDataDelete(get_objects_data); return NULL; } -void AdbcGetObjectsDataDelete(struct AdbcGetObjectsData* get_objects_data) { +void InternalAdbcGetObjectsDataDelete(struct AdbcGetObjectsData* get_objects_data) { for (int64_t catalog_index = 0; catalog_index < get_objects_data->n_catalogs; catalog_index++) { struct AdbcGetObjectsCatalog* catalog = get_objects_data->catalogs[catalog_index]; @@ -946,7 +952,7 @@ void AdbcGetObjectsDataDelete(struct AdbcGetObjectsData* get_objects_data) { free(get_objects_data); } -struct AdbcGetObjectsCatalog* AdbcGetObjectsDataGetCatalogByName( +struct AdbcGetObjectsCatalog* InternalAdbcGetObjectsDataGetCatalogByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name) { if (catalog_name != NULL) { for (int64_t i = 0; i < get_objects_data->n_catalogs; i++) { @@ -960,12 +966,12 @@ struct AdbcGetObjectsCatalog* AdbcGetObjectsDataGetCatalogByName( return NULL; } -struct 
AdbcGetObjectsSchema* AdbcGetObjectsDataGetSchemaByName( +struct AdbcGetObjectsSchema* InternalAdbcGetObjectsDataGetSchemaByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, const char* const schema_name) { if (schema_name != NULL) { struct AdbcGetObjectsCatalog* catalog = - AdbcGetObjectsDataGetCatalogByName(get_objects_data, catalog_name); + InternalAdbcGetObjectsDataGetCatalogByName(get_objects_data, catalog_name); if (catalog != NULL) { for (int64_t i = 0; i < catalog->n_db_schemas; i++) { struct AdbcGetObjectsSchema* schema = catalog->catalog_db_schemas[i]; @@ -979,12 +985,12 @@ struct AdbcGetObjectsSchema* AdbcGetObjectsDataGetSchemaByName( return NULL; } -struct AdbcGetObjectsTable* AdbcGetObjectsDataGetTableByName( +struct AdbcGetObjectsTable* InternalAdbcGetObjectsDataGetTableByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, const char* const schema_name, const char* const table_name) { if (table_name != NULL) { - struct AdbcGetObjectsSchema* schema = - AdbcGetObjectsDataGetSchemaByName(get_objects_data, catalog_name, schema_name); + struct AdbcGetObjectsSchema* schema = InternalAdbcGetObjectsDataGetSchemaByName( + get_objects_data, catalog_name, schema_name); if (schema != NULL) { for (int64_t i = 0; i < schema->n_db_schema_tables; i++) { struct AdbcGetObjectsTable* table = schema->db_schema_tables[i]; @@ -998,12 +1004,12 @@ struct AdbcGetObjectsTable* AdbcGetObjectsDataGetTableByName( return NULL; } -struct AdbcGetObjectsColumn* AdbcGetObjectsDataGetColumnByName( +struct AdbcGetObjectsColumn* InternalAdbcGetObjectsDataGetColumnByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, const char* const schema_name, const char* const table_name, const char* const column_name) { if (column_name != NULL) { - struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsTable* table = InternalAdbcGetObjectsDataGetTableByName( 
get_objects_data, catalog_name, schema_name, table_name); if (table != NULL) { for (int64_t i = 0; i < table->n_table_columns; i++) { @@ -1018,12 +1024,12 @@ struct AdbcGetObjectsColumn* AdbcGetObjectsDataGetColumnByName( return NULL; } -struct AdbcGetObjectsConstraint* AdbcGetObjectsDataGetConstraintByName( +struct AdbcGetObjectsConstraint* InternalAdbcGetObjectsDataGetConstraintByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, const char* const schema_name, const char* const table_name, const char* const constraint_name) { if (constraint_name != NULL) { - struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsTable* table = InternalAdbcGetObjectsDataGetTableByName( get_objects_data, catalog_name, schema_name, table_name); if (table != NULL) { for (int64_t i = 0; i < table->n_table_constraints; i++) { diff --git a/c/driver/common/utils.h b/c/driver/common/utils.h index d204821b2b..fa727d0505 100644 --- a/c/driver/common/utils.h +++ b/c/driver/common/utils.h @@ -29,7 +29,7 @@ extern "C" { #endif -int AdbcStatusCodeToErrno(AdbcStatusCode code); +int InternalAdbcStatusCodeToErrno(AdbcStatusCode code); // If using mingw's c99-compliant printf, we need a different format-checking attribute #if defined(__USE_MINGW_ANSI_STDIO) && defined(__MINGW_PRINTF_FORMAT) @@ -41,64 +41,69 @@ int AdbcStatusCodeToErrno(AdbcStatusCode code); #endif /// Set error message using a format string. -void SetError(struct AdbcError* error, const char* format, - ...) ADBC_CHECK_PRINTF_ATTRIBUTE; +void InternalAdbcSetError(struct AdbcError* error, const char* format, + ...) ADBC_CHECK_PRINTF_ATTRIBUTE; /// Set error message using a format string. -void SetErrorVariadic(struct AdbcError* error, const char* format, va_list args); +void InternalAdbcSetErrorVariadic(struct AdbcError* error, const char* format, + va_list args); /// Add an error detail. 
-void AppendErrorDetail(struct AdbcError* error, const char* key, const uint8_t* detail, - size_t detail_length); +void InternalAdbcAppendErrorDetail(struct AdbcError* error, const char* key, + const uint8_t* detail, size_t detail_length); -int CommonErrorGetDetailCount(const struct AdbcError* error); -struct AdbcErrorDetail CommonErrorGetDetail(const struct AdbcError* error, int index); -bool IsCommonError(const struct AdbcError* error); +int InternalAdbcCommonErrorGetDetailCount(const struct AdbcError* error); +struct AdbcErrorDetail InternalAdbcCommonErrorGetDetail(const struct AdbcError* error, + int index); +bool InternalAdbcIsCommonError(const struct AdbcError* error); -struct StringBuilder { +struct InternalAdbcStringBuilder { char* buffer; // Not including null terminator size_t size; size_t capacity; }; -int StringBuilderInit(struct StringBuilder* builder, size_t initial_size); +int InternalAdbcStringBuilderInit(struct InternalAdbcStringBuilder* builder, + size_t initial_size); -int ADBC_CHECK_PRINTF_ATTRIBUTE StringBuilderAppend(struct StringBuilder* builder, - const char* fmt, ...); -void StringBuilderReset(struct StringBuilder* builder); +int ADBC_CHECK_PRINTF_ATTRIBUTE InternalAdbcStringBuilderAppend( + struct InternalAdbcStringBuilder* builder, const char* fmt, ...); +void InternalAdbcStringBuilderReset(struct InternalAdbcStringBuilder* builder); #undef ADBC_CHECK_PRINTF_ATTRIBUTE /// Check an NanoArrow status code. 
-#define CHECK_NA(CODE, EXPR, ERROR) \ - do { \ - ArrowErrorCode arrow_error_code = (EXPR); \ - if (arrow_error_code != 0) { \ - SetError(ERROR, "%s failed: (%d) %s\nDetail: %s:%d", #EXPR, arrow_error_code, \ - strerror(arrow_error_code), __FILE__, __LINE__); \ - return ADBC_STATUS_##CODE; \ - } \ +#define CHECK_NA(CODE, EXPR, ERROR) \ + do { \ + ArrowErrorCode arrow_error_code = (EXPR); \ + if (arrow_error_code != 0) { \ + InternalAdbcSetError(ERROR, "%s failed: (%d) %s\nDetail: %s:%d", #EXPR, \ + arrow_error_code, strerror(arrow_error_code), __FILE__, \ + __LINE__); \ + return ADBC_STATUS_##CODE; \ + } \ } while (0) /// Check an NanoArrow status code. -#define CHECK_NA_DETAIL(CODE, EXPR, NA_ERROR, ERROR) \ - do { \ - ArrowErrorCode arrow_error_code = (EXPR); \ - if (arrow_error_code != 0) { \ - SetError(ERROR, "%s failed: (%d) %s: %s\nDetail: %s:%d", #EXPR, arrow_error_code, \ - strerror(arrow_error_code), (NA_ERROR)->message, __FILE__, __LINE__); \ - return ADBC_STATUS_##CODE; \ - } \ +#define CHECK_NA_DETAIL(CODE, EXPR, NA_ERROR, ERROR) \ + do { \ + ArrowErrorCode arrow_error_code = (EXPR); \ + if (arrow_error_code != 0) { \ + InternalAdbcSetError(ERROR, "%s failed: (%d) %s: %s\nDetail: %s:%d", #EXPR, \ + arrow_error_code, strerror(arrow_error_code), \ + (NA_ERROR)->message, __FILE__, __LINE__); \ + return ADBC_STATUS_##CODE; \ + } \ } while (0) /// Check a generic status. -#define RAISE(CODE, EXPR, ERRMSG, ERROR) \ - do { \ - if (!(EXPR)) { \ - SetError(ERROR, "%s failed: %s\nDetail: %s:%d", #EXPR, ERRMSG, __FILE__, \ - __LINE__); \ - return ADBC_STATUS_##CODE; \ - } \ +#define RAISE(CODE, EXPR, ERRMSG, ERROR) \ + do { \ + if (!(EXPR)) { \ + InternalAdbcSetError(ERROR, "%s failed: %s\nDetail: %s:%d", #EXPR, ERRMSG, \ + __FILE__, __LINE__); \ + return ADBC_STATUS_##CODE; \ + } \ } while (0) /// Check an NanoArrow status code. 
@@ -217,24 +222,25 @@ struct AdbcGetObjectsData { // does not copy any data from array // returns NULL on error -struct AdbcGetObjectsData* AdbcGetObjectsDataInit(struct ArrowArrayView* array_view); -void AdbcGetObjectsDataDelete(struct AdbcGetObjectsData* get_objects_data); +struct AdbcGetObjectsData* InternalAdbcGetObjectsDataInit( + struct ArrowArrayView* array_view); +void InternalAdbcGetObjectsDataDelete(struct AdbcGetObjectsData* get_objects_data); // returns NULL on error // for now all arguments are required -struct AdbcGetObjectsCatalog* AdbcGetObjectsDataGetCatalogByName( +struct AdbcGetObjectsCatalog* InternalAdbcGetObjectsDataGetCatalogByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name); -struct AdbcGetObjectsSchema* AdbcGetObjectsDataGetSchemaByName( +struct AdbcGetObjectsSchema* InternalAdbcGetObjectsDataGetSchemaByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, const char* const schema_name); -struct AdbcGetObjectsTable* AdbcGetObjectsDataGetTableByName( +struct AdbcGetObjectsTable* InternalAdbcGetObjectsDataGetTableByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, const char* const schema_name, const char* const table_name); -struct AdbcGetObjectsColumn* AdbcGetObjectsDataGetColumnByName( +struct AdbcGetObjectsColumn* InternalAdbcGetObjectsDataGetColumnByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, const char* const schema_name, const char* const table_name, const char* const column_name); -struct AdbcGetObjectsConstraint* AdbcGetObjectsDataGetConstraintByName( +struct AdbcGetObjectsConstraint* InternalAdbcGetObjectsDataGetConstraintByName( struct AdbcGetObjectsData* get_objects_data, const char* const catalog_name, const char* const schema_name, const char* const table_name, const char* const constraint_name); diff --git a/c/driver/common/utils_test.cc b/c/driver/common/utils_test.cc index 
489af163fd..a772ef4ade 100644 --- a/c/driver/common/utils_test.cc +++ b/c/driver/common/utils_test.cc @@ -26,76 +26,76 @@ #include "utils.h" TEST(TestStringBuilder, TestBasic) { - struct StringBuilder str; + struct InternalAdbcStringBuilder str; int ret; - ret = StringBuilderInit(&str, /*initial_size=*/64); + ret = InternalAdbcStringBuilderInit(&str, /*initial_size=*/64); EXPECT_EQ(ret, 0); EXPECT_EQ(str.capacity, 64); - ret = StringBuilderAppend(&str, "%s", "BASIC TEST"); + ret = InternalAdbcStringBuilderAppend(&str, "%s", "BASIC TEST"); EXPECT_EQ(ret, 0); EXPECT_EQ(str.size, 10); EXPECT_STREQ(str.buffer, "BASIC TEST"); - StringBuilderReset(&str); + InternalAdbcStringBuilderReset(&str); } TEST(TestStringBuilder, TestBoundary) { - struct StringBuilder str; + struct InternalAdbcStringBuilder str; int ret; - ret = StringBuilderInit(&str, /*initial_size=*/10); + ret = InternalAdbcStringBuilderInit(&str, /*initial_size=*/10); EXPECT_EQ(ret, 0); EXPECT_EQ(str.capacity, 10); - ret = StringBuilderAppend(&str, "%s", "BASIC TEST"); + ret = InternalAdbcStringBuilderAppend(&str, "%s", "BASIC TEST"); EXPECT_EQ(ret, 0); // should resize to include \0 EXPECT_EQ(str.capacity, 11); EXPECT_EQ(str.size, 10); EXPECT_STREQ(str.buffer, "BASIC TEST"); - StringBuilderReset(&str); + InternalAdbcStringBuilderReset(&str); } TEST(TestStringBuilder, TestMultipleAppends) { - struct StringBuilder str; + struct InternalAdbcStringBuilder str; int ret; - ret = StringBuilderInit(&str, /*initial_size=*/2); + ret = InternalAdbcStringBuilderInit(&str, /*initial_size=*/2); EXPECT_EQ(ret, 0); EXPECT_EQ(str.capacity, 2); - ret = StringBuilderAppend(&str, "%s", "BASIC"); + ret = InternalAdbcStringBuilderAppend(&str, "%s", "BASIC"); EXPECT_EQ(ret, 0); EXPECT_EQ(str.capacity, 6); EXPECT_EQ(str.size, 5); EXPECT_STREQ(str.buffer, "BASIC"); - ret = StringBuilderAppend(&str, "%s", " TEST"); + ret = InternalAdbcStringBuilderAppend(&str, "%s", " TEST"); EXPECT_EQ(ret, 0); EXPECT_EQ(str.capacity, 11); 
EXPECT_EQ(str.size, 10); EXPECT_STREQ(str.buffer, "BASIC TEST"); - StringBuilderReset(&str); + InternalAdbcStringBuilderReset(&str); } TEST(ErrorDetails, Adbc100) { struct AdbcError error; std::memset(&error, 0, ADBC_ERROR_1_1_0_SIZE); - SetError(&error, "My message"); + InternalAdbcSetError(&error, "My message"); ASSERT_EQ(nullptr, error.private_data); ASSERT_EQ(nullptr, error.private_driver); { std::string detail = "detail"; - AppendErrorDetail(&error, "key", reinterpret_cast<const uint8_t*>(detail.data()), - detail.size()); + InternalAdbcAppendErrorDetail( + &error, "key", reinterpret_cast<const uint8_t*>(detail.data()), detail.size()); } - ASSERT_EQ(0, CommonErrorGetDetailCount(&error)); - struct AdbcErrorDetail detail = CommonErrorGetDetail(&error, 0); + ASSERT_EQ(0, InternalAdbcCommonErrorGetDetailCount(&error)); + struct AdbcErrorDetail detail = InternalAdbcCommonErrorGetDetail(&error, 0); ASSERT_EQ(nullptr, detail.key); ASSERT_EQ(nullptr, detail.value); ASSERT_EQ(0, detail.value_length); @@ -105,29 +105,29 @@ TEST(ErrorDetails, Adbc100) { TEST(ErrorDetails, Adbc110) { struct AdbcError error = ADBC_ERROR_INIT; - SetError(&error, "My message"); + InternalAdbcSetError(&error, "My message"); ASSERT_NE(nullptr, error.private_data); ASSERT_EQ(nullptr, error.private_driver); { std::string detail = "detail"; - AppendErrorDetail(&error, "key", reinterpret_cast<const uint8_t*>(detail.data()), - detail.size()); + InternalAdbcAppendErrorDetail( + &error, "key", reinterpret_cast<const uint8_t*>(detail.data()), detail.size()); } - ASSERT_EQ(1, CommonErrorGetDetailCount(&error)); - struct AdbcErrorDetail detail = CommonErrorGetDetail(&error, 0); + ASSERT_EQ(1, InternalAdbcCommonErrorGetDetailCount(&error)); + struct AdbcErrorDetail detail = InternalAdbcCommonErrorGetDetail(&error, 0); ASSERT_STREQ("key", detail.key); ASSERT_EQ("detail", std::string_view(reinterpret_cast<const char*>(detail.value), detail.value_length)); - detail = CommonErrorGetDetail(&error, -1); + detail = InternalAdbcCommonErrorGetDetail(&error, -1); ASSERT_EQ(nullptr, 
detail.key); ASSERT_EQ(nullptr, detail.value); ASSERT_EQ(0, detail.value_length); - detail = CommonErrorGetDetail(&error, 2); + detail = InternalAdbcCommonErrorGetDetail(&error, 2); ASSERT_EQ(nullptr, detail.key); ASSERT_EQ(nullptr, detail.value); ASSERT_EQ(0, detail.value_length); @@ -139,7 +139,7 @@ TEST(ErrorDetails, Adbc110) { TEST(ErrorDetails, RoundTripValues) { struct AdbcError error = ADBC_ERROR_INIT; - SetError(&error, "My message"); + InternalAdbcSetError(&error, "My message"); struct Detail { std::string key; @@ -152,13 +152,13 @@ TEST(ErrorDetails, RoundTripValues) { }; for (const auto& detail : details) { - AppendErrorDetail(&error, detail.key.c_str(), detail.value.data(), - detail.value.size()); + InternalAdbcAppendErrorDetail(&error, detail.key.c_str(), detail.value.data(), + detail.value.size()); } - ASSERT_EQ(details.size(), CommonErrorGetDetailCount(&error)); + ASSERT_EQ(details.size(), InternalAdbcCommonErrorGetDetailCount(&error)); for (int i = 0; i < static_cast(details.size()); i++) { - struct AdbcErrorDetail detail = CommonErrorGetDetail(&error, i); + struct AdbcErrorDetail detail = InternalAdbcCommonErrorGetDetail(&error, i); ASSERT_EQ(details[i].key, detail.key); ASSERT_EQ(details[i].value.size(), detail.value_length); ASSERT_THAT(std::vector(detail.value, detail.value + detail.value_length), @@ -209,42 +209,45 @@ TEST(AdbcGetObjectsData, GetObjectsByName) { mock_table.table_constraints = constraints; mock_table.n_table_constraints = 2; - EXPECT_EQ(AdbcGetObjectsDataGetTableByName(&mock_data, "mock_catalog", "mock_schema", - "table"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetTableByName(&mock_data, "mock_catalog", + "mock_schema", "table"), &mock_table); - EXPECT_EQ(AdbcGetObjectsDataGetTableByName(&mock_data, "mock_catalog", "mock_schema", - "table_suffix"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetTableByName(&mock_data, "mock_catalog", + "mock_schema", "table_suffix"), &mock_table_suffix); - 
EXPECT_EQ(AdbcGetObjectsDataGetTableByName(&mock_data, "mock_catalog", "mock_schema", - "nonexistent"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetTableByName(&mock_data, "mock_catalog", + "mock_schema", "nonexistent"), nullptr); - EXPECT_EQ(AdbcGetObjectsDataGetCatalogByName(&mock_data, "mock_catalog"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetCatalogByName(&mock_data, "mock_catalog"), &mock_catalog); - EXPECT_EQ(AdbcGetObjectsDataGetCatalogByName(&mock_data, "nonexistent"), nullptr); + EXPECT_EQ(InternalAdbcGetObjectsDataGetCatalogByName(&mock_data, "nonexistent"), + nullptr); - EXPECT_EQ(AdbcGetObjectsDataGetSchemaByName(&mock_data, "mock_catalog", "mock_schema"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetSchemaByName(&mock_data, "mock_catalog", + "mock_schema"), &mock_schema); - EXPECT_EQ(AdbcGetObjectsDataGetSchemaByName(&mock_data, "mock_catalog", "nonexistent"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetSchemaByName(&mock_data, "mock_catalog", + "nonexistent"), nullptr); - EXPECT_EQ(AdbcGetObjectsDataGetColumnByName(&mock_data, "mock_catalog", "mock_schema", - "table", "column"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetColumnByName(&mock_data, "mock_catalog", + "mock_schema", "table", "column"), &mock_column); - EXPECT_EQ(AdbcGetObjectsDataGetColumnByName(&mock_data, "mock_catalog", "mock_schema", - "table", "column_suffix"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetColumnByName( + &mock_data, "mock_catalog", "mock_schema", "table", "column_suffix"), &mock_column_suffix); - EXPECT_EQ(AdbcGetObjectsDataGetColumnByName(&mock_data, "mock_catalog", "mock_schema", - "table", "nonexistent"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetColumnByName( + &mock_data, "mock_catalog", "mock_schema", "table", "nonexistent"), nullptr); - EXPECT_EQ(AdbcGetObjectsDataGetConstraintByName(&mock_data, "mock_catalog", - "mock_schema", "table", "constraint"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetConstraintByName( + &mock_data, "mock_catalog", "mock_schema", "table", "constraint"), 
&mock_constraint); - EXPECT_EQ(AdbcGetObjectsDataGetConstraintByName( + EXPECT_EQ(InternalAdbcGetObjectsDataGetConstraintByName( &mock_data, "mock_catalog", "mock_schema", "table", "constraint_suffix"), &mock_constraint_suffix); - EXPECT_EQ(AdbcGetObjectsDataGetConstraintByName(&mock_data, "mock_catalog", - "mock_schema", "table", "nonexistent"), + EXPECT_EQ(InternalAdbcGetObjectsDataGetConstraintByName( + &mock_data, "mock_catalog", "mock_schema", "table", "nonexistent"), nullptr); } diff --git a/c/driver/flightsql/AdbcDriverFlightSQLConfig.cmake.in b/c/driver/flightsql/AdbcDriverFlightSQLConfig.cmake.in new file mode 100644 index 0000000000..c414275852 --- /dev/null +++ b/c/driver/flightsql/AdbcDriverFlightSQLConfig.cmake.in @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +@PACKAGE_INIT@ + +set(ADBC_BUILD_SHARED @ADBC_BUILD_SHARED@) +set(ADBC_BUILD_STATIC @ADBC_BUILD_STATIC@) +set(ADBC_FULL_SO_VERSION "@ADBC_FULL_SO_VERSION@") +set(ADBC_INSTALL_LIBDIR "@CMAKE_INSTALL_LIBDIR@") +set(ADBC_SO_VERSION "@ADBC_SO_VERSION@") +set(ADBC_VERSION "@ADBC_VERSION@") + +@ADBC_GO_PACKAGE_INIT@ + +if(ADBC_BUILD_SHARED) + adbc_add_shared_library( + AdbcDriverFlightSQL::adbc_driver_flightsql_shared + adbc_driver_flightsql) +endif() + +if(ADBC_BUILD_STATIC) + adbc_add_static_library( + AdbcDriverFlightSQL::adbc_driver_flightsql_static + adbc_driver_flightsql) +endif() + +check_required_components(AdbcDriverFlightSQL) diff --git a/c/driver/flightsql/CMakeLists.txt b/c/driver/flightsql/CMakeLists.txt index 1101b82430..9f60cbf07a 100644 --- a/c/driver/flightsql/CMakeLists.txt +++ b/c/driver/flightsql/CMakeLists.txt @@ -26,10 +26,14 @@ add_go_lib("${REPOSITORY_ROOT}/go/adbc/pkg/flightsql/" utils.c BUILD_TAGS driverlib + CMAKE_PACKAGE_NAME + AdbcDriverFlightSQL PKG_CONFIG_NAME adbc-driver-flightsql SHARED_LINK_FLAGS ${LDFLAGS} + DEFINES + ${ADBC_TARGET_COMPILE_DEFINITIONS} OUTPUTS ADBC_LIBRARIES) @@ -37,7 +41,6 @@ foreach(LIB_TARGET ${ADBC_LIBRARIES}) target_include_directories(${LIB_TARGET} SYSTEM INTERFACE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ - ${REPOSITORY_ROOT}/c/vendor ${REPOSITORY_ROOT}/c/driver) endforeach() @@ -59,12 +62,10 @@ if(ADBC_BUILD_TESTS) EXTRA_LINK_LIBS adbc_driver_common adbc_validation - nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-flightsql-test PRIVATE cxx_std_17) target_include_directories(adbc-driver-flightsql-test SYSTEM PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ - ${REPOSITORY_ROOT}/c/vendor ${REPOSITORY_ROOT}/c/driver) adbc_configure_target(adbc-driver-flightsql-test) endif() diff --git a/c/driver/flightsql/README.md b/c/driver/flightsql/README.md new file mode 100644 index 0000000000..ca2d90629e --- /dev/null +++ b/c/driver/flightsql/README.md @@ -0,0 +1,36 @@ + + +# ADBC 
Arrow Flight SQL Driver + +![Vendor: Apache Arrow Flight SQL](https://img.shields.io/badge/vendor-Arrow%20Flight%20SQL-blue?style=flat-square) +![Implementation: Go](https://img.shields.io/badge/language-Go-violet?style=flat-square) +![Status: Stable](https://img.shields.io/badge/status-stable-green?style=flat-square) + +[![conda-forge: adbc-driver-flightsql](https://img.shields.io/conda/vn/conda-forge/adbc-driver-flightsql?label=conda-forge%3A%20adbc-driver-flightsql&style=flat-square)](https://anaconda.org/conda-forge/adbc-driver-flightsql) +[![conda-forge: libadbc-driver-flightsql](https://img.shields.io/conda/vn/conda-forge/libadbc-driver-flightsql?label=conda-forge%3A%20libadbc-driver-flightsql&style=flat-square)](https://anaconda.org/conda-forge/libadbc-driver-flightsql) +[![PyPI: adbc-driver-flightsql](https://img.shields.io/pypi/v/adbc-driver-flightsql?style=flat-square)](https://pypi.org/project/adbc-driver-flightsql/) +[![R-multiverse: adbcflightsql](https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fcommunity.r-multiverse.org%2Fapi%2Fpackages%2Fadbcflightsql&query=%24.Version&label=r-multiverse%3A%20adbcflightsql&style=flat-square)](https://community.r-multiverse.org/adbcflightsql/) + +This driver provides an interface to databases supporting +[Apache Arrow Flight SQL](https://arrow.apache.org/docs/format/FlightSql.html) using ADBC. + +## Building + +See [CONTRIBUTING.md](../../../CONTRIBUTING.md) for details. diff --git a/c/driver/flightsql/dremio_flightsql_test.cc b/c/driver/flightsql/dremio_flightsql_test.cc index f18344017b..38944837d1 100644 --- a/c/driver/flightsql/dremio_flightsql_test.cc +++ b/c/driver/flightsql/dremio_flightsql_test.cc @@ -15,12 +15,15 @@ // specific language governing permissions and limitations // under the License. 
+#include + #include #include #include #include #include #include + #include "validation/adbc_validation.h" #include "validation/adbc_validation_util.h" @@ -34,7 +37,7 @@ class DremioFlightSqlQuirks : public adbc_validation::DriverQuirks { const char* user_raw = std::getenv("ADBC_DREMIO_FLIGHTSQL_USER"); const char* pass_raw = std::getenv("ADBC_DREMIO_FLIGHTSQL_PASS"); if (!uri_raw || !user_raw || !pass_raw) { - SetError(error, "Missing required environment variables"); + InternalAdbcSetError(error, "Missing required environment variables"); return ADBC_STATUS_INVALID_ARGUMENT; } EXPECT_THAT(AdbcDatabaseSetOption(database, "uri", uri_raw, error), diff --git a/c/driver/flightsql/meson.build b/c/driver/flightsql/meson.build index cac24d5cf8..ce9e0f3839 100644 --- a/c/driver/flightsql/meson.build +++ b/c/driver/flightsql/meson.build @@ -19,21 +19,21 @@ golang = find_program('go') if build_machine.system() == 'windows' - prefix = '' - suffix = '.lib' + prefix = '' + suffix = '.lib' elif build_machine.system() == 'darwin' - prefix = 'lib' - suffix = '.dylib' + prefix = 'lib' + suffix = '.dylib' else - prefix = 'lib' - suffix = '.so' + prefix = 'lib' + suffix = '.so' endif adbc_driver_flightsql_name = prefix + 'adbc_driver_flightsql' + suffix adbc_driver_flightsql_lib = custom_target( 'adbc_driver_flightsql', output: adbc_driver_flightsql_name, - command : [ + command: [ golang, 'build', '-C', @@ -43,8 +43,8 @@ adbc_driver_flightsql_lib = custom_target( '-o', meson.current_build_dir() + '/' + adbc_driver_flightsql_name, ], - install : true, - install_dir : '.', + install: true, + install_dir: '.', ) pkg.generate( @@ -55,17 +55,17 @@ pkg.generate( filebase: 'adbc-driver-flightsql', ) -if get_option('tests') - exc = executable( - 'adbc-driver-flightsql-test', - 'dremio_flightsql_test.cc', - 'sqlite_flightsql_test.cc', - include_directories: [include_dir, c_dir, driver_dir], - link_with: [ - adbc_common_lib, - adbc_driver_flightsql_lib - ], - dependencies: 
[adbc_validation_dep], - ) - test('adbc-driver-flightsql', exc) -endif +adbc_driver_flightsql_dep = declare_dependency( + include_directories: include_dir, + link_with: adbc_driver_flightsql_lib, +) + +exc = executable( + 'adbc-driver-flightsql-test', + 'dremio_flightsql_test.cc', + 'sqlite_flightsql_test.cc', + include_directories: [include_dir, c_dir, driver_dir], + link_with: [adbc_common_lib, adbc_driver_flightsql_lib], + dependencies: [adbc_validation_dep], +) +test('adbc-driver-flightsql', exc) diff --git a/c/driver/flightsql/sqlite_flightsql_test.cc b/c/driver/flightsql/sqlite_flightsql_test.cc index 4797d58e77..02c41f9bc4 100644 --- a/c/driver/flightsql/sqlite_flightsql_test.cc +++ b/c/driver/flightsql/sqlite_flightsql_test.cc @@ -18,9 +18,12 @@ #include #include #include +#include #include +#include #include +#include #include #include #include @@ -33,10 +36,6 @@ using adbc_validation::IsOkErrno; using adbc_validation::IsOkStatus; -extern "C" { -AdbcStatusCode FlightSQLDriverInit(int, void*, struct AdbcError*); -} - #define CHECK_OK(EXPR) \ do { \ if (auto adbc_status = (EXPR); adbc_status != ADBC_STATUS_OK) { \ @@ -104,8 +103,7 @@ class SqliteFlightSqlQuirks : public adbc_validation::DriverQuirks { switch (info_code) { case ADBC_INFO_DRIVER_NAME: return "ADBC Flight SQL Driver - Go"; - case ADBC_INFO_DRIVER_VERSION: - return "(unknown or development build)"; + // Do not test ADBC_INFO_DRIVER_VERSION; it differs in different parts of CI case ADBC_INFO_DRIVER_ADBC_VERSION: return ADBC_VERSION_1_1_0; case ADBC_INFO_VENDOR_NAME: @@ -235,18 +233,20 @@ TEST_F(SqliteFlightSqlTest, TestGarbageInput) { ASSERT_THAT(AdbcDatabaseRelease(&database, &error), IsOkStatus(&error)); } +int Canary(const struct AdbcError*) { return 0; } + TEST_F(SqliteFlightSqlTest, AdbcDriverBackwardsCompatibility) { - // XXX: sketchy cast - auto* driver = static_cast(malloc(ADBC_DRIVER_1_0_0_SIZE)); - std::memset(driver, 0, ADBC_DRIVER_1_0_0_SIZE); + struct AdbcDriver driver; + 
std::memset(&driver, 0, ADBC_DRIVER_1_1_0_SIZE); + driver.ErrorGetDetailCount = Canary; - ASSERT_THAT(::FlightSQLDriverInit(ADBC_VERSION_1_0_0, driver, &error), + ASSERT_THAT(::AdbcDriverFlightsqlInit(ADBC_VERSION_1_0_0, &driver, &error), IsOkStatus(&error)); - ASSERT_THAT(::FlightSQLDriverInit(424242, driver, &error), - adbc_validation::IsStatus(ADBC_STATUS_NOT_IMPLEMENTED, &error)); + ASSERT_EQ(Canary, driver.ErrorGetDetailCount); - free(driver); + ASSERT_THAT(::AdbcDriverFlightsqlInit(424242, &driver, &error), + adbc_validation::IsStatus(ADBC_STATUS_NOT_IMPLEMENTED, &error)); } class SqliteFlightSqlConnectionTest : public ::testing::Test, diff --git a/c/driver/framework/AdbcDriverFrameworkConfig.cmake.in b/c/driver/framework/AdbcDriverFrameworkConfig.cmake.in new file mode 100644 index 0000000000..09a2ea3096 --- /dev/null +++ b/c/driver/framework/AdbcDriverFrameworkConfig.cmake.in @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +@PACKAGE_INIT@ + +set(ADBC_VERSION "@ADBC_VERSION@") + +include("${CMAKE_CURRENT_LIST_DIR}/AdbcDriverFrameworkTargets.cmake") + +check_required_components(AdbcDriverFramework) diff --git a/c/driver/framework/CMakeLists.txt b/c/driver/framework/CMakeLists.txt index f5c642b532..464f3640cf 100644 --- a/c/driver/framework/CMakeLists.txt +++ b/c/driver/framework/CMakeLists.txt @@ -20,11 +20,26 @@ include(FetchContent) add_library(adbc_driver_framework STATIC objects.cc utility.cc) adbc_configure_target(adbc_driver_framework) set_target_properties(adbc_driver_framework PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(adbc_driver_framework - PRIVATE "${REPOSITORY_ROOT}/c/" "${REPOSITORY_ROOT}/c/include" - "${REPOSITORY_ROOT}/c/vendor") +target_include_directories(adbc_driver_framework PRIVATE "${REPOSITORY_ROOT}/c/" + "${REPOSITORY_ROOT}/c/include") target_link_libraries(adbc_driver_framework PUBLIC adbc_driver_common fmt::fmt) +# For static builds, we need to install the static library here so downstream +# applications can link to it +if(ADBC_BUILD_STATIC) + if(ADBC_WITH_VENDORED_FMT OR ADBC_WITH_VENDORED_NANOARROW) + message(WARNING "adbc_driver_framework is not installed when ADBC_WITH_VENDORED_FMT or ADBC_WITH_VENDORED_NANOARROW are ON. 
To use the static libraries, for now you must provide these dependencies instead of using the vendored copies" + ) + else() + install(TARGETS adbc_driver_framework ${INSTALL_IS_OPTIONAL} + EXPORT adbc_driver_framework_targets + RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + arrow_install_cmake_package(AdbcDriverFramework adbc_driver_framework_targets) + endif() +endif() + if(ADBC_BUILD_TESTS) add_test_case(driver_framework_test PREFIX @@ -34,8 +49,7 @@ if(ADBC_BUILD_TESTS) SOURCES base_driver_test.cc EXTRA_LINK_LIBS - adbc_driver_framework - nanoarrow) + adbc_driver_framework) target_compile_features(adbc-driver-framework-test PRIVATE cxx_std_17) target_include_directories(adbc-driver-framework-test PRIVATE "${REPOSITORY_ROOT}/c/" diff --git a/c/driver/framework/base_driver.h b/c/driver/framework/base_driver.h index eecb506ee2..6120a66b2d 100644 --- a/c/driver/framework/base_driver.h +++ b/c/driver/framework/base_driver.h @@ -116,8 +116,9 @@ class Option { "': trailing data", value); } return parsed; + } else { + return status::InvalidArgument("Invalid integer value ", this->Format()); } - return status::InvalidArgument("Invalid integer value ", this->Format()); }, value_); } @@ -129,8 +130,9 @@ class Option { using T = std::decay_t; if constexpr (std::is_same_v) { return value; + } else { + return status::InvalidArgument("Invalid string value ", this->Format()); } - return status::InvalidArgument("Invalid string value ", this->Format()); }, value_); } @@ -363,14 +365,27 @@ template class Driver { public: static AdbcStatusCode Init(int version, void* raw_driver, AdbcError* error) { - if (version != ADBC_VERSION_1_0_0 && version != ADBC_VERSION_1_1_0) { - return ADBC_STATUS_NOT_IMPLEMENTED; + auto* driver = reinterpret_cast(raw_driver); + + switch (version) { + case ADBC_VERSION_1_2_0: + std::memset(driver, 0, ADBC_DRIVER_1_2_0_SIZE); + break; + case ADBC_VERSION_1_1_0: + 
 std::memset(driver, 0, ADBC_DRIVER_1_1_0_SIZE); + break; + case ADBC_VERSION_1_0_0: + std::memset(driver, 0, ADBC_DRIVER_1_0_0_SIZE); + break; + default: + return ADBC_STATUS_NOT_IMPLEMENTED; } - auto* driver = reinterpret_cast(raw_driver); - if (version >= ADBC_VERSION_1_1_0) { - std::memset(driver, 0, ADBC_DRIVER_1_1_0_SIZE); + if (version >= ADBC_VERSION_1_2_0) { + driver->StatementNextResult = &CStatementNextResult; + } + if (version >= ADBC_VERSION_1_1_0) { driver->ErrorGetDetailCount = &CErrorGetDetailCount; driver->ErrorGetDetail = &CErrorGetDetail; @@ -402,8 +417,6 @@ class Driver { driver->StatementSetOptionBytes = &CSetOptionBytes; driver->StatementSetOptionInt = &CSetOptionInt; driver->StatementSetOptionDouble = &CSetOptionDouble; - } else { - std::memset(driver, 0, ADBC_DRIVER_1_0_0_SIZE); } driver->private_data = new Driver(); @@ -754,6 +767,18 @@ class Driver { return private_data->GetParameterSchema(schema, error); } + static AdbcStatusCode CStatementNextResult(AdbcStatement* statement, + ArrowSchema* schema, + ArrowArrayStream* stream, + AdbcPartitions* partitions, + int64_t* rows_affected, + AdbcError* error) +{ + CHECK_INIT(statement, error); + auto private_data = reinterpret_cast(statement->private_data); + return private_data->NextResult(schema, stream, partitions, rows_affected, error); +} + static AdbcStatusCode CStatementPrepare(AdbcStatement* statement, AdbcError* error) { CHECK_INIT(statement, error); auto private_data = reinterpret_cast(statement->private_data); @@ -1131,7 +1156,13 @@ class BaseStatement : public ObjectBase { AdbcStatusCode ExecutePartitions(ArrowSchema* schema, AdbcPartitions* partitions, int64_t* rows_affected, AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; + RAISE_STATUS(error, impl().ExecutePartitionsImpl(schema, partitions, rows_affected)); + return ADBC_STATUS_OK; + } + + Status ExecutePartitionsImpl(ArrowSchema* schema, AdbcPartitions* partitions, + int64_t* rows_affected) { + return 
status::NotImplemented("ExecutePartitions"); } AdbcStatusCode Cancel(AdbcError* error) { @@ -1141,6 +1172,18 @@ class BaseStatement : public ObjectBase { Status Cancel() { return status::NotImplemented("Cancel"); } + AdbcStatusCode NextResult(ArrowSchema* schema, ArrowArrayStream* stream, + AdbcPartitions* partitions, int64_t* rows_affected, + AdbcError* error) { + RAISE_STATUS(error, impl().NextResultImpl(schema, stream, partitions, rows_affected)); + return ADBC_STATUS_OK; + } + + Status NextResultImpl(ArrowSchema* schema, ArrowArrayStream* stream, + AdbcPartitions* partitions, int64_t* rows_affected) { + return status::NotImplemented("NextResult"); + } + private: Derived& impl() { return static_cast<Derived&>(*this); } }; diff --git a/c/driver/framework/meson.build b/c/driver/framework/meson.build index 08be53eacb..9a59dfb744 100644 --- a/c/driver/framework/meson.build +++ b/c/driver/framework/meson.build @@ -17,10 +17,7 @@ adbc_framework_lib = library( 'adbc_driver_framework', - sources: [ - 'objects.cc', - 'utility.cc', - ], + sources: ['objects.cc', 'utility.cc'], include_directories: [include_dir, c_dir], link_with: [adbc_common_lib], dependencies: [nanoarrow_dep, fmt_dep], diff --git a/c/driver/framework/objects.cc b/c/driver/framework/objects.cc index 691f6e4145..31197c2d32 100644 --- a/c/driver/framework/objects.cc +++ b/c/driver/framework/objects.cc @@ -18,6 +18,7 @@ #include "driver/framework/objects.h" #include +#include #include "nanoarrow/nanoarrow.hpp" diff --git a/c/driver/framework/statement.h b/c/driver/framework/statement.h index c07324849c..e7fe342b11 100644 --- a/c/driver/framework/statement.h +++ b/c/driver/framework/statement.h @@ -191,6 +191,12 @@ class Statement : public BaseStatement { + AdbcStatusCode NextResult(ArrowSchema* schema, ArrowArrayStream* stream, + AdbcPartitions* partitions, int64_t* rows_affected, + AdbcError*) { + return ADBC_STATUS_NOT_IMPLEMENTED; + } + AdbcStatusCode Prepare(AdbcError* 
error) { RAISE_STATUS(error, std::visit( [&](auto&& state) -> Status { diff --git a/c/driver/framework/utility.cc b/c/driver/framework/utility.cc index cbcd8bb54b..d281776e59 100644 --- a/c/driver/framework/utility.cc +++ b/c/driver/framework/utility.cc @@ -163,7 +163,6 @@ Status MakeGetInfoStream(const std::vector& infos, ArrowArrayStream* } else { static_assert(!sizeof(T), "info value type not implemented"); } - return status::Ok(); }, info.value)); UNWRAP_ERRNO(Internal, ArrowArrayFinishElement(array.get())); diff --git a/c/driver/postgresql/AdbcDriverPostgreSQLConfig.cmake.in b/c/driver/postgresql/AdbcDriverPostgreSQLConfig.cmake.in index c879d18e6b..30a22c3a7a 100644 --- a/c/driver/postgresql/AdbcDriverPostgreSQLConfig.cmake.in +++ b/c/driver/postgresql/AdbcDriverPostgreSQLConfig.cmake.in @@ -17,8 +17,12 @@ @PACKAGE_INIT@ +include(CMakeFindDependencyMacro) + set(ADBC_VERSION "@ADBC_VERSION@") include("${CMAKE_CURRENT_LIST_DIR}/AdbcDriverPostgreSQLTargets.cmake") +find_dependency(PostgreSQL) + check_required_components(AdbcDriverPostgreSQL) diff --git a/c/driver/postgresql/CMakeLists.txt b/c/driver/postgresql/CMakeLists.txt index a720696c6a..e044a20d14 100644 --- a/c/driver/postgresql/CMakeLists.txt +++ b/c/driver/postgresql/CMakeLists.txt @@ -46,23 +46,22 @@ add_arrow_lib(adbc_driver_postgresql SHARED_LINK_LIBS adbc_driver_common adbc_driver_framework - nanoarrow ${LIBPQ_LINK_LIBRARIES} STATIC_LINK_LIBS ${LIBPQ_LINK_LIBRARIES} adbc_driver_common adbc_driver_framework - nanoarrow ${LIBPQ_STATIC_LIBRARIES}) foreach(LIB_TARGET ${ADBC_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ADBC_EXPORTING) target_include_directories(${LIB_TARGET} SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/ - ${REPOSITORY_ROOT}/c/include/ - ${LIBPQ_INCLUDE_DIRS} - ${REPOSITORY_ROOT}/c/vendor - ${REPOSITORY_ROOT}/c/driver) + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ + ${LIBPQ_INCLUDE_DIRS} ${REPOSITORY_ROOT}/c/driver) + + if(NOT ADBC_DEFINE_COMMON_ENTRYPOINTS) + 
target_compile_definitions(${LIB_TARGET} PRIVATE ${ADBC_TARGET_COMPILE_DEFINITIONS}) + endif() endforeach() if(ADBC_TEST_LINKAGE STREQUAL "shared") @@ -83,15 +82,11 @@ if(ADBC_BUILD_TESTS) EXTRA_LINK_LIBS adbc_driver_common adbc_validation - nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-postgresql-test PRIVATE cxx_std_17) target_include_directories(adbc-driver-postgresql-test SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/ - ${REPOSITORY_ROOT}/c/include/ - ${LIBPQ_INCLUDE_DIRS} - ${REPOSITORY_ROOT}/c/vendor - ${REPOSITORY_ROOT}/c/driver) + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ + ${LIBPQ_INCLUDE_DIRS} ${REPOSITORY_ROOT}/c/driver) adbc_configure_target(adbc-driver-postgresql-test) add_test_case(driver_postgresql_copy_test @@ -105,15 +100,11 @@ if(ADBC_BUILD_TESTS) EXTRA_LINK_LIBS adbc_driver_common adbc_validation - nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-postgresql-copy-test PRIVATE cxx_std_17) target_include_directories(adbc-driver-postgresql-copy-test SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/ - ${REPOSITORY_ROOT}/c/include/ - ${LIBPQ_INCLUDE_DIRS} - ${REPOSITORY_ROOT}/c/vendor - ${REPOSITORY_ROOT}/c/driver) + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ + ${LIBPQ_INCLUDE_DIRS} ${REPOSITORY_ROOT}/c/driver) adbc_configure_target(adbc-driver-postgresql-copy-test) endif() @@ -124,12 +115,10 @@ if(ADBC_BUILD_BENCHMARKS) EXTRA_LINK_LIBS adbc_driver_common adbc_validation - nanoarrow ${TEST_LINK_LIBS} benchmark::benchmark) # add_benchmark replaces _ with - when creating target target_include_directories(postgresql-benchmark PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ - ${REPOSITORY_ROOT}/c/vendor ${REPOSITORY_ROOT}/c/driver) endif() diff --git a/c/driver/postgresql/README.md b/c/driver/postgresql/README.md index 8ccffb6845..b83ce2de70 100644 --- a/c/driver/postgresql/README.md +++ b/c/driver/postgresql/README.md @@ -19,8 +19,17 @@ # ADBC PostgreSQL Driver +![Vendor: 
PostgreSQL](https://img.shields.io/badge/vendor-PostgreSQL-blue?style=flat-square) +![Implementation: C/C++](https://img.shields.io/badge/implementation-C%2FC%2B%2B-violet?style=flat-square) +![Status: Stable](https://img.shields.io/badge/status-stable-green?style=flat-square) + +[![conda-forge: adbc-driver-postgresql](https://img.shields.io/conda/vn/conda-forge/adbc-driver-postgresql?label=conda-forge%3A%20adbc-driver-postgresql&style=flat-square)](https://anaconda.org/conda-forge/adbc-driver-postgresql) +[![conda-forge: libadbc-driver-postgresql](https://img.shields.io/conda/vn/conda-forge/libadbc-driver-postgresql?label=conda-forge%3A%20libadbc-driver-postgresql&style=flat-square)](https://anaconda.org/conda-forge/libadbc-driver-postgresql) +[![CRAN: adbcpostgresql](https://img.shields.io/cran/v/adbcpostgresql?style=flat-square)](https://cran.r-project.org/web/packages/adbcpostgresql/index.html) +[![PyPI: adbc-driver-postgresql](https://img.shields.io/pypi/v/adbc-driver-postgresql?style=flat-square)](https://pypi.org/project/adbc-driver-postgresql/) + This implements an ADBC driver that wraps [libpq][libpq], the client -library for PostgreSQL. This is still a work in progress. +library for PostgreSQL. This project owes credit to 0x0L's [pgeon][pgeon] for the overall approach. @@ -36,7 +45,7 @@ Dependencies: libpq itself. This can be installed with your favorite package manager; however, you may need to set the `PKG_CONFIG_PATH` environment variable such that `pkg-config` can find libpq. -See [CONTRIBUTING.md](../../CONTRIBUTING.md) for details. +See [CONTRIBUTING.md](../../../CONTRIBUTING.md) for details. 
## Testing diff --git a/c/driver/postgresql/bind_stream.h b/c/driver/postgresql/bind_stream.h index df0b9d2ca5..25c55eec7e 100644 --- a/c/driver/postgresql/bind_stream.h +++ b/c/driver/postgresql/bind_stream.h @@ -56,6 +56,7 @@ struct BindStream { Handle param_buffer; bool has_tz_field = false; + bool autocommit = false; std::string tz_setting; struct ArrowError na_error; @@ -119,6 +120,7 @@ struct BindStream { if (!has_tz_field && type.type_id() == PostgresTypeId::kTimestamptz) { UNWRAP_STATUS(SetDatabaseTimezoneUTC(pg_conn, autocommit)); has_tz_field = true; + this->autocommit = autocommit; } std::unique_ptr writer; @@ -199,9 +201,11 @@ struct BindStream { int result_format) { param_buffer->size_bytes = 0; int64_t last_offset = 0; + std::vector is_null_param(array_view->n_children); for (int64_t col = 0; col < array_view->n_children; col++) { - if (!ArrowArrayViewIsNull(array_view->children[col], current_row)) { + is_null_param[col] = ArrowArrayViewIsNull(array_view->children[col], current_row); + if (!is_null_param[col]) { // Note that this Write() call currently writes the (int32_t) byte size of the // field in addition to the serialized value. 
UNWRAP_NANOARROW( @@ -213,7 +217,7 @@ struct BindStream { int64_t param_length = param_buffer->size_bytes - last_offset - sizeof(int32_t); if (param_length > (std::numeric_limits::max)()) { - return Status::Internal("Paramter ", col, "serialized to >2GB of binary"); + return Status::Internal("Parameter ", col, "serialized to >2GB of binary"); } param_lengths[col] = static_cast(param_length); @@ -223,7 +227,7 @@ struct BindStream { last_offset = 0; for (int64_t col = 0; col < array_view->n_children; col++) { last_offset += sizeof(int32_t); - if (param_lengths[col] == 0) { + if (is_null_param[col]) { param_values[col] = nullptr; } else { param_values[col] = reinterpret_cast(param_buffer->data) + last_offset; @@ -254,8 +258,14 @@ struct BindStream { PqResultHelper reset(pg_conn, "SET TIME ZONE '" + tz_setting + "'"); UNWRAP_STATUS(reset.Execute()); - PqResultHelper commit(pg_conn, "COMMIT"); - UNWRAP_STATUS(reset.Execute()); + if (autocommit) { + // SetDatabaseTimezoneUTC will start a transaction if autocommit is + // enabled (so the timezone setting will roll back if we error), so we + // need to commit here. Otherwise we should not commit and let the + // user commit. 
+ PqResultHelper commit(pg_conn, "COMMIT"); + UNWRAP_STATUS(commit.Execute()); + } } return Status::Ok(); diff --git a/c/driver/postgresql/connection.cc b/c/driver/postgresql/connection.cc index b5f12ca73f..6ac879e3b6 100644 --- a/c/driver/postgresql/connection.cc +++ b/c/driver/postgresql/connection.cc @@ -32,6 +32,7 @@ #include #include +#include #include #include "database.h" @@ -47,6 +48,9 @@ using adbc::driver::Status; namespace adbcpq { namespace { +constexpr std::string_view kConnectionOptionTransactionStatus = + "adbc.postgresql.transaction_status"; + static const uint32_t kSupportedInfoCodes[] = { ADBC_INFO_VENDOR_NAME, ADBC_INFO_VENDOR_VERSION, ADBC_INFO_DRIVER_NAME, ADBC_INFO_DRIVER_VERSION, @@ -177,8 +181,8 @@ class PostgresGetObjectsHelper : public adbc::driver::GetObjectsHelper { some_constraints_(conn, ConstraintsQuery()) {} // Allow Redshift to execute this query without constraints - // TODO(paleolimbot): Investigate to see if we can simplify the constraits query so that - // it works on both! + // TODO(paleolimbot): Investigate to see if we can simplify the constraints query so + // that it works on both! void SetEnableConstraints(bool enable_constraints) { enable_constraints_ = enable_constraints; } @@ -300,7 +304,8 @@ class PostgresGetObjectsHelper : public adbc::driver::GetObjectsHelper { Column col; col.column_name = next_column_[0].value(); - UNWRAP_RESULT(col.ordinal_position, next_column_[1].ParseInteger()); + UNWRAP_RESULT(int64_t ordinal_position, next_column_[1].ParseInteger()); + col.ordinal_position = static_cast(ordinal_position); if (!next_column_[2].is_null) { col.remarks = next_column_[2].value(); } @@ -458,7 +463,7 @@ AdbcStatusCode PostgresConnection::Cancel(struct AdbcError* error) { // > The return value is 1 if the cancel request was successfully dispatched // > and 0 if not. 
if (PQcancel(cancel_, errbuf, sizeof(errbuf)) != 1) { - SetError(error, "[libpq] Failed to cancel operation: %s", errbuf); + InternalAdbcSetError(error, "[libpq] Failed to cancel operation: %s", errbuf); return ADBC_STATUS_UNKNOWN; } return ADBC_STATUS_OK; @@ -466,10 +471,18 @@ AdbcStatusCode PostgresConnection::Cancel(struct AdbcError* error) { AdbcStatusCode PostgresConnection::Commit(struct AdbcError* error) { if (autocommit_) { - SetError(error, "%s", "[libpq] Cannot commit when autocommit is enabled"); + InternalAdbcSetError(error, "%s", "[libpq] Cannot commit when autocommit is enabled"); return ADBC_STATUS_INVALID_STATE; } + PGTransactionStatusType txn_status = PQtransactionStatus(conn_); + if (txn_status == PQTRANS_IDLE) { + // https://github.com/apache/arrow-adbc/issues/2673: don't rollback if the + // transaction is idle, since it won't have any effect and PostgreSQL will + // issue a warning on the server side + return ADBC_STATUS_OK; + } + PGresult* result = PQexec(conn_, "COMMIT; BEGIN TRANSACTION"); if (PQresultStatus(result) != PGRES_COMMAND_OK) { AdbcStatusCode code = SetError(error, result, "%s%s", @@ -513,7 +526,8 @@ AdbcStatusCode PostgresConnection::GetInfo(struct AdbcConnection* connection, RAISE_STATUS(error, result_helper.Execute()); auto it = result_helper.begin(); if (it == result_helper.end()) { - SetError(error, "[libpq] PostgreSQL returned no rows for '%s'", stmt); + InternalAdbcSetError(error, "[libpq] PostgreSQL returned no rows for '%s'", + stmt); return ADBC_STATUS_INTERNAL; } const char* server_version_num = (*it)[0].data; @@ -607,13 +621,32 @@ AdbcStatusCode PostgresConnection::GetOption(const char* option, char* value, RAISE_STATUS(error, result_helper.Execute()); auto it = result_helper.begin(); if (it == result_helper.end()) { - SetError(error, - "[libpq] PostgreSQL returned no rows for 'SELECT CURRENT_SCHEMA()'"); + InternalAdbcSetError( + error, "[libpq] PostgreSQL returned no rows for 'SELECT CURRENT_SCHEMA()'"); return 
ADBC_STATUS_INTERNAL; } output = (*it)[0].data; } else if (std::strcmp(option, ADBC_CONNECTION_OPTION_AUTOCOMMIT) == 0) { output = autocommit_ ? ADBC_OPTION_VALUE_ENABLED : ADBC_OPTION_VALUE_DISABLED; + } else if (std::strcmp(option, kConnectionOptionTransactionStatus.data()) == 0) { + switch (PQtransactionStatus(conn_)) { + case PQTRANS_IDLE: + output = "idle"; + break; + case PQTRANS_ACTIVE: + output = "active"; + break; + case PQTRANS_INTRANS: + output = "intrans"; + break; + case PQTRANS_INERROR: + output = "inerror"; + break; + case PQTRANS_UNKNOWN: + default: + output = "unknown"; + break; + } } else { return ADBC_STATUS_NOT_FOUND; } @@ -781,7 +814,8 @@ AdbcStatusCode PostgresConnectionGetStatisticsImpl(PGconn* conn, const char* db_ for (PqResultRow row : result_helper) { auto reltuples = row[5].ParseDouble(); if (!reltuples) { - SetError(error, "[libpq] Invalid double value in reltuples: '%s'", row[5].data); + InternalAdbcSetError(error, "[libpq] Invalid double value in reltuples: '%s'", + row[5].data); return ADBC_STATUS_INTERNAL; } @@ -805,7 +839,8 @@ AdbcStatusCode PostgresConnectionGetStatisticsImpl(PGconn* conn, const char* db_ auto null_frac = row[2].ParseDouble(); if (!null_frac) { - SetError(error, "[libpq] Invalid double value in null_frac: '%s'", row[2].data); + InternalAdbcSetError(error, "[libpq] Invalid double value in null_frac: '%s'", + row[2].data); return ADBC_STATUS_INTERNAL; } @@ -830,7 +865,8 @@ AdbcStatusCode PostgresConnectionGetStatisticsImpl(PGconn* conn, const char* db_ auto average_byte_width = row[3].ParseDouble(); if (!average_byte_width) { - SetError(error, "[libpq] Invalid double value in avg_width: '%s'", row[3].data); + InternalAdbcSetError(error, "[libpq] Invalid double value in avg_width: '%s'", + row[3].data); return ADBC_STATUS_INTERNAL; } @@ -856,7 +892,8 @@ AdbcStatusCode PostgresConnectionGetStatisticsImpl(PGconn* conn, const char* db_ auto n_distinct = row[4].ParseDouble(); if (!n_distinct) { - SetError(error, "[libpq] 
Invalid double value in avg_width: '%s'", row[4].data); + InternalAdbcSetError(error, "[libpq] Invalid double value in avg_width: '%s'", + row[4].data); return ADBC_STATUS_INTERNAL; } @@ -906,13 +943,14 @@ AdbcStatusCode PostgresConnection::GetStatistics(const char* catalog, struct AdbcError* error) { // Simplify our jobs here if (!approximate) { - SetError(error, "[libpq] Exact statistics are not implemented"); + InternalAdbcSetError(error, "[libpq] Exact statistics are not implemented"); return ADBC_STATUS_NOT_IMPLEMENTED; } else if (!db_schema) { - SetError(error, "[libpq] Must request statistics for a single schema"); + InternalAdbcSetError(error, "[libpq] Must request statistics for a single schema"); return ADBC_STATUS_NOT_IMPLEMENTED; } else if (catalog && std::strcmp(catalog, PQdb(conn_)) != 0) { - SetError(error, "[libpq] Can only request statistics for current catalog"); + InternalAdbcSetError(error, + "[libpq] Can only request statistics for current catalog"); return ADBC_STATUS_NOT_IMPLEMENTED; } @@ -1028,8 +1066,9 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog, PostgresType pg_type; if (type_resolver_->FindWithDefault(pg_oid, &pg_type) != NANOARROW_OK) { - SetError(error, "%s%d%s%s%s%" PRIu32, "Error resolving type code for column #", - row_counter + 1, " (\"", colname, "\") with oid ", pg_oid); + InternalAdbcSetError(error, "%s%d%s%s%s%" PRIu32, + "Error resolving type code for column #", row_counter + 1, + " (\"", colname, "\") with oid ", pg_oid); final_status = ADBC_STATUS_NOT_IMPLEMENTED; break; } @@ -1060,7 +1099,7 @@ AdbcStatusCode PostgresConnection::GetTableTypes(struct AdbcConnection* connecti AdbcStatusCode PostgresConnection::Init(struct AdbcDatabase* database, struct AdbcError* error) { if (!database || !database->private_data) { - SetError(error, "[libpq] Must provide an initialized AdbcDatabase"); + InternalAdbcSetError(error, "[libpq] Must provide an initialized AdbcDatabase"); return 
ADBC_STATUS_INVALID_ARGUMENT; } database_ = @@ -1071,12 +1110,17 @@ AdbcStatusCode PostgresConnection::Init(struct AdbcDatabase* database, cancel_ = PQgetCancel(conn_); if (!cancel_) { - SetError(error, "[libpq] Could not initialize PGcancel"); + InternalAdbcSetError(error, "[libpq] Could not initialize PGcancel"); return ADBC_STATUS_UNKNOWN; } std::ignore = PQsetNoticeProcessor(conn_, SilentNoticeProcessor, nullptr); + for (const auto& [key, value] : post_init_options_) { + RAISE_ADBC(SetOption(key.data(), value.data(), error)); + } + post_init_options_.clear(); + return ADBC_STATUS_OK; } @@ -1093,13 +1137,23 @@ AdbcStatusCode PostgresConnection::Release(struct AdbcError* error) { AdbcStatusCode PostgresConnection::Rollback(struct AdbcError* error) { if (autocommit_) { - SetError(error, "%s", "[libpq] Cannot rollback when autocommit is enabled"); + InternalAdbcSetError(error, "%s", + "[libpq] Cannot rollback when autocommit is enabled"); return ADBC_STATUS_INVALID_STATE; } - PGresult* result = PQexec(conn_, "ROLLBACK"); + PGTransactionStatusType txn_status = PQtransactionStatus(conn_); + if (txn_status == PQTRANS_IDLE) { + // https://github.com/apache/arrow-adbc/issues/2673: don't rollback if the + // transaction is idle, since it won't have any effect and PostgreSQL will + // issue a warning on the server side + return ADBC_STATUS_OK; + } + + PGresult* result = PQexec(conn_, "ROLLBACK AND CHAIN"); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - SetError(error, "%s%s", "[libpq] Failed to rollback: ", PQerrorMessage(conn_)); + InternalAdbcSetError(error, "%s%s", + "[libpq] Failed to rollback: ", PQerrorMessage(conn_)); PQclear(result); return ADBC_STATUS_IO; } @@ -1116,17 +1170,23 @@ AdbcStatusCode PostgresConnection::SetOption(const char* key, const char* value, } else if (std::strcmp(value, ADBC_OPTION_VALUE_DISABLED) == 0) { autocommit = false; } else { - SetError(error, "%s%s%s%s", "[libpq] Invalid value for option ", key, ": ", value); + 
InternalAdbcSetError(error, "%s%s%s%s", "[libpq] Invalid value for option ", key, + ": ", value); return ADBC_STATUS_INVALID_ARGUMENT; } + if (!conn_) { + post_init_options_.emplace_back(key, value); + return ADBC_STATUS_OK; + } + if (autocommit != autocommit_) { const char* query = autocommit ? "COMMIT" : "BEGIN TRANSACTION"; PGresult* result = PQexec(conn_, query); if (PQresultStatus(result) != PGRES_COMMAND_OK) { - SetError(error, "%s%s", - "[libpq] Failed to update autocommit: ", PQerrorMessage(conn_)); + InternalAdbcSetError(error, "%s%s", "[libpq] Failed to update autocommit: ", + PQerrorMessage(conn_)); PQclear(result); return ADBC_STATUS_IO; } @@ -1135,35 +1195,45 @@ AdbcStatusCode PostgresConnection::SetOption(const char* key, const char* value, } return ADBC_STATUS_OK; } else if (std::strcmp(key, ADBC_CONNECTION_OPTION_CURRENT_DB_SCHEMA) == 0) { + if (!conn_) { + post_init_options_.emplace_back(key, value); + return ADBC_STATUS_OK; + } + // PostgreSQL doesn't accept a parameter here char* value_esc = PQescapeIdentifier(conn_, value, strlen(value)); - std::string query = std::string("SET search_path TO ") + value_esc; + if (!value_esc) { + InternalAdbcSetError(error, "[libpq] Could not escape identifier: %s", + PQerrorMessage(conn_)); + return ADBC_STATUS_INTERNAL; + } + std::string query = fmt::format("SET search_path TO {}", value_esc); PQfreemem(value_esc); PqResultHelper result_helper{conn_, query}; RAISE_STATUS(error, result_helper.Execute()); return ADBC_STATUS_OK; } - SetError(error, "%s%s", "[libpq] Unknown option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } AdbcStatusCode PostgresConnection::SetOptionBytes(const char* key, const uint8_t* value, size_t length, struct AdbcError* error) { - SetError(error, "%s%s", "[libpq] Unknown option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } AdbcStatusCode 
PostgresConnection::SetOptionDouble(const char* key, double value, struct AdbcError* error) { - SetError(error, "%s%s", "[libpq] Unknown option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } AdbcStatusCode PostgresConnection::SetOptionInt(const char* key, int64_t value, struct AdbcError* error) { - SetError(error, "%s%s", "[libpq] Unknown option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } diff --git a/c/driver/postgresql/connection.h b/c/driver/postgresql/connection.h index 7683875b5f..02e0c4f1bc 100644 --- a/c/driver/postgresql/connection.h +++ b/c/driver/postgresql/connection.h @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #include @@ -83,5 +86,6 @@ class PostgresConnection { PGconn* conn_; PGcancel* cancel_; bool autocommit_; + std::vector> post_init_options_; }; } // namespace adbcpq diff --git a/c/driver/postgresql/copy/postgres_copy_reader_test.cc b/c/driver/postgresql/copy/postgres_copy_reader_test.cc index 7b9fe230f8..e5ba5fc6c2 100644 --- a/c/driver/postgresql/copy/postgres_copy_reader_test.cc +++ b/c/driver/postgresql/copy/postgres_copy_reader_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+#include + #include #include @@ -795,6 +797,86 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadArray) { ASSERT_EQ(data_buffer[4], 123); } +TEST(PostgresCopyUtilsTest, PostgresCopyReadInt2vector) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyInt2vector; + data.size_bytes = sizeof(kTestPgCopyInt2vector); + + auto col_type = PostgresType(PostgresTypeId::kInt2vector); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("empty", col_type); + input_type.AppendChild("len1", col_type); + input_type.AppendChild("len2", col_type); + input_type.AppendChild("len4", col_type); + + PostgresCopyStreamTester tester; + ArrowError error; + ASSERT_EQ(tester.Init(input_type, &error), NANOARROW_OK) << error.message; + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyInt2vector, sizeof(kTestPgCopyInt2vector)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 1); + ASSERT_EQ(array->n_children, 4); + + for (int col = 0; col < 4; col++) { + ASSERT_EQ(array->children[col]->n_children, 1); + } + + { + auto* child = array->children[0]; + ASSERT_EQ(child->children[0]->length, 0); + auto offsets = reinterpret_cast(child->buffers[1]); + auto data_buffer = reinterpret_cast(child->children[0]->buffers[1]); + ASSERT_NE(data_buffer, nullptr); + + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 0); + } + { + auto* child = array->children[1]; + ASSERT_EQ(child->children[0]->length, 1); + auto offsets = reinterpret_cast(child->buffers[1]); + auto data_buffer = reinterpret_cast(child->children[0]->buffers[1]); + ASSERT_NE(data_buffer, nullptr); + + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 1); + + EXPECT_EQ(data_buffer[0], -32768); + } + { + auto* child = array->children[2]; + ASSERT_EQ(child->children[0]->length, 2); + auto offsets = reinterpret_cast(child->buffers[1]); + auto data_buffer = 
reinterpret_cast(child->children[0]->buffers[1]); + ASSERT_NE(data_buffer, nullptr); + + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 2); + + EXPECT_EQ(data_buffer[0], -32768); + EXPECT_EQ(data_buffer[1], 32767); + } + { + auto* child = array->children[3]; + ASSERT_EQ(child->children[0]->length, 4); + auto offsets = reinterpret_cast(child->buffers[1]); + auto data_buffer = reinterpret_cast(child->children[0]->buffers[1]); + ASSERT_NE(data_buffer, nullptr); + + EXPECT_EQ(offsets[0], 0); + EXPECT_EQ(offsets[1], 4); + + EXPECT_EQ(data_buffer[0], -1); + EXPECT_EQ(data_buffer[1], 0); + EXPECT_EQ(data_buffer[2], 1); + EXPECT_EQ(data_buffer[3], 42); + } +} + TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) { ArrowBufferView data; data.data.as_uint8 = kTestPgCopyCustomRecord; diff --git a/c/driver/postgresql/copy/postgres_copy_test_common.h b/c/driver/postgresql/copy/postgres_copy_test_common.h index 8872ada6d0..61aa156f38 100644 --- a/c/driver/postgresql/copy/postgres_copy_test_common.h +++ b/c/driver/postgresql/copy/postgres_copy_test_common.h @@ -21,7 +21,7 @@ namespace adbcpq { -// New cases can be genereated using: +// New cases can be generated using: // psql --host 127.0.0.1 --port 5432 --username postgres -c "COPY (SELECT ...) 
TO STDOUT // WITH (FORMAT binary);" > test.copy Rscript -e "dput(brio::read_file_raw('test.copy'))" @@ -221,4 +221,25 @@ static const uint8_t kTestPgCopyNumeric16_10[] = { 0x7B, 0x11, 0xD7, 0x22, 0xC4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; +// COPY (SELECT +// CAST('' AS int2vector), +// CAST('-32768' AS int2vector), +// CAST('-32768 32767' AS int2vector), +// CAST('-1 0 1 42' AS int2vector) +// ) TO '/tmp/pgdata.bin' WITH (FORMAT binary); +static const uint8_t kTestPgCopyInt2vector[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x02, 0x7f, 0xff, 0x00, + 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, + 0xff, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x2a, 0xff, 0xff, +}; + } // namespace adbcpq diff --git a/c/driver/postgresql/copy/postgres_copy_writer_test.cc b/c/driver/postgresql/copy/postgres_copy_writer_test.cc index 5010848cf5..cd8cb30083 100644 --- a/c/driver/postgresql/copy/postgres_copy_writer_test.cc +++ b/c/driver/postgresql/copy/postgres_copy_writer_test.cc @@ -16,8 +16,11 @@ // under the License. 
#include +#include #include +#include #include +#include #include #include diff --git a/c/driver/postgresql/copy/reader.h b/c/driver/postgresql/copy/reader.h index 07f91d545e..d14f8752f4 100644 --- a/c/driver/postgresql/copy/reader.h +++ b/c/driver/postgresql/copy/reader.h @@ -543,10 +543,16 @@ class PostgresCopyArrayFieldReader : public PostgresCopyFieldReader { int32_t lower_bound; NANOARROW_RETURN_NOT_OK(ReadChecked(data, &lower_bound, error)); - if (lower_bound != 1) { - ArrowErrorSet(error, "Array value with lower bound != 1 is not supported"); + if (lower_bound != 0 && lower_bound != 1) { + ArrowErrorSet(error, + "Array value with lower bound not in {0, 1} is not supported"); return EINVAL; } + // In theory, for other lower bounds, we could insert NULLs + // appropriately. We could treat lower_bound == 1 as an array with a + // NULL at index 0 but since the default is 1, it makes more sense to + // treat it as a 1-indexed array. However, lower_bound == 0 is also + // possible (e.g. for int2vector). 
} for (int64_t i = 0; i < n_items; i++) { @@ -855,6 +861,19 @@ static inline ArrowErrorCode MakeCopyFieldReader( *out = std::move(array_reader); return NANOARROW_OK; } + case PostgresTypeId::kInt2vector: { + PostgresType int2type(PostgresTypeId::kInt2); + auto array_reader = std::make_unique(); + array_reader->Init(int2type.Array(0, "int2vector")); + + std::unique_ptr child_reader; + NANOARROW_RETURN_NOT_OK( + MakeCopyFieldReader(int2type, schema->children[0], &child_reader, error)); + array_reader->InitChild(std::move(child_reader)); + + *out = std::move(array_reader); + return NANOARROW_OK; + } default: return ErrorCantConvert(error, pg_type, schema_view); } @@ -864,7 +883,7 @@ static inline ArrowErrorCode MakeCopyFieldReader( case PostgresTypeId::kRecord: { if (pg_type.n_children() != schema->n_children) { ArrowErrorSet(error, - "Can't convert Postgres record type with %ld chlidren to Arrow " + "Can't convert Postgres record type with %ld children to Arrow " "struct type with %ld children", static_cast(pg_type.n_children()), // NOLINT(runtime/int) static_cast(schema->n_children)); // NOLINT(runtime/int) diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index e88ed691cd..b352635a9f 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -498,9 +498,9 @@ class PostgresCopyListFieldWriter : public PostgresCopyFieldWriter { NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, lb, error)); } - ArrowBufferAppend(buffer, tmp->data, tmp->size_bytes); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(buffer, tmp->data, tmp->size_bytes)); - return ADBC_STATUS_OK; + return NANOARROW_OK; } private: @@ -726,7 +726,7 @@ static inline ArrowErrorCode MakeCopyFieldWriter( case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: { // For now our implementation only supports primitive children types - // See PostgresCopyListFieldWriter::Write for limtiations + // See PostgresCopyListFieldWriter::Write for limitations 
struct ArrowSchemaView child_schema_view; NANOARROW_RETURN_NOT_OK( ArrowSchemaViewInit(&child_schema_view, schema->children[0], error)); diff --git a/c/driver/postgresql/database.cc b/c/driver/postgresql/database.cc index cdbad7535f..cecf7a6eb0 100644 --- a/c/driver/postgresql/database.cc +++ b/c/driver/postgresql/database.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -74,8 +75,8 @@ AdbcStatusCode PostgresDatabase::Init(struct AdbcError* error) { AdbcStatusCode PostgresDatabase::Release(struct AdbcError* error) { if (open_connections_ != 0) { - SetError(error, "%s%" PRId32 "%s", "[libpq] Database released with ", - open_connections_, " open connections"); + InternalAdbcSetError(error, "%s%" PRId32 "%s", "[libpq] Database released with ", + open_connections_, " open connections"); return ADBC_STATUS_INVALID_STATE; } return ADBC_STATUS_OK; @@ -86,7 +87,7 @@ AdbcStatusCode PostgresDatabase::SetOption(const char* key, const char* value, if (strcmp(key, "uri") == 0) { uri_ = value; } else { - SetError(error, "%s%s", "[libpq] Unknown database option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown database option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } return ADBC_STATUS_OK; @@ -94,31 +95,33 @@ AdbcStatusCode PostgresDatabase::SetOption(const char* key, const char* value, AdbcStatusCode PostgresDatabase::SetOptionBytes(const char* key, const uint8_t* value, size_t length, struct AdbcError* error) { - SetError(error, "%s%s", "[libpq] Unknown option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } AdbcStatusCode PostgresDatabase::SetOptionDouble(const char* key, double value, struct AdbcError* error) { - SetError(error, "%s%s", "[libpq] Unknown option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } AdbcStatusCode PostgresDatabase::SetOptionInt(const char* key, int64_t value, struct 
AdbcError* error) { - SetError(error, "%s%s", "[libpq] Unknown option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } AdbcStatusCode PostgresDatabase::Connect(PGconn** conn, struct AdbcError* error) { if (uri_.empty()) { - SetError(error, "%s", - "[libpq] Must set database option 'uri' before creating a connection"); + InternalAdbcSetError( + error, "%s", + "[libpq] Must set database option 'uri' before creating a connection"); return ADBC_STATUS_INVALID_STATE; } *conn = PQconnectdb(uri_.c_str()); if (PQstatus(*conn) != CONNECTION_OK) { - SetError(error, "%s%s", "[libpq] Failed to connect: ", PQerrorMessage(*conn)); + InternalAdbcSetError(error, "%s%s", + "[libpq] Failed to connect: ", PQerrorMessage(*conn)); PQfinish(*conn); *conn = nullptr; return ADBC_STATUS_IO; @@ -131,7 +134,7 @@ AdbcStatusCode PostgresDatabase::Disconnect(PGconn** conn, struct AdbcError* err PQfinish(*conn); *conn = nullptr; if (--open_connections_ < 0) { - SetError(error, "%s", "[libpq] Open connection count underflowed"); + InternalAdbcSetError(error, "%s", "[libpq] Open connection count underflowed"); return ADBC_STATUS_INTERNAL; } return ADBC_STATUS_OK; @@ -294,7 +297,7 @@ static Status InsertPgAttributeResult( UNWRAP_RESULT(int64_t col_oid, item[2].ParseInteger()); if (type_oid != current_type_oid && !columns.empty()) { - resolver->InsertClass(current_type_oid, columns); + resolver->InsertClass(static_cast(current_type_oid), columns); columns.clear(); current_type_oid = type_oid; } @@ -347,12 +350,12 @@ static Status InsertPgTypeResult(const PqResultHelper& result, type_item.class_oid = static_cast(typrelid); type_item.base_oid = static_cast(typbasetype); - int result = resolver->Insert(type_item, nullptr); + int insert_result = resolver->Insert(type_item, nullptr); // If there's an array type and the insert succeeded, add that now too - if (result == NANOARROW_OK && typarray != 0) { + if (insert_result == NANOARROW_OK 
&& typarray != 0) { std::string array_typname = "_" + std::string(typname); - type_item.oid = typarray; + type_item.oid = static_cast(typarray); type_item.typname = array_typname.c_str(); type_item.typreceive = "array_recv"; type_item.child_oid = static_cast(oid); diff --git a/c/driver/postgresql/database.h b/c/driver/postgresql/database.h index e0a00267e3..5adf3af9a6 100644 --- a/c/driver/postgresql/database.h +++ b/c/driver/postgresql/database.h @@ -87,10 +87,3 @@ class PostgresDatabase { std::array redshift_server_version_{}; }; } // namespace adbcpq - -extern "C" { -/// For applications that want to use the driver struct directly, this gives -/// them access to the Init routine. -ADBC_EXPORT -AdbcStatusCode PostgresqlDriverInit(int, void*, struct AdbcError*); -} diff --git a/c/driver/postgresql/error.cc b/c/driver/postgresql/error.cc index 173868baf5..d21edb68fe 100644 --- a/c/driver/postgresql/error.cc +++ b/c/driver/postgresql/error.cc @@ -18,7 +18,7 @@ #include "error.h" #include -#include +#include #include #include #include diff --git a/c/driver/postgresql/meson.build b/c/driver/postgresql/meson.build index ac075417f5..7dbd11dd34 100644 --- a/c/driver/postgresql/meson.build +++ b/c/driver/postgresql/meson.build @@ -41,35 +41,32 @@ pkg.generate( filebase: 'adbc-driver-postgresql', ) -if get_option('tests') - postgres_tests = { - 'driver-postgresql': { - 'src_name': 'driver_postgresql', - 'sources': [ - 'postgres_type_test.cc', - 'postgresql_test.cc', - ] - }, - 'driver-postgresql-copy': { - 'src_name': 'driver_postgresql_copy', - 'sources': [ - 'copy/postgres_copy_reader_test.cc', - 'copy/postgres_copy_writer_test.cc', - ] - }, - } +adbc_driver_postgresql_dep = declare_dependency( + include_directories: include_dir, + link_with: adbc_postgres_driver_lib, +) + +postgres_tests = { + 'driver-postgresql': { + 'src_name': 'driver_postgresql', + 'sources': ['postgres_type_test.cc', 'postgresql_test.cc'], + }, + 'driver-postgresql-copy': { + 'src_name': 
'driver_postgresql_copy', + 'sources': [ + 'copy/postgres_copy_reader_test.cc', + 'copy/postgres_copy_writer_test.cc', + ], + }, +} - foreach name, conf : postgres_tests - exc = executable( - 'adbc-' + name + '-test', - sources: conf['sources'], - include_directories: [include_dir, driver_dir, c_dir], - link_with: [ - adbc_common_lib, - adbc_postgres_driver_lib, - ], - dependencies: [libpq_dep, adbc_validation_dep], - ) - test('adbc-' + name, exc) - endforeach -endif +foreach name, conf : postgres_tests + exc = executable( + 'adbc-' + name + '-test', + sources: conf['sources'], + include_directories: [include_dir, driver_dir, c_dir], + link_with: [adbc_common_lib, adbc_postgres_driver_lib], + dependencies: [libpq_dep, adbc_validation_dep], + ) + test('adbc-' + name, exc) +endforeach diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h index d2a5356293..e8935cc76b 100644 --- a/c/driver/postgresql/postgres_type.h +++ b/c/driver/postgresql/postgres_type.h @@ -211,7 +211,7 @@ class PostgresType { // initialize and set the appropriate number of children). Returns NANOARROW_OK // on success and perhaps ENOMEM if memory cannot be allocated. Types that // do not have a corresponding Arrow type are returned as Binary with field - // metadata ADBC:posgresql:typname. These types can be represented as their + // metadata ADBC:postgresql:typname. These types can be represented as their // binary COPY representation in the output. ArrowErrorCode SetSchema(ArrowSchema* schema, const std::string& vendor_name = "PostgreSQL") const { @@ -310,6 +310,14 @@ class PostgresType { NANOARROW_RETURN_NOT_OK(children_[0].SetSchema(schema->children[0], vendor_name)); break; + case PostgresTypeId::kInt2vector: + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); + // Postgres conceives of this as a single type, so no child is + // given. We need to allocate it ourselves. 
+ NANOARROW_RETURN_NOT_OK( + ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_INT16)); + break; + case PostgresTypeId::kUserDefined: default: // For user-defined types or types we don't explicitly know how to deal with, we @@ -349,7 +357,7 @@ class PostgresType { !typname_.empty() ? typname_.c_str() : PostgresTypname(type_id_); nanoarrow::UniqueBuffer buffer; - ArrowMetadataBuilderInit(buffer.get(), nullptr); + NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(buffer.get(), nullptr)); // TODO(lidavidm): we have deprecated this in favor of arrow.opaque, // remove once we feel enough time has passed NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderAppend( @@ -569,6 +577,24 @@ inline ArrowErrorCode PostgresType::FromSchema(const PostgresTypeResolver& resol ArrowSchemaView schema_view; NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error)); + if (schema_view.extension_name.data != nullptr && + std::string_view(schema_view.extension_name.data, + schema_view.extension_name.size_bytes) + .compare("arrow.json") == 0) { + switch (schema_view.type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_STRING_VIEW: + return resolver.Find(resolver.GetOID(PostgresTypeId::kJson), out, error); + default: + break; + } + ArrowErrorSet( + error, "Field '%s' is of type arrow.json but storage type is not a string type", + schema_view.schema->name); + return EINVAL; + } + switch (schema_view.type) { case NANOARROW_TYPE_BOOL: return resolver.Find(resolver.GetOID(PostgresTypeId::kBool), out, error); @@ -607,10 +633,10 @@ inline ArrowErrorCode PostgresType::FromSchema(const PostgresTypeResolver& resol case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: return resolver.Find(resolver.GetOID(PostgresTypeId::kInterval), out, error); case NANOARROW_TYPE_TIMESTAMP: - if (strcmp("", schema_view.timezone) == 0) { - return resolver.Find(resolver.GetOID(PostgresTypeId::kTimestamptz), out, error); - } else { + if 
(std::string_view(schema_view.timezone).empty()) { return resolver.Find(resolver.GetOID(PostgresTypeId::kTimestamp), out, error); + } else { + return resolver.Find(resolver.GetOID(PostgresTypeId::kTimestamptz), out, error); } case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_DECIMAL256: diff --git a/c/driver/postgresql/postgres_type_test.cc b/c/driver/postgresql/postgres_type_test.cc index 2c76f4c1f4..bd2fdd6385 100644 --- a/c/driver/postgresql/postgres_type_test.cc +++ b/c/driver/postgresql/postgres_type_test.cc @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. +#include #include +#include #include #include @@ -301,6 +303,24 @@ TEST(PostgresTypeTest, PostgresTypeFromSchema) { EXPECT_EQ(type.type_id(), PostgresTypeId::kText); schema.reset(); + ArrowSchemaInit(schema.get()); + ASSERT_EQ(ArrowSchemaSetTypeDateTime(schema.get(), NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TIME_UNIT_MICRO, ""), + NANOARROW_OK); + EXPECT_EQ(PostgresType::FromSchema(resolver, schema.get(), &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::kTimestamp); + schema.reset(); + + ArrowSchemaInit(schema.get()); + ASSERT_EQ(ArrowSchemaSetTypeDateTime(schema.get(), NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TIME_UNIT_MICRO, "America/Phoenix"), + NANOARROW_OK); + EXPECT_EQ(PostgresType::FromSchema(resolver, schema.get(), &type, nullptr), + NANOARROW_OK); + EXPECT_EQ(type.type_id(), PostgresTypeId::kTimestamptz); + schema.reset(); + ArrowSchemaInit(schema.get()); ASSERT_EQ(ArrowSchemaSetType(schema.get(), NANOARROW_TYPE_LIST), NANOARROW_OK); ASSERT_EQ(ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_BOOL), NANOARROW_OK); @@ -439,4 +459,18 @@ TEST(PostgresTypeTest, PostgresTypeResolveRecord) { EXPECT_EQ(type.child(1).type_id(), PostgresTypeId::kText); } +TEST(PostgresTypeTest, PostgresTypeResolveInt2vector) { + MockTypeResolver resolver; + ASSERT_EQ(resolver.Init(), NANOARROW_OK); + + PostgresType type; + + const auto 
int2vector_oid = resolver.GetOID(PostgresTypeId::kInt2vector); + EXPECT_EQ(resolver.Find(int2vector_oid, &type, nullptr), NANOARROW_OK); + EXPECT_EQ(type.oid(), int2vector_oid); + EXPECT_EQ(type.typname(), "int2vector"); + EXPECT_EQ(type.type_id(), PostgresTypeId::kInt2vector); + EXPECT_EQ(0, type.n_children()); +} + } // namespace adbcpq diff --git a/c/driver/postgresql/postgresql.cc b/c/driver/postgresql/postgresql.cc index e43db98879..858c743789 100644 --- a/c/driver/postgresql/postgresql.cc +++ b/c/driver/postgresql/postgresql.cc @@ -60,8 +60,8 @@ const struct AdbcError* PostgresErrorFromArrayStream(struct ArrowArrayStream* st } int PostgresErrorGetDetailCount(const struct AdbcError* error) { - if (IsCommonError(error)) { - return CommonErrorGetDetailCount(error); + if (InternalAdbcIsCommonError(error)) { + return InternalAdbcCommonErrorGetDetailCount(error); } if (error->vendor_code != ADBC_ERROR_VENDOR_CODE_PRIVATE_DATA) { @@ -73,8 +73,8 @@ int PostgresErrorGetDetailCount(const struct AdbcError* error) { } struct AdbcErrorDetail PostgresErrorGetDetail(const struct AdbcError* error, int index) { - if (IsCommonError(error)) { - return CommonErrorGetDetail(error, index); + if (InternalAdbcIsCommonError(error)) { + return InternalAdbcCommonErrorGetDetail(error, index); } auto error_obj = reinterpret_cast(error->private_data); @@ -82,6 +82,7 @@ struct AdbcErrorDetail PostgresErrorGetDetail(const struct AdbcError* error, int } } // namespace +#if !defined(ADBC_NO_COMMON_ENTRYPOINTS) int AdbcErrorGetDetailCount(const struct AdbcError* error) { return PostgresErrorGetDetailCount(error); } @@ -94,6 +95,7 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream AdbcStatusCode* status) { return PostgresErrorFromArrayStream(stream, status); } +#endif // ADBC_NO_COMMON_ENTRYPOINTS // --------------------------------------------------------------------- // AdbcDatabase @@ -109,11 +111,11 @@ AdbcStatusCode PostgresDatabaseInit(struct AdbcDatabase* 
database, AdbcStatusCode PostgresDatabaseNew(struct AdbcDatabase* database, struct AdbcError* error) { if (!database) { - SetError(error, "%s", "[libpq] database must not be null"); + InternalAdbcSetError(error, "%s", "[libpq] database must not be null"); return ADBC_STATUS_INVALID_STATE; } if (database->private_data) { - SetError(error, "%s", "[libpq] database is already initialized"); + InternalAdbcSetError(error, "%s", "[libpq] database is already initialized"); return ADBC_STATUS_INVALID_STATE; } auto impl = std::make_shared(); @@ -195,6 +197,7 @@ AdbcStatusCode PostgresDatabaseSetOptionInt(struct AdbcDatabase* database, } } // namespace +#if !defined(ADBC_NO_COMMON_ENTRYPOINTS) AdbcStatusCode AdbcDatabaseGetOption(struct AdbcDatabase* database, const char* key, char* value, size_t* length, struct AdbcError* error) { @@ -250,6 +253,7 @@ AdbcStatusCode AdbcDatabaseSetOptionDouble(struct AdbcDatabase* database, const double value, struct AdbcError* error) { return PostgresDatabaseSetOptionDouble(database, key, value, error); } +#endif // ADBC_NO_COMMON_ENTRYPOINTS // --------------------------------------------------------------------- // AdbcConnection @@ -450,6 +454,7 @@ AdbcStatusCode PostgresConnectionSetOptionInt(struct AdbcConnection* connection, } // namespace +#if !defined(ADBC_NO_COMMON_ENTRYPOINTS) AdbcStatusCode AdbcConnectionCancel(struct AdbcConnection* connection, struct AdbcError* error) { return PostgresConnectionCancel(connection, error); @@ -584,6 +589,7 @@ AdbcStatusCode AdbcConnectionSetOptionDouble(struct AdbcConnection* connection, struct AdbcError* error) { return PostgresConnectionSetOptionDouble(connection, key, value, error); } +#endif // ADBC_NO_COMMON_ENTRYPOINTS // --------------------------------------------------------------------- // AdbcStatement @@ -761,6 +767,7 @@ AdbcStatusCode PostgresStatementSetSqlQuery(struct AdbcStatement* statement, } } // namespace +#if !defined(ADBC_NO_COMMON_ENTRYPOINTS) AdbcStatusCode 
AdbcStatementBind(struct AdbcStatement* statement, struct ArrowArray* values, struct ArrowSchema* schema, struct AdbcError* error) { @@ -870,11 +877,12 @@ AdbcStatusCode AdbcStatementSetSqlQuery(struct AdbcStatement* statement, const char* query, struct AdbcError* error) { return PostgresStatementSetSqlQuery(statement, query, error); } +#endif // ADBC_NO_COMMON_ENTRYPOINTS extern "C" { ADBC_EXPORT -AdbcStatusCode PostgresqlDriverInit(int version, void* raw_driver, - struct AdbcError* error) { +AdbcStatusCode AdbcDriverPostgresqlInit(int version, void* raw_driver, + struct AdbcError* error) { if (version != ADBC_VERSION_1_0_0 && version != ADBC_VERSION_1_1_0) { return ADBC_STATUS_NOT_IMPLEMENTED; } @@ -951,8 +959,10 @@ AdbcStatusCode PostgresqlDriverInit(int version, void* raw_driver, return ADBC_STATUS_OK; } +#if !defined(ADBC_NO_COMMON_ENTRYPOINTS) ADBC_EXPORT AdbcStatusCode AdbcDriverInit(int version, void* raw_driver, struct AdbcError* error) { - return PostgresqlDriverInit(version, raw_driver, error); + return AdbcDriverPostgresqlInit(version, raw_driver, error); } +#endif // ADBC_NO_COMMON_ENTRYPOINTS } diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc index be32bd893b..2a80f92873 100644 --- a/c/driver/postgresql/postgresql_test.cc +++ b/c/driver/postgresql/postgresql_test.cc @@ -22,16 +22,20 @@ #include #include #include +#include +#include #include +#include #include +#include #include #include #include +#include #include "common/options.h" #include "common/utils.h" -#include "database.h" #include "validation/adbc_validation.h" #include "validation/adbc_validation_util.h" @@ -223,18 +227,20 @@ class PostgresDatabaseTest : public ::testing::Test, }; ADBCV_TEST_DATABASE(PostgresDatabaseTest) +int Canary(const struct AdbcError*) { return 0; } + TEST_F(PostgresDatabaseTest, AdbcDriverBackwardsCompatibility) { - // XXX: sketchy cast - auto* driver = static_cast(malloc(ADBC_DRIVER_1_0_0_SIZE)); - std::memset(driver, 0, 
ADBC_DRIVER_1_0_0_SIZE); + struct AdbcDriver driver; + std::memset(&driver, 0, ADBC_DRIVER_1_1_0_SIZE); + driver.ErrorGetDetailCount = Canary; - ASSERT_THAT(::PostgresqlDriverInit(ADBC_VERSION_1_0_0, driver, &error), + ASSERT_THAT(::AdbcDriverPostgresqlInit(ADBC_VERSION_1_0_0, &driver, &error), IsOkStatus(&error)); - ASSERT_THAT(::PostgresqlDriverInit(424242, driver, &error), - IsStatus(ADBC_STATUS_NOT_IMPLEMENTED, &error)); + ASSERT_EQ(Canary, driver.ErrorGetDetailCount); - free(driver); + ASSERT_THAT(::AdbcDriverPostgresqlInit(424242, &driver, &error), + IsStatus(ADBC_STATUS_NOT_IMPLEMENTED, &error)); } class PostgresConnectionTest : public ::testing::Test, @@ -338,7 +344,7 @@ TEST_F(PostgresConnectionTest, GetObjectsGetCatalogs) { auto catalogs = {"postgres", "template0", "template1"}; for (auto catalog : catalogs) { struct AdbcGetObjectsCatalog* cat = - AdbcGetObjectsDataGetCatalogByName(*get_objects_data, catalog); + InternalAdbcGetObjectsDataGetCatalogByName(*get_objects_data, catalog); ASSERT_NE(cat, nullptr) << "catalog " << catalog << " not found"; } } @@ -362,7 +368,7 @@ TEST_F(PostgresConnectionTest, GetObjectsGetDbSchemas) { << "could not initialize the AdbcGetObjectsData object"; struct AdbcGetObjectsSchema* schema = - AdbcGetObjectsDataGetSchemaByName(*get_objects_data, "postgres", "public"); + InternalAdbcGetObjectsDataGetSchemaByName(*get_objects_data, "postgres", "public"); ASSERT_NE(schema, nullptr) << "schema public not found"; } @@ -406,12 +412,12 @@ TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsPrimaryKey) { ASSERT_NE(*get_objects_data, nullptr) << "could not initialize the AdbcGetObjectsData object"; - struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsTable* table = InternalAdbcGetObjectsDataGetTableByName( *get_objects_data, "postgres", "public", "adbc_pkey_test"); ASSERT_NE(table, nullptr) << "could not find adbc_pkey_test table"; ASSERT_EQ(table->n_table_columns, 2); - struct 
AdbcGetObjectsColumn* column = AdbcGetObjectsDataGetColumnByName( + struct AdbcGetObjectsColumn* column = InternalAdbcGetObjectsDataGetColumnByName( *get_objects_data, "postgres", "public", "adbc_pkey_test", "id"); ASSERT_NE(column, nullptr) << "could not find id column on adbc_pkey_test table"; @@ -419,8 +425,10 @@ TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsPrimaryKey) { << "expected 1 constraint on adbc_pkey_test table, found: " << table->n_table_constraints; - struct AdbcGetObjectsConstraint* constraint = AdbcGetObjectsDataGetConstraintByName( - *get_objects_data, "postgres", "public", "adbc_pkey_test", "adbc_pkey_test_pkey"); + struct AdbcGetObjectsConstraint* constraint = + InternalAdbcGetObjectsDataGetConstraintByName(*get_objects_data, "postgres", + "public", "adbc_pkey_test", + "adbc_pkey_test_pkey"); ASSERT_NE(constraint, nullptr) << "could not find adbc_pkey_test_pkey constraint"; auto constraint_type = std::string(constraint->constraint_type.data, @@ -498,7 +506,7 @@ TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsForeignKey) { ASSERT_NE(*get_objects_data, nullptr) << "could not initialize the AdbcGetInfoData object"; - struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsTable* table = InternalAdbcGetObjectsDataGetTableByName( *get_objects_data, "postgres", "public", "adbc_fkey_test"); ASSERT_NE(table, nullptr) << "could not find adbc_fkey_test table"; ASSERT_EQ(table->n_table_constraints, 1) @@ -508,8 +516,9 @@ TEST_F(PostgresConnectionTest, GetObjectsGetAllFindsForeignKey) { const std::string version = adbc_validation::GetDriverVendorVersion(&connection); const std::string search_name = version < "120000" ? 
"adbc_fkey_test_fid1_fkey" : "adbc_fkey_test_fid1_fid2_fkey"; - struct AdbcGetObjectsConstraint* constraint = AdbcGetObjectsDataGetConstraintByName( - *get_objects_data, "postgres", "public", "adbc_fkey_test", search_name.c_str()); + struct AdbcGetObjectsConstraint* constraint = + InternalAdbcGetObjectsDataGetConstraintByName( + *get_objects_data, "postgres", "public", "adbc_fkey_test", search_name.c_str()); ASSERT_NE(constraint, nullptr) << "could not find " << search_name << " constraint"; auto constraint_type = std::string(constraint->constraint_type.data, @@ -610,11 +619,11 @@ TEST_F(PostgresConnectionTest, GetObjectsTableTypesFilter) { ASSERT_NE(*get_objects_data, nullptr) << "could not initialize the AdbcGetInfoData object"; - struct AdbcGetObjectsTable* table = AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsTable* table = InternalAdbcGetObjectsDataGetTableByName( *get_objects_data, "postgres", "public", "adbc_table_types_table_test"); ASSERT_EQ(table, nullptr) << "unexpected table adbc_table_types_table_test found"; - struct AdbcGetObjectsTable* view = AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsTable* view = InternalAdbcGetObjectsDataGetTableByName( *get_objects_data, "postgres", "public", "adbc_table_types_view_test"); ASSERT_NE(view, nullptr) << "did not find view adbc_table_types_view_test"; } @@ -686,6 +695,44 @@ TEST_F(PostgresConnectionTest, MetadataSetCurrentDbSchema) { ASSERT_THAT(AdbcStatementRelease(&statement.value, &error), IsOkStatus(&error)); } +TEST_F(PostgresConnectionTest, MetadataSetCurrentDbSchemaInit) { + // Regression test: setting the schema before Init (which Python does) + + // 1. 
Create the schema + { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + adbc_validation::Handle statement; + ASSERT_THAT(AdbcStatementNew(&connection, &statement.value, &error), + IsOkStatus(&error)); + + ASSERT_THAT( + AdbcStatementSetSqlQuery(&statement.value, + "CREATE SCHEMA IF NOT EXISTS regtestschema", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement.value, nullptr, nullptr, &error), + IsOkStatus(&error)); + + ASSERT_THAT(AdbcStatementRelease(&statement.value, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionRelease(&connection, &error), IsOkStatus(&error)); + } + + // 2. Initialize a connection with the schema + { + ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT( + AdbcConnectionSetOption(&connection, ADBC_CONNECTION_OPTION_CURRENT_DB_SCHEMA, + "regtestschema", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); + + ASSERT_THAT(adbc_validation::ConnectionGetOption( + &connection, ADBC_CONNECTION_OPTION_CURRENT_DB_SCHEMA, &error), + ::testing::Optional("regtestschema"s)); + } +} + TEST_F(PostgresConnectionTest, MetadataGetSchemaCaseSensitiveTable) { ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error)); ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error)); @@ -900,7 +947,7 @@ class PostgresStatementTest : public ::testing::Test, void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); } void TestSqlPrepareErrorParamCountMismatch() { GTEST_SKIP() << "Not yet implemented"; } - void TestSqlPrepareGetParameterSchema() { GTEST_SKIP() << "Not yet implemented"; } + void TestSqlPrepareSelectParams() { GTEST_SKIP() << "Not yet implemented"; } void TestConcurrentStatements() { @@ -990,6 +1037,51 @@ class PostgresStatementTest : public 
::testing::Test, }; ADBCV_TEST_STATEMENT(PostgresStatementTest) +TEST_F(PostgresStatementTest, TransactionStatus) { + using adbc_validation::ConnectionGetOption; + const char* txn_status = "adbc.postgresql.transaction_status"; + ASSERT_THAT(quirks()->DropTable(&connection, "txntest", &error), IsOkStatus(&error)); + + ASSERT_EQ("idle", ConnectionGetOption(&connection, txn_status, &error)); + + ASSERT_THAT(AdbcConnectionSetOption(&connection, ADBC_CONNECTION_OPTION_AUTOCOMMIT, + ADBC_OPTION_VALUE_DISABLED, &error), + IsOkStatus(&error)); + + ASSERT_EQ("intrans", ConnectionGetOption(&connection, txn_status, &error)); + + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + + { + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "SELECT 1", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + + ASSERT_EQ("active", ConnectionGetOption(&connection, txn_status, &error)); + + ASSERT_THAT(AdbcConnectionRollback(&connection, &error), IsOkStatus(&error)); + ASSERT_EQ("intrans", ConnectionGetOption(&connection, txn_status, &error)); + } + { + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "SELECT 1", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + + ASSERT_EQ("active", ConnectionGetOption(&connection, txn_status, &error)); + + ASSERT_THAT(AdbcConnectionCommit(&connection, &error), IsOkStatus(&error)); + ASSERT_EQ("intrans", ConnectionGetOption(&connection, txn_status, &error)); + } +} + TEST_F(PostgresStatementTest, SqlIngestSchema) { const std::string schema_name = "testschema"; @@ -1186,6 +1278,180 @@ TEST_F(PostgresStatementTest, 
SqlIngestTimestampOverflow) { } } +TEST_F(PostgresStatementTest, SqlIngestJson) { + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + + std::string drop = "DROP TABLE IF EXISTS jsontable"; + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, drop.c_str(), &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsOkStatus(&error)); + + { + adbc_validation::Handle schema; + adbc_validation::Handle batch; + + ArrowSchemaInit(&schema.value); + ASSERT_THAT(ArrowSchemaSetTypeStruct(&schema.value, 1), adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_STRING), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetName(schema->children[0], "j"), + adbc_validation::IsOkErrno()); + + nanoarrow::UniqueBuffer buffer; + ASSERT_THAT(ArrowMetadataBuilderInit(buffer.get(), nullptr), + adbc_validation::IsOkErrno()); + ASSERT_THAT( + ArrowMetadataBuilderAppend(buffer.get(), ArrowCharView("ARROW:extension:name"), + ArrowCharView("arrow.json")), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetMetadata(schema->children[0], + reinterpret_cast(buffer->data)), + adbc_validation::IsOkErrno()); + + ASSERT_THAT((adbc_validation::MakeBatch( + &schema.value, &batch.value, static_cast(nullptr), + {R"({"a": 1, "b": [1, 2, 3]})", std::nullopt})), + adbc_validation::IsOkErrno()); + + ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_TARGET_TABLE, + "jsontable", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_MODE, + ADBC_INGEST_OPTION_MODE_CREATE, &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementBind(&statement, &batch.value, &schema.value, &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsOkStatus(&error)); + } + + // Check round-trip + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "SELECT * FROM 
jsontable", &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_EQ(1, reader.fields.size()); + ASSERT_EQ(NANOARROW_TYPE_STRING, reader.fields[0].type); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ArrowStringView view = ArrowArrayViewGetStringUnsafe(reader.array_view->children[0], 0); + std::string_view v(view.data, static_cast(view.size_bytes)); + ASSERT_EQ(R"({"a": 1, "b": [1, 2, 3]})", v); +} + +// Ensure the table is actually created with the JSON type by trying to ingest +// invalid JSON +TEST_F(PostgresStatementTest, SqlIngestJsonInvalid) { + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + + std::string drop = "DROP TABLE IF EXISTS jsontable"; + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, drop.c_str(), &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsOkStatus(&error)); + + adbc_validation::Handle schema; + adbc_validation::Handle batch; + + ArrowSchemaInit(&schema.value); + ASSERT_THAT(ArrowSchemaSetTypeStruct(&schema.value, 1), adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_STRING), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetName(schema->children[0], "j"), adbc_validation::IsOkErrno()); + + nanoarrow::UniqueBuffer buffer; + ASSERT_THAT(ArrowMetadataBuilderInit(buffer.get(), nullptr), + adbc_validation::IsOkErrno()); + ASSERT_THAT( + ArrowMetadataBuilderAppend(buffer.get(), ArrowCharView("ARROW:extension:name"), + ArrowCharView("arrow.json")), + adbc_validation::IsOkErrno()); + ASSERT_THAT( + ArrowSchemaSetMetadata(schema->children[0], reinterpret_cast(buffer->data)), + adbc_validation::IsOkErrno()); + + ASSERT_THAT((adbc_validation::MakeBatch( + &schema.value, &batch.value, 
static_cast(nullptr), + {R"({)", std::nullopt})), + adbc_validation::IsOkErrno()); + + ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_TARGET_TABLE, + "jsontable", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_MODE, + ADBC_INGEST_OPTION_MODE_CREATE, &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementBind(&statement, &batch.value, &schema.value, &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + ASSERT_THAT(error.message, ::testing::HasSubstr("invalid input syntax for type json")); +} + +TEST_F(PostgresStatementTest, SqlIngestJsonb) { + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + + std::string drop = "DROP TABLE IF EXISTS jsontable"; + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, drop.c_str(), &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsOkStatus(&error)); + + drop = "CREATE TABLE jsontable (j JSONB)"; + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, drop.c_str(), &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsOkStatus(&error)); + + { + adbc_validation::Handle schema; + adbc_validation::Handle batch; + + ArrowSchemaInit(&schema.value); + ASSERT_THAT(ArrowSchemaSetTypeStruct(&schema.value, 1), adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_STRING), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetName(schema->children[0], "j"), + adbc_validation::IsOkErrno()); + + nanoarrow::UniqueBuffer buffer; + ASSERT_THAT(ArrowMetadataBuilderInit(buffer.get(), nullptr), + adbc_validation::IsOkErrno()); + ASSERT_THAT( + ArrowMetadataBuilderAppend(buffer.get(), ArrowCharView("ARROW:extension:name"), + ArrowCharView("arrow.json")), + 
adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetMetadata(schema->children[0], + reinterpret_cast(buffer->data)), + adbc_validation::IsOkErrno()); + + ASSERT_THAT((adbc_validation::MakeBatch( + &schema.value, &batch.value, static_cast(nullptr), + {R"({"a": 1, "b": [1, 2, 3]})", std::nullopt})), + adbc_validation::IsOkErrno()); + + ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_TARGET_TABLE, + "jsontable", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetOption(&statement, ADBC_INGEST_OPTION_MODE, + ADBC_INGEST_OPTION_MODE_APPEND, &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementBind(&statement, &batch.value, &schema.value, &error), + IsOkStatus(&error)); + // TODO(https://github.com/apache/arrow-adbc/issues/3293): we need a + // different extension type for JSONB so the driver can know to generate + // the appropriate COPY representation + // (JSON-representation-version-prefixed JSON string). + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + } +} + TEST_F(PostgresStatementTest, SqlReadIntervalOverflow) { ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); @@ -1446,6 +1712,104 @@ TEST_F(PostgresStatementTest, ExecuteParameterizedQueryWithRowsAffected) { } } +// Test for making sure empty string/binary parameters are inserted correct +TEST_F(PostgresStatementTest, EmptyStringAndBinaryParameter) { + ASSERT_THAT(quirks()->DropTable(&connection, "adbc_test", &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + + // Create test table with both TEXT and BYTEA columns + { + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, + "CREATE TABLE adbc_test (text_data TEXT, binary_data BYTEA)", &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT( + AdbcStatementExecuteQuery(&statement, &reader.stream.value, nullptr, 
&error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } + + // Insert empty string and binary via parameters + { + nanoarrow::UniqueSchema schema_bind; + ArrowSchemaInit(schema_bind.get()); + ASSERT_THAT(ArrowSchemaSetTypeStruct(schema_bind.get(), 2), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetType(schema_bind->children[0], NANOARROW_TYPE_STRING), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetType(schema_bind->children[1], NANOARROW_TYPE_BINARY), + adbc_validation::IsOkErrno()); + + nanoarrow::UniqueArray bind; + ASSERT_THAT(ArrowArrayInitFromSchema(bind.get(), schema_bind.get(), nullptr), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowArrayStartAppending(bind.get()), adbc_validation::IsOkErrno()); + + // Add one row with empty string and empty binary parameters + ASSERT_THAT(ArrowArrayAppendString(bind->children[0], ArrowCharView("")), + adbc_validation::IsOkErrno()); + ArrowBufferView empty_buffer = {{nullptr}, 0}; + ASSERT_THAT(ArrowArrayAppendBytes(bind->children[1], empty_buffer), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowArrayFinishElement(bind.get()), adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowArrayFinishBuildingDefault(bind.get(), nullptr), + adbc_validation::IsOkErrno()); + + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, + "INSERT INTO adbc_test VALUES ($1, $2)", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementBind(&statement, bind.get(), schema_bind.get(), &error), + IsOkStatus(&error)); + + adbc_validation::StreamReader reader; + ASSERT_THAT( + AdbcStatementExecuteQuery(&statement, &reader.stream.value, nullptr, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } + + // Verify empty values were inserted correctly (not as NULL) + { + 
ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, "SELECT text_data, binary_data FROM adbc_test", &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT( + AdbcStatementExecuteQuery(&statement, &reader.stream.value, nullptr, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NE(reader.array->release, nullptr); + ASSERT_EQ(reader.array->length, 1); + + // Row should contain empty values, not NULL + ASSERT_EQ(reader.array->children[0]->null_count, 0); // text_data + ASSERT_EQ(reader.array->children[1]->null_count, 0); // binary_data + + // Check that both values are empty (string and binary) + // Check the single row + ASSERT_FALSE(ArrowArrayViewIsNull(reader.array_view->children[0], 0)); + struct ArrowBufferView string_view = + ArrowArrayViewGetBytesUnsafe(reader.array_view->children[0], 0); + ASSERT_EQ(string_view.size_bytes, 0); // Empty string should have size 0 + + ASSERT_FALSE(ArrowArrayViewIsNull(reader.array_view->children[1], 0)); + struct ArrowBufferView binary_view = + ArrowArrayViewGetBytesUnsafe(reader.array_view->children[1], 0); + ASSERT_EQ(binary_view.size_bytes, 0); // Empty binary should have size 0 + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_EQ(reader.array->release, nullptr); + } + + ASSERT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); +} + TEST_F(PostgresStatementTest, SqlExecuteCopyZeroRowOutputError) { ASSERT_THAT(quirks()->DropTable(&connection, "adbc_test", &error), IsOkStatus(&error)); ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); @@ -1552,24 +1916,25 @@ TEST_F(PostgresStatementTest, BatchSizeHint) { // Test that an ADBC 1.0.0-sized error still works TEST_F(PostgresStatementTest, AdbcErrorBackwardsCompatibility) { - // XXX: sketchy cast - auto* error = static_cast(malloc(ADBC_ERROR_1_0_0_SIZE)); - std::memset(error, 0, ADBC_ERROR_1_0_0_SIZE); + struct AdbcError 
error; + std::memset(&error, 0, ADBC_ERROR_1_1_0_SIZE); + struct AdbcDriver canary; + error.private_data = &canary; + error.private_driver = &canary; - ASSERT_THAT(AdbcStatementNew(&connection, &statement, error), IsOkStatus(error)); + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); ASSERT_THAT( - AdbcStatementSetSqlQuery(&statement, "SELECT * FROM thistabledoesnotexist", error), - IsOkStatus(error)); + AdbcStatementSetSqlQuery(&statement, "SELECT * FROM thistabledoesnotexist", &error), + IsOkStatus(&error)); adbc_validation::StreamReader reader; ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, - &reader.rows_affected, error), - IsStatus(ADBC_STATUS_NOT_FOUND, error)); - - ASSERT_EQ("42P01", std::string_view(error->sqlstate, 5)); - ASSERT_EQ(0, AdbcErrorGetDetailCount(error)); - - error->release(error); - free(error); + &reader.rows_affected, &error), + IsStatus(ADBC_STATUS_NOT_FOUND, &error)); + ASSERT_EQ("42P01", std::string_view(error.sqlstate, 5)); + ASSERT_EQ(0, AdbcErrorGetDetailCount(&error)); + ASSERT_EQ(&canary, error.private_data); + ASSERT_EQ(&canary, error.private_driver); + error.release(&error); } TEST_F(PostgresStatementTest, Cancel) { @@ -1693,6 +2058,133 @@ TEST_F(PostgresStatementTest, SetUseCopyFalse) { ASSERT_EQ(reader.array->release, nullptr); } +TEST_F(PostgresStatementTest, SqlQueryInt2vector) { + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + + const char* query = R"(SELECT CAST('-1 42 0' AS int2vector) AS thevector;)"; + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, query, &error), IsOkStatus(&error)); + + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + + reader.GetSchema(); + ASSERT_EQ(reader.schema->n_children, 1); + ASSERT_STREQ(reader.schema->children[0]->format, "+l"); + 
ASSERT_STREQ(reader.schema->children[0]->name, "thevector"); + ASSERT_EQ(reader.schema->children[0]->n_children, 1); + ASSERT_STREQ(reader.schema->children[0]->children[0]->format, "s"); + + ASSERT_THAT(reader.MaybeNext(), adbc_validation::IsOkErrno()); + ASSERT_EQ(reader.array->length, 1); + ASSERT_EQ(reader.array->n_children, 1); + ASSERT_EQ(reader.array->children[0]->null_count, 0); + const auto* offsets = + reinterpret_cast(reader.array->children[0]->buffers[1]); + ASSERT_EQ(offsets[0], 0); + ASSERT_EQ(offsets[1], 3); + + ASSERT_EQ(reader.array->children[0]->children[0]->null_count, 0); + ASSERT_EQ(reader.array->children[0]->children[0]->length, 3); + const auto* data = reinterpret_cast( + reader.array->children[0]->children[0]->buffers[1]); + ASSERT_EQ(data[0], -1); + ASSERT_EQ(data[1], 42); + ASSERT_EQ(data[2], 0); + + ASSERT_THAT(reader.MaybeNext(), adbc_validation::IsOkErrno()); + ASSERT_EQ(reader.array->release, nullptr); +} + +TEST_F(PostgresStatementTest, UnknownOid) { + // Regression test for https://github.com/apache/arrow-adbc/issues/2448 + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, "SELECT typacl FROM pg_type WHERE oid <= 6157", &error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_EQ(1, reader.fields.size()); + ASSERT_EQ(NANOARROW_TYPE_BINARY, reader.fields[0].type); + struct ArrowStringView extension_name = reader.fields[0].extension_name; + ASSERT_EQ("arrow.opaque", + std::string_view(extension_name.data, + static_cast(extension_name.size_bytes))); + struct ArrowStringView extension_metadata = reader.fields[0].extension_metadata; + ASSERT_EQ(R"({"type_name": "unnamed", "vendor_name": "PostgreSQL"})", + std::string_view(extension_metadata.data, + 
static_cast(extension_metadata.size_bytes))); +} + +TEST_F(PostgresStatementTest, SqlQueryJsonb) { + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + + // Setup table + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, + "DROP TABLE IF EXISTS jsonbtest; CREATE TABLE jsonbtest (value JSONB);", + &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsOkStatus(&error)); + + // Insert data + adbc_validation::Handle schema; + adbc_validation::Handle batch; + ArrowSchemaInit(&schema.value); + ASSERT_THAT(ArrowSchemaSetTypeStruct(&schema.value, 1), adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_STRING), + adbc_validation::IsOkErrno()); + ASSERT_THAT(ArrowSchemaSetName(schema->children[0], "value"), + adbc_validation::IsOkErrno()); + + // We need the extension type for the driver to bind data properly + nanoarrow::UniqueBuffer buffer; + ASSERT_THAT(ArrowMetadataBuilderInit(buffer.get(), nullptr), + adbc_validation::IsOkErrno()); + ASSERT_THAT( + ArrowMetadataBuilderAppend(buffer.get(), ArrowCharView("ARROW:extension:name"), + ArrowCharView("arrow.json")), + adbc_validation::IsOkErrno()); + ASSERT_THAT( + ArrowSchemaSetMetadata(schema->children[0], reinterpret_cast(buffer->data)), + adbc_validation::IsOkErrno()); + + ASSERT_THAT((adbc_validation::MakeBatch( + &schema.value, &batch.value, static_cast(nullptr), + {R"({"a": 1, "b": [1, 2, 3]})", std::nullopt})), + adbc_validation::IsOkErrno()); + + ASSERT_THAT(AdbcStatementSetSqlQuery( + &statement, "INSERT INTO jsonbtest(value) VALUES ($1)", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementBind(&statement, &batch.value, &schema.value, &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + IsOkStatus(&error)); + + // Check round-trip + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "SELECT * FROM jsonbtest", 
&error), + IsOkStatus(&error)); + adbc_validation::StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + ASSERT_EQ(1, reader.fields.size()); + ASSERT_EQ(NANOARROW_TYPE_STRING, reader.fields[0].type); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ArrowStringView view = ArrowArrayViewGetStringUnsafe(reader.array_view->children[0], 0); + std::string_view v(view.data, static_cast(view.size_bytes)); + ASSERT_EQ(R"({"a": 1, "b": [1, 2, 3]})", v); +} + struct TypeTestCase { std::string name; std::string sql_type; diff --git a/c/driver/postgresql/result_helper.cc b/c/driver/postgresql/result_helper.cc index 6dd7527a0e..862dd0aacb 100644 --- a/c/driver/postgresql/result_helper.cc +++ b/c/driver/postgresql/result_helper.cc @@ -19,6 +19,8 @@ #include #include +#include +#include #define ADBC_FRAMEWORK_USE_FMT #include "driver/framework/status.h" @@ -46,7 +48,7 @@ Status PqResultHelper::PrepareInternal(int n_params, const Oid* param_oids) cons Status PqResultHelper::Prepare() const { return PrepareInternal(0, nullptr); } Status PqResultHelper::Prepare(const std::vector& param_oids) const { - return PrepareInternal(param_oids.size(), param_oids.data()); + return PrepareInternal(static_cast(param_oids.size()), param_oids.data()); } Status PqResultHelper::DescribePrepared() { @@ -90,8 +92,8 @@ Status PqResultHelper::Execute(const std::vector& params, } ClearResult(); - result_ = PQexecParams(conn_, query_.c_str(), param_values.size(), param_oids_ptr, - param_values.data(), param_lengths.data(), + result_ = PQexecParams(conn_, query_.c_str(), static_cast(param_values.size()), + param_oids_ptr, param_values.data(), param_lengths.data(), param_formats.data(), static_cast(output_format_)); } @@ -167,11 +169,10 @@ Status PqResultHelper::ResolveOutputTypes(PostgresTypeResolver& type_resolver, const Oid pg_oid = PQftype(result_, i); 
PostgresType pg_type; if (type_resolver.Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) { - Status status = - Status::NotImplemented("[libpq] Column #", i + 1, " (\"", PQfname(result_, i), - "\") has unknown type code ", pg_oid); - ClearResult(); - return status; + // We couldn't look up the OID. + // TODO(apache/arrow-adbc#1243): issue a warning (maybe reloading the + // connection will load the OIDs if it was a newly created type) + pg_type = PostgresType::Unnamed(pg_oid); } root_type.AppendChild(PQfname(result_, i), pg_type); diff --git a/c/driver/postgresql/result_helper.h b/c/driver/postgresql/result_helper.h index 1f3f93c46b..4b0e9a6b3c 100644 --- a/c/driver/postgresql/result_helper.h +++ b/c/driver/postgresql/result_helper.h @@ -110,7 +110,7 @@ class PqResultRow { }; // Helper to manager the lifecycle of a PQResult. The query argument -// will be evaluated as part of the constructor, with the desctructor handling cleanup +// will be evaluated as part of the constructor, with the destructor handling cleanup // Caller must call Prepare then Execute, checking both for an OK AdbcStatusCode // prior to iterating class PqResultHelper { diff --git a/c/driver/postgresql/result_reader.cc b/c/driver/postgresql/result_reader.cc index 464bad74a7..61d17bb038 100644 --- a/c/driver/postgresql/result_reader.cc +++ b/c/driver/postgresql/result_reader.cc @@ -100,8 +100,8 @@ int PqResultArrayReader::GetNext(struct ArrowArray* out) { item.size_bytes = pg_item.len; } - NANOARROW_RETURN_NOT_OK( - field_readers_[i]->Read(&item, item.size_bytes, tmp->children[i], &na_error_)); + NANOARROW_RETURN_NOT_OK(field_readers_[i]->Read( + &item, static_cast(item.size_bytes), tmp->children[i], &na_error_)); } } diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc index 129ddebff8..0dec60452b 100644 --- a/c/driver/postgresql/statement.cc +++ b/c/driver/postgresql/statement.cc @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include @@ 
-55,12 +57,12 @@ int TupleReader::GetSchema(struct ArrowSchema* out) { int na_res = copy_reader_->GetSchema(out); if (out->release == nullptr) { - SetError(&error_, "[libpq] Result set was already consumed or freed"); + InternalAdbcSetError(&error_, "[libpq] Result set was already consumed or freed"); status_ = ADBC_STATUS_INVALID_STATE; - return AdbcStatusCodeToErrno(status_); + return InternalAdbcStatusCodeToErrno(status_); } else if (na_res != NANOARROW_OK) { // e.g., Can't allocate memory - SetError(&error_, "[libpq] Error copying schema"); + InternalAdbcSetError(&error_, "[libpq] Error copying schema"); status_ = ADBC_STATUS_INTERNAL; } @@ -78,9 +80,10 @@ int TupleReader::GetCopyData() { int get_copy_res = PQgetCopyData(conn_, &pgbuf_, /*async=*/0); if (get_copy_res == -2) { - SetError(&error_, "[libpq] PQgetCopyData() failed: %s", PQerrorMessage(conn_)); + InternalAdbcSetError(&error_, "[libpq] PQgetCopyData() failed: %s", + PQerrorMessage(conn_)); status_ = ADBC_STATUS_IO; - return AdbcStatusCodeToErrno(status_); + return InternalAdbcStatusCodeToErrno(status_); } if (get_copy_res == -1) { @@ -91,7 +94,7 @@ int TupleReader::GetCopyData() { if (pq_status != PGRES_COMMAND_OK) { status_ = SetError(&error_, result_, "[libpq] Execution error [%s]: %s", PQresStatus(pq_status), PQresultErrorMessage(result_)); - return AdbcStatusCodeToErrno(status_); + return InternalAdbcStatusCodeToErrno(status_); } else { return ENODATA; } @@ -107,8 +110,8 @@ int TupleReader::AppendRowAndFetchNext() { // call to PQgetCopyData()) int na_res = copy_reader_->ReadRecord(&data_, &na_error_); if (na_res != NANOARROW_OK && na_res != ENODATA) { - SetError(&error_, "[libpq] ReadRecord failed at row %" PRId64 ": %s", row_id_, - na_error_.message); + InternalAdbcSetError(&error_, "[libpq] ReadRecord failed at row %" PRId64 ": %s", + row_id_, na_error_.message); status_ = ADBC_STATUS_IO; return na_res; } @@ -134,7 +137,8 @@ int TupleReader::BuildOutput(struct ArrowArray* out) { int na_res = 
copy_reader_->GetArray(out, &na_error_); if (na_res != NANOARROW_OK) { - SetError(&error_, "[libpq] Failed to build result array: %s", na_error_.message); + InternalAdbcSetError(&error_, "[libpq] Failed to build result array: %s", + na_error_.message); status_ = ADBC_STATUS_INTERNAL; return na_res; } @@ -163,7 +167,7 @@ int TupleReader::GetNext(struct ArrowArray* out) { na_res = copy_reader_->ReadHeader(&data_, &na_error_); if (na_res != NANOARROW_OK) { - SetError(&error_, "[libpq] ReadHeader() failed: %s", na_error_.message); + InternalAdbcSetError(&error_, "[libpq] ReadHeader() failed: %s", na_error_.message); return na_res; } @@ -218,55 +222,89 @@ void TupleReader::Release() { row_id_ = -1; } +// Instead of directly exporting the TupleReader, which is tied to the +// lifetime of the Statement, we export a weak_ptr reference instead. That +// way if the user accidentally closes the Statement before the +// ArrowArrayStream, we can avoid a crash. +// See https://github.com/apache/arrow-adbc/issues/2629 +struct ExportedTupleReader { + std::weak_ptr self; +}; + void TupleReader::ExportTo(struct ArrowArrayStream* stream) { stream->get_schema = &GetSchemaTrampoline; stream->get_next = &GetNextTrampoline; stream->get_last_error = &GetLastErrorTrampoline; stream->release = &ReleaseTrampoline; - stream->private_data = this; + stream->private_data = new ExportedTupleReader{weak_from_this()}; } -const struct AdbcError* TupleReader::ErrorFromArrayStream(struct ArrowArrayStream* stream, +const struct AdbcError* TupleReader::ErrorFromArrayStream(struct ArrowArrayStream* self, AdbcStatusCode* status) { - if (!stream->private_data || stream->release != &ReleaseTrampoline) { + if (!self->private_data || self->release != &ReleaseTrampoline) { return nullptr; } - TupleReader* reader = static_cast(stream->private_data); - if (status) { - *status = reader->status_; + auto* wrapper = static_cast(self->private_data); + auto maybe_reader = wrapper->self.lock(); + if (maybe_reader) { + 
if (status) { + *status = maybe_reader->status_; + } + return &maybe_reader->error_; } - return &reader->error_; + return nullptr; } int TupleReader::GetSchemaTrampoline(struct ArrowArrayStream* self, struct ArrowSchema* out) { if (!self || !self->private_data) return EINVAL; - TupleReader* reader = static_cast(self->private_data); - return reader->GetSchema(out); + auto* wrapper = static_cast(self->private_data); + auto maybe_reader = wrapper->self.lock(); + if (maybe_reader) { + return maybe_reader->GetSchema(out); + } + // statement was closed or reader was otherwise invalidated + return EINVAL; } int TupleReader::GetNextTrampoline(struct ArrowArrayStream* self, struct ArrowArray* out) { if (!self || !self->private_data) return EINVAL; - TupleReader* reader = static_cast(self->private_data); - return reader->GetNext(out); + auto* wrapper = static_cast(self->private_data); + auto maybe_reader = wrapper->self.lock(); + if (maybe_reader) { + return maybe_reader->GetNext(out); + } + // statement was closed or reader was otherwise invalidated + return EINVAL; } const char* TupleReader::GetLastErrorTrampoline(struct ArrowArrayStream* self) { if (!self || !self->private_data) return nullptr; + constexpr std::string_view kReaderInvalidated = + "[libpq] Reader invalidated (statement or reader was closed)"; - TupleReader* reader = static_cast(self->private_data); - return reader->last_error(); + auto* wrapper = static_cast(self->private_data); + auto maybe_reader = wrapper->self.lock(); + if (maybe_reader) { + return maybe_reader->last_error(); + } + // statement was closed or reader was otherwise invalidated + return kReaderInvalidated.data(); } void TupleReader::ReleaseTrampoline(struct ArrowArrayStream* self) { if (!self || !self->private_data) return; - TupleReader* reader = static_cast(self->private_data); - reader->Release(); + auto* wrapper = static_cast(self->private_data); + auto maybe_reader = wrapper->self.lock(); + if (maybe_reader) { + 
maybe_reader->Release(); + } + delete wrapper; self->private_data = nullptr; self->release = nullptr; } @@ -274,13 +312,14 @@ void TupleReader::ReleaseTrampoline(struct ArrowArrayStream* self) { AdbcStatusCode PostgresStatement::New(struct AdbcConnection* connection, struct AdbcError* error) { if (!connection || !connection->private_data) { - SetError(error, "%s", "[libpq] Must provide an initialized AdbcConnection"); + InternalAdbcSetError(error, "%s", + "[libpq] Must provide an initialized AdbcConnection"); return ADBC_STATUS_INVALID_ARGUMENT; } connection_ = *reinterpret_cast*>(connection->private_data); type_resolver_ = connection_->type_resolver(); - reader_.conn_ = connection_->conn(); + ClearResult(); return ADBC_STATUS_OK; } @@ -288,10 +327,10 @@ AdbcStatusCode PostgresStatement::Bind(struct ArrowArray* values, struct ArrowSchema* schema, struct AdbcError* error) { if (!values || !values->release) { - SetError(error, "%s", "[libpq] Must provide non-NULL array"); + InternalAdbcSetError(error, "%s", "[libpq] Must provide non-NULL array"); return ADBC_STATUS_INVALID_ARGUMENT; } else if (!schema || !schema->release) { - SetError(error, "%s", "[libpq] Must provide non-NULL schema"); + InternalAdbcSetError(error, "%s", "[libpq] Must provide non-NULL schema"); return ADBC_STATUS_INVALID_ARGUMENT; } @@ -304,7 +343,7 @@ AdbcStatusCode PostgresStatement::Bind(struct ArrowArray* values, AdbcStatusCode PostgresStatement::Bind(struct ArrowArrayStream* stream, struct AdbcError* error) { if (!stream || !stream->release) { - SetError(error, "%s", "[libpq] Must provide non-NULL stream"); + InternalAdbcSetError(error, "%s", "[libpq] Must provide non-NULL stream"); return ADBC_STATUS_INVALID_ARGUMENT; } // Move stream @@ -327,8 +366,9 @@ AdbcStatusCode PostgresStatement::CreateBulkTable(const std::string& current_sch PGconn* conn = connection_->conn(); if (!ingest_.db_schema.empty() && ingest_.temporary) { - SetError(error, "[libpq] Cannot set both %s and %s", - 
ADBC_INGEST_OPTION_TARGET_DB_SCHEMA, ADBC_INGEST_OPTION_TEMPORARY); + InternalAdbcSetError(error, "[libpq] Cannot set both %s and %s", + ADBC_INGEST_OPTION_TARGET_DB_SCHEMA, + ADBC_INGEST_OPTION_TEMPORARY); return ADBC_STATUS_INVALID_STATE; } @@ -337,8 +377,9 @@ AdbcStatusCode PostgresStatement::CreateBulkTable(const std::string& current_sch char* escaped = PQescapeIdentifier(conn, ingest_.db_schema.c_str(), ingest_.db_schema.size()); if (escaped == nullptr) { - SetError(error, "[libpq] Failed to escape target schema %s for ingestion: %s", - ingest_.db_schema.c_str(), PQerrorMessage(conn)); + InternalAdbcSetError( + error, "[libpq] Failed to escape target schema %s for ingestion: %s", + ingest_.db_schema.c_str(), PQerrorMessage(conn)); return ADBC_STATUS_INTERNAL; } *escaped_table += escaped; @@ -361,8 +402,9 @@ AdbcStatusCode PostgresStatement::CreateBulkTable(const std::string& current_sch char* escaped = PQescapeIdentifier(conn, ingest_.target.c_str(), ingest_.target.size()); if (escaped == nullptr) { - SetError(error, "[libpq] Failed to escape target table %s for ingestion: %s", - ingest_.target.c_str(), PQerrorMessage(conn)); + InternalAdbcSetError(error, + "[libpq] Failed to escape target table %s for ingestion: %s", + ingest_.target.c_str(), PQerrorMessage(conn)); return ADBC_STATUS_INTERNAL; } *escaped_table += escaped; @@ -415,8 +457,8 @@ AdbcStatusCode PostgresStatement::CreateBulkTable(const std::string& current_sch const char* unescaped = source_schema.children[i]->name; char* escaped = PQescapeIdentifier(conn, unescaped, std::strlen(unescaped)); if (escaped == nullptr) { - SetError(error, "[libpq] Failed to escape column %s for ingestion: %s", unescaped, - PQerrorMessage(conn)); + InternalAdbcSetError(error, "[libpq] Failed to escape column %s for ingestion: %s", + unescaped, PQerrorMessage(conn)); return ADBC_STATUS_INTERNAL; } create += escaped; @@ -437,7 +479,7 @@ AdbcStatusCode PostgresStatement::CreateBulkTable(const std::string& current_sch } 
create += ")"; - SetError(error, "%s%s", "[libpq] ", create.c_str()); + InternalAdbcSetError(error, "%s%s", "[libpq] ", create.c_str()); PGresult* result = PQexecParams(conn, create.c_str(), /*nParams=*/0, /*paramTypes=*/nullptr, /*paramValues=*/nullptr, /*paramLengths=*/nullptr, /*paramFormats=*/nullptr, @@ -475,7 +517,7 @@ AdbcStatusCode PostgresStatement::ExecuteQuery(struct ArrowArrayStream* stream, } if (query_.empty()) { - SetError(error, "%s", "[libpq] Must SetSqlQuery before ExecuteQuery"); + InternalAdbcSetError(error, "%s", "[libpq] Must SetSqlQuery before ExecuteQuery"); return ADBC_STATUS_INVALID_STATE; } @@ -513,24 +555,24 @@ AdbcStatusCode PostgresStatement::ExecuteQuery(struct ArrowArrayStream* stream, } struct ArrowError na_error; - reader_.copy_reader_ = std::make_unique(); - CHECK_NA(INTERNAL, reader_.copy_reader_->Init(root_type), error); + reader_->copy_reader_ = std::make_unique(); + CHECK_NA(INTERNAL, reader_->copy_reader_->Init(root_type), error); CHECK_NA_DETAIL(INTERNAL, - reader_.copy_reader_->InferOutputSchema( + reader_->copy_reader_->InferOutputSchema( std::string(connection_->VendorName()), &na_error), &na_error, error); - CHECK_NA_DETAIL(INTERNAL, reader_.copy_reader_->InitFieldReaders(&na_error), &na_error, + CHECK_NA_DETAIL(INTERNAL, reader_->copy_reader_->InitFieldReaders(&na_error), &na_error, error); // Execute the COPY query RAISE_STATUS(error, helper.ExecuteCopy()); // We need the PQresult back for the reader - reader_.result_ = helper.ReleaseResult(); + reader_->result_ = helper.ReleaseResult(); // Export to stream - reader_.ExportTo(stream); + reader_->ExportTo(stream); if (rows_affected) *rows_affected = -1; return ADBC_STATUS_OK; } @@ -539,29 +581,30 @@ AdbcStatusCode PostgresStatement::ExecuteSchema(struct ArrowSchema* schema, struct AdbcError* error) { ClearResult(); if (query_.empty()) { - SetError(error, "%s", "[libpq] Must SetSqlQuery before ExecuteQuery"); + InternalAdbcSetError(error, "%s", "[libpq] Must SetSqlQuery 
before ExecuteQuery"); return ADBC_STATUS_INVALID_STATE; } PqResultHelper helper(connection_->conn(), query_); if (bind_.release) { - nanoarrow::UniqueSchema schema; + nanoarrow::UniqueSchema param_schema; struct ArrowError na_error; ArrowErrorInit(&na_error); - CHECK_NA_DETAIL(INTERNAL, ArrowArrayStreamGetSchema(&bind_, schema.get(), &na_error), + CHECK_NA_DETAIL(INTERNAL, + ArrowArrayStreamGetSchema(&bind_, param_schema.get(), &na_error), &na_error, error); - if (std::string(schema->format) != "+s") { - SetError(error, "%s", "[libpq] Bind parameters must have type STRUCT"); + if (std::string(param_schema->format) != "+s") { + InternalAdbcSetError(error, "%s", "[libpq] Bind parameters must have type STRUCT"); return ADBC_STATUS_INVALID_STATE; } - std::vector param_oids(schema->n_children); - for (int64_t i = 0; i < schema->n_children; i++) { + std::vector param_oids(param_schema->n_children); + for (int64_t i = 0; i < param_schema->n_children; i++) { PostgresType pg_type; CHECK_NA_DETAIL(INTERNAL, - PostgresType::FromSchema(*type_resolver_, schema->children[i], + PostgresType::FromSchema(*type_resolver_, param_schema->children[i], &pg_type, &na_error), &na_error, error); param_oids[i] = pg_type.oid(); @@ -591,12 +634,14 @@ AdbcStatusCode PostgresStatement::ExecuteIngest(struct ArrowArrayStream* stream, int64_t* rows_affected, struct AdbcError* error) { if (!bind_.release) { - SetError(error, "%s", "[libpq] Must Bind() before Execute() for bulk ingestion"); + InternalAdbcSetError(error, "%s", + "[libpq] Must Bind() before Execute() for bulk ingestion"); return ADBC_STATUS_INVALID_STATE; } if (stream != nullptr) { - SetError(error, "%s", "[libpq] Bulk ingest with result set is not supported"); + InternalAdbcSetError(error, "%s", + "[libpq] Bulk ingest with result set is not supported"); return ADBC_STATUS_NOT_IMPLEMENTED; } @@ -608,8 +653,8 @@ AdbcStatusCode PostgresStatement::ExecuteIngest(struct ArrowArrayStream* stream, RAISE_STATUS(error, 
result_helper.Execute()); auto it = result_helper.begin(); if (it == result_helper.end()) { - SetError(error, - "[libpq] PostgreSQL returned no rows for 'SELECT CURRENT_SCHEMA()'"); + InternalAdbcSetError( + error, "[libpq] PostgreSQL returned no rows for 'SELECT CURRENT_SCHEMA()'"); return ADBC_STATUS_INTERNAL; } current_schema = (*it)[0].data; @@ -672,7 +717,7 @@ AdbcStatusCode PostgresStatement::GetOption(const char* key, char* value, size_t break; } } else if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_BATCH_SIZE_HINT_BYTES) == 0) { - result = std::to_string(reader_.batch_size_hint_bytes_); + result = std::to_string(reader_->batch_size_hint_bytes_); } else if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_USE_COPY) == 0) { if (UseCopy()) { result = "true"; @@ -680,7 +725,7 @@ AdbcStatusCode PostgresStatement::GetOption(const char* key, char* value, size_t result = "false"; } } else { - SetError(error, "[libpq] Unknown statement option '%s'", key); + InternalAdbcSetError(error, "[libpq] Unknown statement option '%s'", key); return ADBC_STATUS_NOT_FOUND; } @@ -694,13 +739,13 @@ AdbcStatusCode PostgresStatement::GetOption(const char* key, char* value, size_t AdbcStatusCode PostgresStatement::GetOptionBytes(const char* key, uint8_t* value, size_t* length, struct AdbcError* error) { - SetError(error, "[libpq] Unknown statement option '%s'", key); + InternalAdbcSetError(error, "[libpq] Unknown statement option '%s'", key); return ADBC_STATUS_NOT_FOUND; } AdbcStatusCode PostgresStatement::GetOptionDouble(const char* key, double* value, struct AdbcError* error) { - SetError(error, "[libpq] Unknown statement option '%s'", key); + InternalAdbcSetError(error, "[libpq] Unknown statement option '%s'", key); return ADBC_STATUS_NOT_FOUND; } @@ -708,21 +753,37 @@ AdbcStatusCode PostgresStatement::GetOptionInt(const char* key, int64_t* value, struct AdbcError* error) { std::string result; if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_BATCH_SIZE_HINT_BYTES) == 0) { - *value = 
reader_.batch_size_hint_bytes_; + *value = reader_->batch_size_hint_bytes_; return ADBC_STATUS_OK; } - SetError(error, "[libpq] Unknown statement option '%s'", key); + InternalAdbcSetError(error, "[libpq] Unknown statement option '%s'", key); return ADBC_STATUS_NOT_FOUND; } AdbcStatusCode PostgresStatement::GetParameterSchema(struct ArrowSchema* schema, struct AdbcError* error) { - return ADBC_STATUS_NOT_IMPLEMENTED; + if (query_.empty()) { + InternalAdbcSetError(error, "[libpq] Must SetSqlQuery before GetParameterSchema"); + return ADBC_STATUS_INVALID_STATE; + } + + PqResultHelper helper(connection_->conn(), query_); + RAISE_STATUS(error, helper.Prepare()); + RAISE_STATUS(error, helper.DescribePrepared()); + PostgresType param_types; + RAISE_STATUS(error, + helper.ResolveParamTypes(*connection_->type_resolver(), ¶m_types)); + + ArrowSchemaInit(schema); + + RAISE_NA(param_types.SetSchema(schema, std::string(connection_->VendorName()))); + + return ADBC_STATUS_OK; } AdbcStatusCode PostgresStatement::Prepare(struct AdbcError* error) { if (query_.empty()) { - SetError(error, "%s", "[libpq] Must SetSqlQuery() before Prepare()"); + InternalAdbcSetError(error, "%s", "[libpq] Must SetSqlQuery() before Prepare()"); return ADBC_STATUS_INVALID_STATE; } @@ -773,7 +834,8 @@ AdbcStatusCode PostgresStatement::SetOption(const char* key, const char* value, } else if (std::strcmp(value, ADBC_INGEST_OPTION_MODE_CREATE_APPEND) == 0) { ingest_.mode = IngestMode::kCreateAppend; } else { - SetError(error, "[libpq] Invalid value '%s' for option '%s'", value, key); + InternalAdbcSetError(error, "[libpq] Invalid value '%s' for option '%s'", value, + key); return ADBC_STATUS_INVALID_ARGUMENT; } prepared_ = false; @@ -786,29 +848,32 @@ AdbcStatusCode PostgresStatement::SetOption(const char* key, const char* value, } else if (std::strcmp(value, ADBC_OPTION_VALUE_DISABLED) == 0) { ingest_.temporary = false; } else { - SetError(error, "[libpq] Invalid value '%s' for option '%s'", value, key); + 
InternalAdbcSetError(error, "[libpq] Invalid value '%s' for option '%s'", value, + key); return ADBC_STATUS_INVALID_ARGUMENT; } prepared_ = false; } else if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_BATCH_SIZE_HINT_BYTES) == 0) { int64_t int_value = std::atol(value); if (int_value <= 0) { - SetError(error, "[libpq] Invalid value '%s' for option '%s'", value, key); + InternalAdbcSetError(error, "[libpq] Invalid value '%s' for option '%s'", value, + key); return ADBC_STATUS_INVALID_ARGUMENT; } - this->reader_.batch_size_hint_bytes_ = int_value; + this->batch_size_hint_bytes_ = this->reader_->batch_size_hint_bytes_ = int_value; } else if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_USE_COPY) == 0) { if (std::strcmp(value, ADBC_OPTION_VALUE_ENABLED) == 0) { use_copy_ = true; } else if (std::strcmp(value, ADBC_OPTION_VALUE_DISABLED) == 0) { use_copy_ = false; } else { - SetError(error, "[libpq] Invalid value '%s' for option '%s'", value, key); + InternalAdbcSetError(error, "[libpq] Invalid value '%s' for option '%s'", value, + key); return ADBC_STATUS_INVALID_ARGUMENT; } } else { - SetError(error, "[libpq] Unknown statement option '%s'", key); + InternalAdbcSetError(error, "[libpq] Unknown statement option '%s'", key); return ADBC_STATUS_NOT_IMPLEMENTED; } return ADBC_STATUS_OK; @@ -816,13 +881,13 @@ AdbcStatusCode PostgresStatement::SetOption(const char* key, const char* value, AdbcStatusCode PostgresStatement::SetOptionBytes(const char* key, const uint8_t* value, size_t length, struct AdbcError* error) { - SetError(error, "%s%s", "[libpq] Unknown statement option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown statement option ", key); return ADBC_STATUS_NOT_IMPLEMENTED; } AdbcStatusCode PostgresStatement::SetOptionDouble(const char* key, double value, struct AdbcError* error) { - SetError(error, "%s%s", "[libpq] Unknown statement option ", key); + InternalAdbcSetError(error, "%s%s", "[libpq] Unknown statement option ", key); return 
ADBC_STATUS_NOT_IMPLEMENTED; } @@ -830,20 +895,23 @@ AdbcStatusCode PostgresStatement::SetOptionInt(const char* key, int64_t value, struct AdbcError* error) { if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_BATCH_SIZE_HINT_BYTES) == 0) { if (value <= 0) { - SetError(error, "[libpq] Invalid value '%" PRIi64 "' for option '%s'", value, key); + InternalAdbcSetError(error, "[libpq] Invalid value '%" PRIi64 "' for option '%s'", + value, key); return ADBC_STATUS_INVALID_ARGUMENT; } - this->reader_.batch_size_hint_bytes_ = value; + this->batch_size_hint_bytes_ = this->reader_->batch_size_hint_bytes_ = value; return ADBC_STATUS_OK; } - SetError(error, "[libpq] Unknown statement option '%s'", key); + InternalAdbcSetError(error, "[libpq] Unknown statement option '%s'", key); return ADBC_STATUS_NOT_IMPLEMENTED; } void PostgresStatement::ClearResult() { // TODO: we may want to synchronize here for safety - reader_.Release(); + if (reader_) reader_->Release(); + reader_ = std::make_shared(connection_->conn()); + reader_->batch_size_hint_bytes_ = batch_size_hint_bytes_; } int PostgresStatement::UseCopy() { diff --git a/c/driver/postgresql/statement.h b/c/driver/postgresql/statement.h index 60ada992b0..a2c3f5e885 100644 --- a/c/driver/postgresql/statement.h +++ b/c/driver/postgresql/statement.h @@ -39,8 +39,10 @@ namespace adbcpq { class PostgresConnection; class PostgresStatement; +constexpr static int64_t kDefaultBatchSizeHintBytes = 16777216; + /// \brief An ArrowArrayStream that reads tuples from a PGresult. 
-class TupleReader final { +class TupleReader final : public std::enable_shared_from_this { public: TupleReader(PGconn* conn) : status_(ADBC_STATUS_OK), @@ -50,7 +52,7 @@ class TupleReader final { pgbuf_(nullptr), copy_reader_(nullptr), row_id_(-1), - batch_size_hint_bytes_(16777216), + batch_size_hint_bytes_(kDefaultBatchSizeHintBytes), is_finished_(false) { ArrowErrorInit(&na_error_); data_.data.as_char = nullptr; @@ -98,7 +100,8 @@ class PostgresStatement { query_(), prepared_(false), use_copy_(-1), - reader_(nullptr) { + reader_(nullptr), + batch_size_hint_bytes_(kDefaultBatchSizeHintBytes) { std::memset(&bind_, 0, sizeof(bind_)); } @@ -170,7 +173,8 @@ class PostgresStatement { bool temporary = false; } ingest_; - TupleReader reader_; + std::shared_ptr reader_; + int64_t batch_size_hint_bytes_; int UseCopy(); }; diff --git a/c/driver/snowflake/AdbcDriverSnowflakeConfig.cmake.in b/c/driver/snowflake/AdbcDriverSnowflakeConfig.cmake.in new file mode 100644 index 0000000000..04d5397adc --- /dev/null +++ b/c/driver/snowflake/AdbcDriverSnowflakeConfig.cmake.in @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +@PACKAGE_INIT@ + +set(ADBC_BUILD_SHARED @ADBC_BUILD_SHARED@) +set(ADBC_BUILD_STATIC @ADBC_BUILD_STATIC@) +set(ADBC_FULL_SO_VERSION "@ADBC_FULL_SO_VERSION@") +set(ADBC_INSTALL_LIBDIR "@CMAKE_INSTALL_LIBDIR@") +set(ADBC_SO_VERSION "@ADBC_SO_VERSION@") +set(ADBC_VERSION "@ADBC_VERSION@") + +@ADBC_GO_PACKAGE_INIT@ + +if(ADBC_BUILD_SHARED) + adbc_add_shared_library( + AdbcDriverSnowflake::adbc_driver_snowflake_shared + adbc_driver_snowflake) +endif() + +if(ADBC_BUILD_STATIC) + adbc_add_static_library( + AdbcDriverSnowflake::adbc_driver_snowflake_static + adbc_driver_snowflake) +endif() + +check_required_components(AdbcDriverSnowflake) diff --git a/c/driver/snowflake/CMakeLists.txt b/c/driver/snowflake/CMakeLists.txt index 1d3874b41f..2a81eb2c49 100644 --- a/c/driver/snowflake/CMakeLists.txt +++ b/c/driver/snowflake/CMakeLists.txt @@ -26,10 +26,14 @@ add_go_lib("${REPOSITORY_ROOT}/go/adbc/pkg/snowflake/" utils.c BUILD_TAGS driverlib + CMAKE_PACKAGE_NAME + AdbcDriverSnowflake PKG_CONFIG_NAME adbc-driver-snowflake SHARED_LINK_FLAGS ${LDFLAGS} + DEFINES + ${ADBC_TARGET_COMPILE_DEFINITIONS} OUTPUTS ADBC_LIBRARIES) @@ -37,7 +41,6 @@ foreach(LIB_TARGET ${ADBC_LIBRARIES}) target_include_directories(${LIB_TARGET} SYSTEM INTERFACE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ - ${REPOSITORY_ROOT}/c/vendor ${REPOSITORY_ROOT}/c/driver) endforeach() @@ -62,9 +65,7 @@ if(ADBC_BUILD_TESTS) ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-snowflake-test PRIVATE cxx_std_17) target_include_directories(adbc-driver-snowflake-test SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/ - ${REPOSITORY_ROOT}/c/include/ - ${REPOSITORY_ROOT}/c/vendor + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ ${REPOSITORY_ROOT}/c/driver ${REPOSITORY_ROOT}/c/driver/common) adbc_configure_target(adbc-driver-snowflake-test) diff --git a/c/driver/snowflake/README.md b/c/driver/snowflake/README.md index 32ec416304..180b4c0fce 100644 --- a/c/driver/snowflake/README.md +++ 
b/c/driver/snowflake/README.md @@ -19,12 +19,24 @@ # ADBC Snowflake Driver +![Vendor: Snowflake](https://img.shields.io/badge/vendor-Snowflake-blue?style=flat-square) +![Implementation: Go](https://img.shields.io/badge/implementation-Go-violet?style=flat-square) +![Status: Stable](https://img.shields.io/badge/status-stable-green?style=flat-square) + +[![conda-forge: adbc-driver-snowflake](https://img.shields.io/conda/vn/conda-forge/adbc-driver-snowflake?label=conda-forge%3A%20adbc-driver-snowflake&style=flat-square)](https://anaconda.org/conda-forge/adbc-driver-snowflake) +[![conda-forge: libadbc-driver-snowflake](https://img.shields.io/conda/vn/conda-forge/libadbc-driver-snowflake?label=conda-forge%3A%20libadbc-driver-snowflake&style=flat-square)](https://anaconda.org/conda-forge/libadbc-driver-snowflake) +[![crates.io: adbc_snowflake](https://img.shields.io/crates/v/adbc_snowflake?style=flat-square)](https://crates.io/crates/adbc_snowflake) +[![Go: github.com/apache/arrow-adbc/go/adbc/driver/snowflake](https://img.shields.io/badge/Go-go%2Fadbc%2Fdriver%2Fsnowflake-blue)](https://pkg.go.dev/github.com/apache/arrow-adbc/go/adbc/driver/snowflake) +[![NuGet: Apache.Arrow.Adbc.Drivers.Interop.Snowflake](https://img.shields.io/nuget/v/Apache.Arrow.Adbc.Drivers.Interop.Snowflake)](https://www.nuget.org/packages/Apache.Arrow.Adbc.Drivers.Interop.Snowflake) +[![PyPI: adbc-driver-snowflake](https://img.shields.io/pypi/v/adbc-driver-snowflake?style=flat-square)](https://pypi.org/project/adbc-driver-snowflake/) +[![R-multiverse: adbcsnowflake](https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fcommunity.r-multiverse.org%2Fapi%2Fpackages%2Fadbcsnowflake&query=%24.Version&label=r-multiverse%3A%20adbcsnowflake&style=flat-square)](https://community.r-multiverse.org/adbcsnowflake/) + This driver provides an interface to [Snowflake](https://www.snowflake.com/) using ADBC. ## Building -See [CONTRIBUTING.md](../../CONTRIBUTING.md) for details. 
+See [CONTRIBUTING.md](../../../CONTRIBUTING.md) for details. ## Testing diff --git a/c/driver/snowflake/meson.build b/c/driver/snowflake/meson.build index 20a7d3c70e..1ce6f34a35 100644 --- a/c/driver/snowflake/meson.build +++ b/c/driver/snowflake/meson.build @@ -19,21 +19,21 @@ golang = find_program('go') if build_machine.system() == 'windows' - prefix = '' - suffix = '.lib' + prefix = '' + suffix = '.lib' elif build_machine.system() == 'darwin' - prefix = 'lib' - suffix = '.dylib' + prefix = 'lib' + suffix = '.dylib' else - prefix = 'lib' - suffix = '.so' + prefix = 'lib' + suffix = '.so' endif adbc_driver_snowflake_name = prefix + 'adbc_driver_snowflake' + suffix adbc_driver_snowflake_lib = custom_target( 'adbc_driver_snowflake', output: adbc_driver_snowflake_name, - command : [ + command: [ golang, 'build', '-C', @@ -43,8 +43,8 @@ adbc_driver_snowflake_lib = custom_target( '-o', meson.current_build_dir() + '/' + adbc_driver_snowflake_name, ], - install : true, - install_dir : '.', + install: true, + install_dir: '.', ) pkg.generate( @@ -55,16 +55,16 @@ pkg.generate( filebase: 'adbc-driver-snowflake', ) -if get_option('tests') - exc = executable( - 'adbc-driver-snowflake-test', - 'snowflake_test.cc', - include_directories: [include_dir, c_dir, driver_dir], - link_with: [ - adbc_common_lib, - adbc_driver_snowflake_lib, - ], - dependencies: [adbc_validation_dep], - ) - test('adbc-driver-snowflake', exc) -endif +adbc_driver_snowflake_dep = declare_dependency( + include_directories: include_dir, + link_with: adbc_driver_snowflake_lib, +) + +exc = executable( + 'adbc-driver-snowflake-test', + 'snowflake_test.cc', + include_directories: [include_dir, c_dir, driver_dir], + link_with: [adbc_common_lib, adbc_driver_snowflake_lib], + dependencies: [adbc_validation_dep], +) +test('adbc-driver-snowflake', exc) diff --git a/c/driver/snowflake/snowflake_test.cc b/c/driver/snowflake/snowflake_test.cc index 262286192a..37ceb83e31 100644 --- 
a/c/driver/snowflake/snowflake_test.cc +++ b/c/driver/snowflake/snowflake_test.cc @@ -15,15 +15,19 @@ // specific language governing permissions and limitations // under the License. +#include +#include +#include +#include +#include + #include #include #include #include #include #include -#include -#include -#include + #include "validation/adbc_validation.h" #include "validation/adbc_validation_util.h" diff --git a/c/driver/sqlite/AdbcDriverSQLiteConfig.cmake.in b/c/driver/sqlite/AdbcDriverSQLiteConfig.cmake.in index cea8cf27f4..6310ff1861 100644 --- a/c/driver/sqlite/AdbcDriverSQLiteConfig.cmake.in +++ b/c/driver/sqlite/AdbcDriverSQLiteConfig.cmake.in @@ -17,8 +17,12 @@ @PACKAGE_INIT@ +include(CMakeFindDependencyMacro) + set(ADBC_VERSION "@ADBC_VERSION@") include("${CMAKE_CURRENT_LIST_DIR}/AdbcDriverSQLiteTargets.cmake") +find_dependency(SQLite3) + check_required_components(AdbcDriverSQLite) diff --git a/c/driver/sqlite/CMakeLists.txt b/c/driver/sqlite/CMakeLists.txt index d0c45b7433..4c6bb1f1a7 100644 --- a/c/driver/sqlite/CMakeLists.txt +++ b/c/driver/sqlite/CMakeLists.txt @@ -52,23 +52,22 @@ add_arrow_lib(adbc_driver_sqlite ${SQLite3_LINK_LIBRARIES} adbc_driver_common adbc_driver_framework - nanoarrow STATIC_LINK_LIBS ${SQLite3_LINK_LIBRARIES} adbc_driver_common adbc_driver_framework - nanoarrow ${LIBPQ_STATIC_LIBRARIES}) foreach(LIB_TARGET ${ADBC_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ADBC_EXPORTING ${ADBC_SQLITE_COMPILE_DEFINES}) target_include_directories(${LIB_TARGET} SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/ - ${REPOSITORY_ROOT}/c/include/ - ${SQLite3_INCLUDE_DIRS} - ${REPOSITORY_ROOT}/c/vendor - ${REPOSITORY_ROOT}/c/driver) + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ + ${SQLite3_INCLUDE_DIRS} ${REPOSITORY_ROOT}/c/driver) + + if(NOT ADBC_DEFINE_COMMON_ENTRYPOINTS) + target_compile_definitions(${LIB_TARGET} PRIVATE ${ADBC_TARGET_COMPILE_DEFINITIONS}) + endif() endforeach() include(CheckTypeSize) @@ -92,16 +91,12 @@ 
if(ADBC_BUILD_TESTS) EXTRA_LINK_LIBS adbc_driver_common adbc_validation - nanoarrow ${TEST_LINK_LIBS}) target_compile_definitions(adbc-driver-sqlite-test PRIVATE ${ADBC_SQLITE_COMPILE_DEFINES}) target_compile_features(adbc-driver-sqlite-test PRIVATE cxx_std_17) target_include_directories(adbc-driver-sqlite-test SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/ - ${REPOSITORY_ROOT}/c/include/ - ${LIBPQ_INCLUDE_DIRS} - ${REPOSITORY_ROOT}/c/vendor - ${REPOSITORY_ROOT}/c/driver) + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ + ${LIBPQ_INCLUDE_DIRS} ${REPOSITORY_ROOT}/c/driver) adbc_configure_target(adbc-driver-sqlite-test) endif() diff --git a/c/driver/sqlite/README.md b/c/driver/sqlite/README.md index f8aea1c001..1f1bdadd65 100644 --- a/c/driver/sqlite/README.md +++ b/c/driver/sqlite/README.md @@ -19,6 +19,15 @@ # ADBC SQLite Driver +![Vendor: SQLite](https://img.shields.io/badge/vendor-SQLite-blue?style=flat-square) +![Implementation: C/C++](https://img.shields.io/badge/implementation-C%2FC%2B%2B-violet?style=flat-square) +![Status: Stable](https://img.shields.io/badge/status-stable-green?style=flat-square) + +[![conda-forge: adbc-driver-sqlite](https://img.shields.io/conda/vn/conda-forge/adbc-driver-sqlite?label=conda-forge%3A%20adbc-driver-sqlite&style=flat-square)](https://anaconda.org/conda-forge/adbc-driver-sqlite) +[![conda-forge: libadbc-driver-sqlite](https://img.shields.io/conda/vn/conda-forge/libadbc-driver-sqlite?label=conda-forge%3A%20libadbc-driver-sqlite&style=flat-square)](https://anaconda.org/conda-forge/libadbc-driver-sqlite) +[![CRAN: adbcsqlite](https://img.shields.io/cran/v/adbcsqlite?style=flat-square)](https://cran.r-project.org/web/packages/adbcsqlite/index.html) +[![PyPI: adbc-driver-sqlite](https://img.shields.io/pypi/v/adbc-driver-sqlite?style=flat-square)](https://pypi.org/project/adbc-driver-sqlite/) + This driver provides an interface to [SQLite](https://sqlite.org/index.html) using ADBC. 
@@ -27,7 +36,7 @@ This driver provides an interface to Dependencies: SQLite itself. This can be installed with your favorite package manager. -See [CONTRIBUTING.md](../../CONTRIBUTING.md) for details. +See [CONTRIBUTING.md](../../../CONTRIBUTING.md) for details. ## Testing diff --git a/c/driver/sqlite/meson.build b/c/driver/sqlite/meson.build index ad61f7e435..6ddc91d03e 100644 --- a/c/driver/sqlite/meson.build +++ b/c/driver/sqlite/meson.build @@ -19,15 +19,12 @@ sqlite3_dep = dependency('sqlite3') time_t_size = meson.get_compiler('c').sizeof( 'time_t', - prefix : '#include ', + prefix: '#include ', ) adbc_sqlite3_driver_lib = library( 'adbc_driver_sqlite', - sources: [ - 'sqlite.cc', - 'statement_reader.c', - ], + sources: ['sqlite.cc', 'statement_reader.c'], include_directories: [include_dir, c_dir], link_with: [adbc_common_lib, adbc_framework_lib], dependencies: [nanoarrow_dep, fmt_dep, sqlite3_dep], @@ -42,16 +39,16 @@ pkg.generate( filebase: 'adbc-driver-sqlite', ) -if get_option('tests') - exc = executable( - 'adbc-driver-sqlite-test', - sources: ['sqlite_test.cc'], - include_directories: [include_dir, c_dir, driver_dir], - link_with: [ - adbc_common_lib, - adbc_sqlite3_driver_lib, - ], - dependencies: [sqlite3_dep, adbc_validation_dep], - ) - test('adbc-driver-sqlite', exc) -endif +adbc_driver_sqlite_dep = declare_dependency( + include_directories: include_dir, + link_with: adbc_sqlite3_driver_lib, +) + +exc = executable( + 'adbc-driver-sqlite-test', + sources: ['sqlite_test.cc'], + include_directories: [include_dir, c_dir, driver_dir], + link_with: [adbc_common_lib, adbc_sqlite3_driver_lib], + dependencies: [sqlite3_dep, adbc_validation_dep], +) +test('adbc-driver-sqlite', exc) diff --git a/c/driver/sqlite/sqlite.cc b/c/driver/sqlite/sqlite.cc index a5186d00b7..dc3a7de435 100644 --- a/c/driver/sqlite/sqlite.cc +++ b/c/driver/sqlite/sqlite.cc @@ -15,7 +15,12 @@ // specific language governing permissions and limitations // under the License. 
+#include #include +#include +#include +#include +#include #include #include @@ -46,6 +51,7 @@ constexpr std::string_view kConnectionOptionLoadExtensionEntrypoint = "adbc.sqlite.load_extension.entrypoint"; /// The batch size for query results (and for initial type inference) constexpr std::string_view kStatementOptionBatchRows = "adbc.sqlite.query.batch_rows"; +constexpr std::string_view kStatementOptionBindByName = "adbc.statement.bind_by_name"; std::string_view GetColumnText(sqlite3_stmt* stmt, int index) { return { @@ -150,7 +156,7 @@ class SqliteQuery { return Close(rc); } - Status Close(int rc) { + Status Close(int last_rc) { if (stmt_) { int rc = sqlite3_finalize(stmt_); stmt_ = nullptr; @@ -158,7 +164,7 @@ class SqliteQuery { return status::fmt::Internal("failed to execute: {}\nquery was: {}", sqlite3_errmsg(conn_), query_); } - } else if (rc != SQLITE_OK) { + } else if (last_rc != SQLITE_OK) { return status::fmt::Internal("failed to execute: {}\nquery was: {}", sqlite3_errmsg(conn_), query_); } @@ -192,7 +198,7 @@ class SqliteQuery { UNWRAP_RESULT(bool has_row, q.Next()); if (!has_row) break; - int rc = std::forward(row_func)(q.stmt_); + rc = std::forward(row_func)(q.stmt_); if (rc != SQLITE_OK) break; } return q.Close(); @@ -312,7 +318,7 @@ struct SqliteGetObjectsHelper : public driver::GetObjectsHelper { // XXX: because we're saving the SqliteQuery, we also need to save the string builder columns_query.Reset(); columns_query.Append( - R"(SELECT cid, name, type, "notnull", dflt_value FROM pragma_table_info("%w" , "%w") WHERE NAME LIKE ?)", + R"(SELECT cid, name, type, 'notnull', dflt_value FROM pragma_table_info(%Q, %Q) WHERE NAME LIKE ?)", table.data(), catalog.data()); UNWRAP_RESULT(auto query, columns_query.GetString()); assert(!query.empty()); @@ -343,7 +349,7 @@ struct SqliteGetObjectsHelper : public driver::GetObjectsHelper { { SqliteStringBuilder builder; builder.Append( - R"(SELECT name FROM pragma_table_info("%w" , "%w") WHERE pk > 0 ORDER BY pk 
ASC)", + R"(SELECT name FROM pragma_table_info(%Q, %Q) WHERE pk > 0 ORDER BY pk ASC)", table.data(), catalog.data()); UNWRAP_RESULT(auto pk_query, builder.GetString()); std::vector pk; @@ -595,8 +601,8 @@ class SqliteConnection : public driver::Connection { nanoarrow::UniqueArrayStream stream; struct AdbcError error = ADBC_ERROR_INIT; AdbcStatusCode status = - AdbcSqliteExportReader(conn_, stmt, /*binder=*/NULL, - /*batch_size=*/64, stream.get(), &error); + InternalAdbcSqliteExportReader(conn_, stmt, /*binder=*/NULL, + /*batch_size=*/64, stream.get(), &error); if (status == ADBC_STATUS_OK) { int code = stream->get_schema(stream.get(), schema); if (code != 0) { @@ -758,11 +764,11 @@ class SqliteStatement : public driver::Statement { public: [[maybe_unused]] constexpr static std::string_view kErrorPrefix = "[SQLite]"; - Status BindImpl() { + Status BindImpl(bool ingest) { if (bind_parameters_.release) { struct AdbcError error = ADBC_ERROR_INIT; - if (AdbcStatusCode code = - AdbcSqliteBinderSetArrayStream(&binder_, &bind_parameters_, &error); + if (AdbcStatusCode code = InternalAdbcSqliteBinderSetArrayStream( + &binder_, &bind_parameters_, !ingest && bind_by_name_, &error); code != ADBC_STATUS_OK) { return Status::FromAdbc(code, error); } @@ -771,7 +777,7 @@ class SqliteStatement : public driver::Statement { } Result ExecuteIngestImpl(IngestState& state) { - UNWRAP_STATUS(BindImpl()); + UNWRAP_STATUS(BindImpl(true)); if (!binder_.schema.release) { return status::InvalidState("must Bind() before bulk ingestion"); } @@ -933,7 +939,8 @@ class SqliteStatement : public driver::Statement { struct AdbcError error = ADBC_ERROR_INIT; while (true) { char finished = 0; - status_code = AdbcSqliteBinderBindNext(&binder_, conn_, stmt, &finished, &error); + status_code = + InternalAdbcSqliteBinderBindNext(&binder_, conn_, stmt, &finished, &error); if (status_code != ADBC_STATUS_OK || finished) { status = Status::FromAdbc(status_code, error); break; @@ -969,7 +976,7 @@ class 
SqliteStatement : public driver::Statement { Result ExecuteQueryImpl(ArrowArrayStream* stream) { struct AdbcError error = ADBC_ERROR_INIT; - UNWRAP_STATUS(BindImpl()); + UNWRAP_STATUS(BindImpl(false)); const int64_t expected = sqlite3_bind_parameter_count(stmt_); const int64_t actual = binder_.schema.n_children; @@ -978,9 +985,9 @@ class SqliteStatement : public driver::Statement { "parameter count mismatch: expected {} but found {}", expected, actual); } - auto status = - AdbcSqliteExportReader(conn_, stmt_, binder_.schema.release ? &binder_ : nullptr, - batch_size_, stream, &error); + auto status = InternalAdbcSqliteExportReader( + conn_, stmt_, binder_.schema.release ? &binder_ : nullptr, batch_size_, stream, + &error); if (status != ADBC_STATUS_OK) { return Status::FromAdbc(status, error); } @@ -997,7 +1004,7 @@ class SqliteStatement : public driver::Statement { } Result ExecuteUpdateImpl() { - UNWRAP_STATUS(BindImpl()); + UNWRAP_STATUS(BindImpl(false)); const int64_t expected = sqlite3_bind_parameter_count(stmt_); const int64_t actual = binder_.schema.n_children; @@ -1015,10 +1022,10 @@ class SqliteStatement : public driver::Statement { if (binder_.schema.release) { char finished = 0; struct AdbcError error = ADBC_ERROR_INIT; - if (AdbcStatusCode code = - AdbcSqliteBinderBindNext(&binder_, conn_, stmt_, &finished, &error); + if (AdbcStatusCode code = InternalAdbcSqliteBinderBindNext(&binder_, conn_, stmt_, + &finished, &error); code != ADBC_STATUS_OK) { - AdbcSqliteBinderRelease(&binder_); + InternalAdbcSqliteBinderRelease(&binder_); return Status::FromAdbc(code, error); } else if (finished != 0) { break; @@ -1035,7 +1042,7 @@ class SqliteStatement : public driver::Statement { if (!binder_.schema.release) break; } - AdbcSqliteBinderRelease(&binder_); + InternalAdbcSqliteBinderRelease(&binder_); if (sqlite3_reset(stmt_) != SQLITE_OK) { const char* msg = sqlite3_errmsg(conn_); @@ -1122,7 +1129,7 @@ class SqliteStatement : public driver::Statement { rc, 
sqlite3_errmsg(conn_)); } } - AdbcSqliteBinderRelease(&binder_); + InternalAdbcSqliteBinderRelease(&binder_); return Statement::ReleaseImpl(); } @@ -1137,11 +1144,15 @@ class SqliteStatement : public driver::Statement { } batch_size_ = static_cast(batch_size); return status::Ok(); + } else if (key == kStatementOptionBindByName) { + UNWRAP_RESULT(bind_by_name_, value.AsBool()); + return status::Ok(); } return Base::SetOptionImpl(key, std::move(value)); } int batch_size_ = 1024; + bool bind_by_name_ = false; AdbcSqliteBinder binder_; sqlite3* conn_ = nullptr; sqlite3_stmt* stmt_ = nullptr; @@ -1154,6 +1165,8 @@ using SqliteDriver = // Public names +extern "C" { +#if !defined(ADBC_NO_COMMON_ENTRYPOINTS) AdbcStatusCode AdbcDatabaseGetOption(struct AdbcDatabase* database, const char* key, char* value, size_t* length, struct AdbcError* error) { @@ -1472,7 +1485,12 @@ AdbcStatusCode AdbcStatementExecutePartitions(struct AdbcStatement* statement, statement, schema, partitions, rows_affected, error); } -extern "C" { +[[maybe_unused]] ADBC_EXPORT AdbcStatusCode AdbcDriverInit(int version, void* raw_driver, + AdbcError* error) { + return adbc::sqlite::SqliteDriver::Init(version, raw_driver, error); +} +#endif // ADBC_NO_COMMON_ENTRYPOINTS + [[maybe_unused]] ADBC_EXPORT AdbcStatusCode AdbcDriverSqliteInit(int version, void* raw_driver, AdbcError* error) { @@ -1484,9 +1502,4 @@ extern "C" { AdbcError* error) { return adbc::sqlite::SqliteDriver::Init(version, raw_driver, error); } - -[[maybe_unused]] ADBC_EXPORT AdbcStatusCode AdbcDriverInit(int version, void* raw_driver, - AdbcError* error) { - return adbc::sqlite::SqliteDriver::Init(version, raw_driver, error); -} } diff --git a/c/driver/sqlite/sqlite_test.cc b/c/driver/sqlite/sqlite_test.cc index 8ceb747aca..f270f50592 100644 --- a/c/driver/sqlite/sqlite_test.cc +++ b/c/driver/sqlite/sqlite_test.cc @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -437,30 +439,34 @@ class SqliteReaderTest : 
public ::testing::Test { } void TearDown() override { if (error.release) error.release(&error); - AdbcSqliteBinderRelease(&binder); + InternalAdbcSqliteBinderRelease(&binder); sqlite3_finalize(stmt); sqlite3_close(db); } void Exec(const std::string& query) { - ASSERT_EQ(SQLITE_OK, sqlite3_prepare_v2(db, query.c_str(), query.size(), &stmt, - /*pzTail=*/nullptr)); + SCOPED_TRACE(query); + int rc = sqlite3_prepare_v2(db, query.c_str(), query.size(), &stmt, + /*pzTail=*/nullptr); + ASSERT_EQ(SQLITE_OK, rc) << "Failed to prepare query: " << sqlite3_errmsg(db); ASSERT_EQ(SQLITE_DONE, sqlite3_step(stmt)); sqlite3_finalize(stmt); stmt = nullptr; } - void Bind(struct ArrowArray* batch, struct ArrowSchema* schema) { + void Bind(struct ArrowArray* batch, struct ArrowSchema* schema, + bool bind_by_name = false) { Handle stream; struct ArrowArray batch_internal = *batch; batch->release = nullptr; adbc_validation::MakeStream(&stream.value, schema, {batch_internal}); - ASSERT_NO_FATAL_FAILURE(Bind(&stream.value)); + ASSERT_NO_FATAL_FAILURE(Bind(&stream.value, bind_by_name)); } - void Bind(struct ArrowArrayStream* stream) { - ASSERT_THAT(AdbcSqliteBinderSetArrayStream(&binder, stream, &error), - IsOkStatus(&error)); + void Bind(struct ArrowArrayStream* stream, bool bind_by_name = false) { + ASSERT_THAT( + InternalAdbcSqliteBinderSetArrayStream(&binder, stream, bind_by_name, &error), + IsOkStatus(&error)); } void ExecSelect(const std::string& values, size_t infer_rows, @@ -478,8 +484,8 @@ class SqliteReaderTest : public ::testing::Test { /*pzTail=*/nullptr)); struct AdbcSqliteBinder* binder = this->binder.schema.release ? 
&this->binder : nullptr; - ASSERT_THAT(AdbcSqliteExportReader(db, stmt, binder, infer_rows, - &reader->stream.value, &error), + ASSERT_THAT(InternalAdbcSqliteExportReader(db, stmt, binder, infer_rows, + &reader->stream.value, &error), IsOkStatus(&error)); ASSERT_NO_FATAL_FAILURE(reader->GetSchema()); } @@ -526,7 +532,7 @@ TEST_F(SqliteReaderTest, IntsFloatsNulls) { TEST_F(SqliteReaderTest, IntsNullsStrsNullsInts) { adbc_validation::StreamReader reader; ASSERT_NO_FATAL_FAILURE(ExecSelect( - R"((NULL), (1), (NULL), (-1), ("foo"), (NULL), (""), (24))", kInferRows, &reader)); + R"((NULL), (1), (NULL), (-1), ('foo'), (NULL), (''), (24))", kInferRows, &reader)); ASSERT_EQ(NANOARROW_TYPE_STRING, reader.fields[0].type); ASSERT_NO_FATAL_FAILURE(reader.Next()); @@ -552,7 +558,7 @@ TEST_F(SqliteReaderTest, IntExtremes) { TEST_F(SqliteReaderTest, IntExtremesStrs) { adbc_validation::StreamReader reader; ASSERT_NO_FATAL_FAILURE(ExecSelect( - R"((NULL), (9223372036854775807), (-9223372036854775808), (""), (9223372036854775807), (-9223372036854775808))", + R"((NULL), (9223372036854775807), (-9223372036854775808), (''), (9223372036854775807), (-9223372036854775808))", kInferRows, &reader)); ASSERT_EQ(NANOARROW_TYPE_STRING, reader.fields[0].type); @@ -587,7 +593,7 @@ TEST_F(SqliteReaderTest, FloatExtremes) { TEST_F(SqliteReaderTest, IntsFloatsStrs) { adbc_validation::StreamReader reader; ASSERT_NO_FATAL_FAILURE( - ExecSelect(R"((1), (1.0), (""), (9e999), (-9e999))", kInferRows, &reader)); + ExecSelect(R"((1), (1.0), (''), (9e999), (-9e999))", kInferRows, &reader)); ASSERT_EQ(NANOARROW_TYPE_STRING, reader.fields[0].type); ASSERT_NO_FATAL_FAILURE(reader.Next()); @@ -629,7 +635,7 @@ TEST_F(SqliteReaderTest, InferIntRejectFloat) { TEST_F(SqliteReaderTest, InferIntRejectStr) { adbc_validation::StreamReader reader; ASSERT_NO_FATAL_FAILURE( - ExecSelect(R"((1), (NULL), (""), (NULL))", /*infer_rows=*/2, &reader)); + ExecSelect(R"((1), (NULL), (''), (NULL))", /*infer_rows=*/2, &reader)); 
ASSERT_EQ(NANOARROW_TYPE_INT64, reader.fields[0].type); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_NO_FATAL_FAILURE( @@ -678,7 +684,7 @@ TEST_F(SqliteReaderTest, InferFloatReadIntFloat) { TEST_F(SqliteReaderTest, InferFloatRejectStr) { adbc_validation::StreamReader reader; - ASSERT_NO_FATAL_FAILURE(ExecSelect(R"((1E0), (NULL), (2E0), (3), (""), (NULL))", + ASSERT_NO_FATAL_FAILURE(ExecSelect(R"((1E0), (NULL), (2E0), (3), (''), (NULL))", /*infer_rows=*/2, &reader)); ASSERT_EQ(NANOARROW_TYPE_DOUBLE, reader.fields[0].type); ASSERT_NO_FATAL_FAILURE(reader.Next()); @@ -716,7 +722,7 @@ TEST_F(SqliteReaderTest, InferFloatRejectBlob) { TEST_F(SqliteReaderTest, InferStrReadAll) { adbc_validation::StreamReader reader; - ASSERT_NO_FATAL_FAILURE(ExecSelect(R"((""), (NULL), (2), (3E0), ("foo"), (NULL))", + ASSERT_NO_FATAL_FAILURE(ExecSelect(R"((''), (NULL), (2), (3E0), ('foo'), (NULL))", /*infer_rows=*/2, &reader)); ASSERT_EQ(NANOARROW_TYPE_STRING, reader.fields[0].type); ASSERT_NO_FATAL_FAILURE(reader.Next()); @@ -799,7 +805,7 @@ TEST_F(SqliteReaderTest, InferTypedParams) { ASSERT_NO_FATAL_FAILURE(Exec("CREATE TABLE foo (idx, value)")); ASSERT_NO_FATAL_FAILURE( - Exec(R"(INSERT INTO foo VALUES (0, "foo"), (1, NULL), (2, 4), (3, 1E2))")); + Exec(R"(INSERT INTO foo VALUES (0, 'foo'), (1, NULL), (2, 4), (3, 1E2))")); ASSERT_THAT(adbc_validation::MakeSchema(&schema.value, {{"", NANOARROW_TYPE_INT64}}), IsOkErrno()); @@ -822,6 +828,32 @@ TEST_F(SqliteReaderTest, InferTypedParams) { "[SQLite] Type mismatch in column 0: expected INT64 but got DOUBLE")); } +TEST_F(SqliteReaderTest, BindByName) { + adbc_validation::StreamReader reader; + Handle schema; + Handle batch; + + ASSERT_THAT(adbc_validation::MakeSchema(&schema.value, + { + {"@b", NANOARROW_TYPE_INT64}, + {"@a", NANOARROW_TYPE_INT64}, + }), + IsOkErrno()); + ASSERT_THAT((adbc_validation::MakeBatch(&schema.value, &batch.value, + /*error=*/nullptr, {1}, {2})), + IsOkErrno()); + + ASSERT_NO_FATAL_FAILURE(Bind(&batch.value, 
&schema.value, true)); + ASSERT_NO_FATAL_FAILURE(Exec("SELECT @a, @b", /*infer_rows=*/2, &reader)); + ASSERT_EQ(2, reader.schema->n_children); + ASSERT_EQ(NANOARROW_TYPE_INT64, reader.fields[0].type); + ASSERT_EQ(NANOARROW_TYPE_INT64, reader.fields[1].type); + + ASSERT_NO_FATAL_FAILURE(reader.Next()); + ASSERT_NO_FATAL_FAILURE(CompareArray(reader.array_view->children[0], {2})); + ASSERT_NO_FATAL_FAILURE(CompareArray(reader.array_view->children[1], {1})); +} + TEST_F(SqliteReaderTest, MultiValueParams) { // Regression test for apache/arrow-adbc#734 adbc_validation::StreamReader reader; diff --git a/c/driver/sqlite/statement_reader.c b/c/driver/sqlite/statement_reader.c index f73151673d..554bdaf203 100644 --- a/c/driver/sqlite/statement_reader.c +++ b/c/driver/sqlite/statement_reader.c @@ -34,44 +34,50 @@ #include "driver/common/utils.h" -AdbcStatusCode AdbcSqliteBinderSet(struct AdbcSqliteBinder* binder, - struct AdbcError* error) { +AdbcStatusCode InternalAdbcSqliteBinderSet(struct AdbcSqliteBinder* binder, + bool bind_by_name, struct AdbcError* error) { int status = binder->params.get_schema(&binder->params, &binder->schema); if (status != 0) { const char* message = binder->params.get_last_error(&binder->params); if (!message) message = "(unknown error)"; - SetError(error, "Failed to get parameter schema: (%d) %s: %s", status, - strerror(status), message); + InternalAdbcSetError(error, "Failed to get parameter schema: (%d) %s: %s", status, + strerror(status), message); return ADBC_STATUS_INVALID_ARGUMENT; } struct ArrowError arrow_error = {0}; status = ArrowArrayViewInitFromSchema(&binder->batch, &binder->schema, &arrow_error); if (status != 0) { - SetError(error, "Failed to initialize array view: (%d) %s: %s", status, - strerror(status), arrow_error.message); + InternalAdbcSetError(error, "Failed to initialize array view: (%d) %s: %s", status, + strerror(status), arrow_error.message); return ADBC_STATUS_INVALID_ARGUMENT; } if (binder->batch.storage_type != 
NANOARROW_TYPE_STRUCT) { - SetError(error, "Bind parameters do not have root type STRUCT"); + InternalAdbcSetError(error, "Bind parameters do not have root type STRUCT"); return ADBC_STATUS_INVALID_ARGUMENT; } binder->types = (enum ArrowType*)malloc(binder->schema.n_children * sizeof(enum ArrowType)); + if (bind_by_name) { + binder->param_indices = (int*)malloc(binder->schema.n_children * sizeof(int)); + // Lazily initialized below + memset(binder->param_indices, 0, binder->schema.n_children * sizeof(int)); + } + struct ArrowSchemaView view = {0}; for (int i = 0; i < binder->schema.n_children; i++) { status = ArrowSchemaViewInit(&view, binder->schema.children[i], &arrow_error); if (status != NANOARROW_OK) { - SetError(error, "Failed to parse schema for column %d: %s (%d): %s", i, - strerror(status), status, arrow_error.message); + InternalAdbcSetError(error, "Failed to parse schema for column %d: %s (%d): %s", i, + strerror(status), status, arrow_error.message); return ADBC_STATUS_INVALID_ARGUMENT; } if (view.type == NANOARROW_TYPE_UNINITIALIZED) { - SetError(error, "Column %d has UNINITIALIZED type", i); + InternalAdbcSetError(error, "Column %d has UNINITIALIZED type", i); return ADBC_STATUS_INTERNAL; } @@ -80,8 +86,9 @@ AdbcStatusCode AdbcSqliteBinderSet(struct AdbcSqliteBinder* binder, status = ArrowSchemaViewInit(&value_view, binder->schema.children[i]->dictionary, &arrow_error); if (status != NANOARROW_OK) { - SetError(error, "Failed to parse schema for column %d->dictionary: %s (%d): %s", - i, strerror(status), status, arrow_error.message); + InternalAdbcSetError( + error, "Failed to parse schema for column %d->dictionary: %s (%d): %s", i, + strerror(status), status, arrow_error.message); return ADBC_STATUS_INVALID_ARGUMENT; } @@ -96,8 +103,8 @@ AdbcStatusCode AdbcSqliteBinderSet(struct AdbcSqliteBinder* binder, case NANOARROW_TYPE_BINARY_VIEW: break; default: - SetError(error, "Column %d dictionary has unsupported type %s", i, - 
ArrowTypeString(value_view.type)); + InternalAdbcSetError(error, "Column %d dictionary has unsupported type %s", i, + ArrowTypeString(value_view.type)); return ADBC_STATUS_NOT_IMPLEMENTED; } } @@ -108,13 +115,14 @@ AdbcStatusCode AdbcSqliteBinderSet(struct AdbcSqliteBinder* binder, return ADBC_STATUS_OK; } -AdbcStatusCode AdbcSqliteBinderSetArrayStream(struct AdbcSqliteBinder* binder, - struct ArrowArrayStream* values, - struct AdbcError* error) { - AdbcSqliteBinderRelease(binder); +AdbcStatusCode InternalAdbcSqliteBinderSetArrayStream(struct AdbcSqliteBinder* binder, + struct ArrowArrayStream* values, + bool bind_by_name, + struct AdbcError* error) { + InternalAdbcSqliteBinderRelease(binder); binder->params = *values; memset(values, 0, sizeof(*values)); - return AdbcSqliteBinderSet(binder, error); + return InternalAdbcSqliteBinderSet(binder, bind_by_name, error); } #define SECONDS_PER_DAY 86400 @@ -129,7 +137,7 @@ static AdbcStatusCode ArrowDate32ToIsoString(int32_t value, char** buf, #if SIZEOF_TIME_T < 8 if ((value > INT32_MAX / SECONDS_PER_DAY) || (value < INT32_MIN / SECONDS_PER_DAY)) { - SetError(error, "Date %" PRId32 " exceeds platform time_t bounds", value); + InternalAdbcSetError(error, "Date %" PRId32 " exceeds platform time_t bounds", value); return ADBC_STATUS_INVALID_ARGUMENT; } @@ -142,13 +150,15 @@ static AdbcStatusCode ArrowDate32ToIsoString(int32_t value, char** buf, #if defined(_WIN32) if (gmtime_s(&broken_down_time, &time) != 0) { - SetError(error, "Could not convert date %" PRId32 " to broken down time", value); + InternalAdbcSetError(error, "Could not convert date %" PRId32 " to broken down time", + value); return ADBC_STATUS_INVALID_ARGUMENT; } #else if (gmtime_r(&time, &broken_down_time) != &broken_down_time) { - SetError(error, "Could not convert date %" PRId32 " to broken down time", value); + InternalAdbcSetError(error, "Could not convert date %" PRId32 " to broken down time", + value); return ADBC_STATUS_INVALID_ARGUMENT; } @@ -160,7 
+170,8 @@ static AdbcStatusCode ArrowDate32ToIsoString(int32_t value, char** buf, } if (strftime(tsstr, strlen + 1, "%Y-%m-%d", &broken_down_time) == 0) { - SetError(error, "Call to strftime for date %" PRId32 " with failed", value); + InternalAdbcSetError(error, "Call to strftime for date %" PRId32 " with failed", + value); free(tsstr); return ADBC_STATUS_INVALID_ARGUMENT; } @@ -206,8 +217,9 @@ static AdbcStatusCode ArrowTimestampToIsoString(int64_t value, enum ArrowTimeUni #if SIZEOF_TIME_T < 8 if ((seconds > INT32_MAX) || (seconds < INT32_MIN)) { - SetError(error, "Timestamp %" PRId64 " with unit %d exceeds platform time_t bounds", - value, unit); + InternalAdbcSetError( + error, "Timestamp %" PRId64 " with unit %d exceeds platform time_t bounds", value, + unit); return ADBC_STATUS_INVALID_ARGUMENT; } @@ -220,17 +232,17 @@ static AdbcStatusCode ArrowTimestampToIsoString(int64_t value, enum ArrowTimeUni #if defined(_WIN32) if (gmtime_s(&broken_down_time, &time) != 0) { - SetError(error, - "Could not convert timestamp %" PRId64 " with unit %d to broken down time", - value, unit); + InternalAdbcSetError( + error, "Could not convert timestamp %" PRId64 " with unit %d to broken down time", + value, unit); return ADBC_STATUS_INVALID_ARGUMENT; } #else if (gmtime_r(&time, &broken_down_time) != &broken_down_time) { - SetError(error, - "Could not convert timestamp %" PRId64 " with unit %d to broken down time", - value, unit); + InternalAdbcSetError( + error, "Could not convert timestamp %" PRId64 " with unit %d to broken down time", + value, unit); return ADBC_STATUS_INVALID_ARGUMENT; } @@ -242,8 +254,9 @@ static AdbcStatusCode ArrowTimestampToIsoString(int64_t value, enum ArrowTimeUni } if (strftime(tsstr, strlen, "%Y-%m-%dT%H:%M:%S", &broken_down_time) == 0) { - SetError(error, "Call to strftime for timestamp %" PRId64 " with unit %d failed", - value, unit); + InternalAdbcSetError(error, + "Call to strftime for timestamp %" PRId64 " with unit %d failed", + value, unit); 
free(tsstr); return ADBC_STATUS_INVALID_ARGUMENT; } @@ -270,9 +283,9 @@ static AdbcStatusCode ArrowTimestampToIsoString(int64_t value, enum ArrowTimeUni return ADBC_STATUS_OK; } -AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3* conn, - sqlite3_stmt* stmt, char* finished, - struct AdbcError* error) { +AdbcStatusCode InternalAdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, + sqlite3* conn, sqlite3_stmt* stmt, + char* finished, struct AdbcError* error) { struct ArrowError arrow_error = {0}; int status = 0; while (!binder->array.release || binder->next_row >= binder->array.length) { @@ -283,8 +296,8 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 status = ArrowArrayViewInitFromSchema(&binder->batch, &binder->schema, &arrow_error); if (status != 0) { - SetError(error, "Failed to initialize array view: (%d) %s: %s", status, - strerror(status), arrow_error.message); + InternalAdbcSetError(error, "Failed to initialize array view: (%d) %s: %s", + status, strerror(status), arrow_error.message); return ADBC_STATUS_INTERNAL; } } @@ -293,21 +306,21 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 if (status != 0) { const char* message = binder->params.get_last_error(&binder->params); if (!message) message = "(unknown error)"; - SetError(error, "Failed to get next parameter batch: (%d) %s: %s", status, - strerror(status), message); + InternalAdbcSetError(error, "Failed to get next parameter batch: (%d) %s: %s", + status, strerror(status), message); return ADBC_STATUS_IO; } if (!binder->array.release) { *finished = 1; - AdbcSqliteBinderRelease(binder); + InternalAdbcSqliteBinderRelease(binder); return ADBC_STATUS_OK; } status = ArrowArrayViewSetArray(&binder->batch, &binder->array, &arrow_error); if (status != 0) { - SetError(error, "Failed to initialize array view: (%d) %s: %s", status, - strerror(status), arrow_error.message); + InternalAdbcSetError(error, "Failed to 
initialize array view: (%d) %s: %s", status, + strerror(status), arrow_error.message); return ADBC_STATUS_INTERNAL; } @@ -315,17 +328,36 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 } if (sqlite3_reset(stmt) != SQLITE_OK) { - SetError(error, "Failed to reset statement: %s", sqlite3_errmsg(conn)); + InternalAdbcSetError(error, "Failed to reset statement: %s", sqlite3_errmsg(conn)); return ADBC_STATUS_INTERNAL; } if (sqlite3_clear_bindings(stmt) != SQLITE_OK) { - SetError(error, "Failed to clear statement bindings: %s", sqlite3_errmsg(conn)); + InternalAdbcSetError(error, "Failed to clear statement bindings: %s", + sqlite3_errmsg(conn)); return ADBC_STATUS_INTERNAL; } + if (binder->param_indices != NULL && binder->param_indices[0] == 0) { + // Lazy initialize since we have the statement now + for (int i = 0; i < binder->schema.n_children; i++) { + binder->param_indices[i] = + sqlite3_bind_parameter_index(stmt, binder->schema.children[i]->name); + if (binder->param_indices[i] == 0) { + InternalAdbcSetError(error, "could not find parameter `%s`", + binder->schema.children[i]->name); + return ADBC_STATUS_INVALID_ARGUMENT; + } + } + } + for (int col = 0; col < binder->schema.n_children; col++) { + int bind_index = col + 1; + if (binder->param_indices != NULL) { + bind_index = binder->param_indices[col]; + } + if (ArrowArrayViewIsNull(binder->batch.children[col], binder->next_row)) { - status = sqlite3_bind_null(stmt, col + 1); + status = sqlite3_bind_null(stmt, bind_index); } else { switch (binder->types[col]) { case NANOARROW_TYPE_BINARY: @@ -334,8 +366,8 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 case NANOARROW_TYPE_BINARY_VIEW: { struct ArrowBufferView value = ArrowArrayViewGetBytesUnsafe(binder->batch.children[col], binder->next_row); - status = sqlite3_bind_blob(stmt, col + 1, value.data.as_char, value.size_bytes, - SQLITE_STATIC); + status = sqlite3_bind_blob(stmt, bind_index, 
value.data.as_char, + (int)value.size_bytes, SQLITE_STATIC); break; } case NANOARROW_TYPE_BOOL: @@ -346,13 +378,13 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 uint64_t value = ArrowArrayViewGetUIntUnsafe(binder->batch.children[col], binder->next_row); if (value > INT64_MAX) { - SetError(error, - "Column %d has unsigned integer value %" PRIu64 - "out of range of int64_t", - col, value); + InternalAdbcSetError(error, + "Column %d has unsigned integer value %" PRIu64 + "out of range of int64_t", + col, value); return ADBC_STATUS_INVALID_ARGUMENT; } - status = sqlite3_bind_int64(stmt, col + 1, (int64_t)value); + status = sqlite3_bind_int64(stmt, bind_index, (int64_t)value); break; } case NANOARROW_TYPE_INT8: @@ -361,7 +393,7 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 case NANOARROW_TYPE_INT64: { int64_t value = ArrowArrayViewGetIntUnsafe(binder->batch.children[col], binder->next_row); - status = sqlite3_bind_int64(stmt, col + 1, value); + status = sqlite3_bind_int64(stmt, bind_index, value); break; } case NANOARROW_TYPE_HALF_FLOAT: @@ -369,7 +401,7 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 case NANOARROW_TYPE_DOUBLE: { double value = ArrowArrayViewGetDoubleUnsafe(binder->batch.children[col], binder->next_row); - status = sqlite3_bind_double(stmt, col + 1, value); + status = sqlite3_bind_double(stmt, bind_index, value); break; } case NANOARROW_TYPE_STRING: @@ -377,8 +409,8 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 case NANOARROW_TYPE_STRING_VIEW: { struct ArrowBufferView value = ArrowArrayViewGetBytesUnsafe(binder->batch.children[col], binder->next_row); - status = sqlite3_bind_text(stmt, col + 1, value.data.as_char, value.size_bytes, - SQLITE_STATIC); + status = sqlite3_bind_text(stmt, bind_index, value.data.as_char, + (int)value.size_bytes, SQLITE_STATIC); break; } case NANOARROW_TYPE_DICTIONARY: { @@ 
-386,12 +418,12 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 ArrowArrayViewGetIntUnsafe(binder->batch.children[col], binder->next_row); if (ArrowArrayViewIsNull(binder->batch.children[col]->dictionary, value_index)) { - status = sqlite3_bind_null(stmt, col + 1); + status = sqlite3_bind_null(stmt, bind_index); } else { struct ArrowBufferView value = ArrowArrayViewGetBytesUnsafe( binder->batch.children[col]->dictionary, value_index); - status = sqlite3_bind_text(stmt, col + 1, value.data.as_char, - value.size_bytes, SQLITE_STATIC); + status = sqlite3_bind_text(stmt, bind_index, value.data.as_char, + (int)value.size_bytes, SQLITE_STATIC); } break; } @@ -401,26 +433,26 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 char* tsstr; if ((value > INT32_MAX) || (value < INT32_MIN)) { - SetError(error, - "Column %d has value %" PRId64 - " which exceeds the expected range " - "for an Arrow DATE32 type", - col, value); + InternalAdbcSetError(error, + "Column %d has value %" PRId64 + " which exceeds the expected range " + "for an Arrow DATE32 type", + col, value); return ADBC_STATUS_INVALID_DATA; } RAISE_ADBC(ArrowDate32ToIsoString((int32_t)value, &tsstr, error)); // SQLITE_TRANSIENT ensures the value is copied during bind - status = - sqlite3_bind_text(stmt, col + 1, tsstr, strlen(tsstr), SQLITE_TRANSIENT); + status = sqlite3_bind_text(stmt, bind_index, tsstr, (int)strlen(tsstr), + SQLITE_TRANSIENT); free(tsstr); break; } case NANOARROW_TYPE_TIMESTAMP: { struct ArrowSchemaView bind_schema_view; - RAISE_ADBC(ArrowSchemaViewInit(&bind_schema_view, binder->schema.children[col], - &arrow_error)); + RAISE_NA(ArrowSchemaViewInit(&bind_schema_view, binder->schema.children[col], + &arrow_error)); enum ArrowTimeUnit unit = bind_schema_view.time_unit; int64_t value = ArrowArrayViewGetIntUnsafe(binder->batch.children[col], binder->next_row); @@ -429,20 +461,21 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct 
AdbcSqliteBinder* binder, sqlite3 RAISE_ADBC(ArrowTimestampToIsoString(value, unit, &tsstr, error)); // SQLITE_TRANSIENT ensures the value is copied during bind - status = - sqlite3_bind_text(stmt, col + 1, tsstr, strlen(tsstr), SQLITE_TRANSIENT); + status = sqlite3_bind_text(stmt, bind_index, tsstr, (int)strlen(tsstr), + SQLITE_TRANSIENT); free((char*)tsstr); break; } default: - SetError(error, "Column %d has unsupported type %s", col, - ArrowTypeString(binder->types[col])); + InternalAdbcSetError(error, "Column %d has unsupported type %s", col, + ArrowTypeString(binder->types[col])); return ADBC_STATUS_NOT_IMPLEMENTED; } } if (status != SQLITE_OK) { - SetError(error, "Failed to clear statement bindings: %s", sqlite3_errmsg(conn)); + InternalAdbcSetError(error, "Failed to bind col %d to param %d: %s", col, + bind_index, sqlite3_errmsg(conn)); return ADBC_STATUS_INTERNAL; } } @@ -452,7 +485,7 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3 return ADBC_STATUS_OK; } -void AdbcSqliteBinderRelease(struct AdbcSqliteBinder* binder) { +void InternalAdbcSqliteBinderRelease(struct AdbcSqliteBinder* binder) { if (binder->schema.release) { binder->schema.release(&binder->schema); } @@ -462,6 +495,9 @@ void AdbcSqliteBinderRelease(struct AdbcSqliteBinder* binder) { if (binder->types) { free(binder->types); } + if (binder->param_indices) { + free(binder->param_indices); + } if (binder->array.release) { binder->array.release(&binder->array); } @@ -469,7 +505,7 @@ void AdbcSqliteBinderRelease(struct AdbcSqliteBinder* binder) { memset(binder, 0, sizeof(*binder)); } -struct StatementReader { +struct InternalSqliteStatementReader { sqlite3* db; sqlite3_stmt* stmt; enum ArrowType* types; @@ -481,16 +517,18 @@ struct StatementReader { int batch_size; }; -const char* StatementReaderGetLastError(struct ArrowArrayStream* self) { +const char* InternalSqliteStatementReaderGetLastError(struct ArrowArrayStream* self) { if (!self->release || 
!self->private_data) { return NULL; } - struct StatementReader* reader = (struct StatementReader*)self->private_data; + struct InternalSqliteStatementReader* reader = + (struct InternalSqliteStatementReader*)self->private_data; return reader->error.message; } -void StatementReaderSetError(struct StatementReader* reader) { +void InternalSqliteStatementReaderInternalAdbcSetError( + struct InternalSqliteStatementReader* reader) { const char* msg = sqlite3_errmsg(reader->db); // Reset here so that we don't get an error again in StatementRelease (void)sqlite3_reset(reader->stmt); @@ -498,8 +536,8 @@ void StatementReaderSetError(struct StatementReader* reader) { reader->error.message[sizeof(reader->error.message) - 1] = '\0'; } -int StatementReaderGetOneValue(struct StatementReader* reader, int col, - struct ArrowArray* out) { +int InternalSqliteStatementReaderGetOneValue(struct InternalSqliteStatementReader* reader, + int col, struct ArrowArray* out) { int sqlite_type = sqlite3_column_type(reader->stmt, col); if (sqlite_type == SQLITE_NULL) { @@ -621,12 +659,14 @@ int StatementReaderGetOneValue(struct StatementReader* reader, int col, return ENOTSUP; } -int StatementReaderGetNext(struct ArrowArrayStream* self, struct ArrowArray* out) { +int InternalSqliteStatementReaderGetNext(struct ArrowArrayStream* self, + struct ArrowArray* out) { if (!self->release || !self->private_data) { return EINVAL; } - struct StatementReader* reader = (struct StatementReader*)self->private_data; + struct InternalSqliteStatementReader* reader = + (struct InternalSqliteStatementReader*)self->private_data; if (reader->initial_batch.release != NULL) { // Canonically return zero-row results as a stream with zero batches if (reader->initial_batch.length == 0) { @@ -660,8 +700,8 @@ int StatementReaderGetNext(struct ArrowArrayStream* self, struct ArrowArray* out } else { char finished = 0; struct AdbcError error = {0}; - status = AdbcSqliteBinderBindNext(reader->binder, reader->db, reader->stmt, - 
&finished, &error); + status = InternalAdbcSqliteBinderBindNext(reader->binder, reader->db, + reader->stmt, &finished, &error); if (status != ADBC_STATUS_OK) { reader->done = 1; status = EIO; @@ -681,17 +721,17 @@ int StatementReaderGetNext(struct ArrowArrayStream* self, struct ArrowArray* out } else if (rc == SQLITE_ERROR) { reader->done = 1; status = EIO; - StatementReaderSetError(reader); + InternalSqliteStatementReaderInternalAdbcSetError(reader); break; } else if (rc != SQLITE_ROW) { reader->done = 1; status = ADBC_STATUS_INTERNAL; - StatementReaderSetError(reader); + InternalSqliteStatementReaderInternalAdbcSetError(reader); break; } for (int col = 0; col < reader->schema.n_children; col++) { - status = StatementReaderGetOneValue(reader, col, out->children[col]); + status = InternalSqliteStatementReaderGetOneValue(reader, col, out->children[col]); if (status != 0) break; } @@ -714,18 +754,21 @@ int StatementReaderGetNext(struct ArrowArrayStream* self, struct ArrowArray* out return status; } -int StatementReaderGetSchema(struct ArrowArrayStream* self, struct ArrowSchema* out) { +int InternalSqliteStatementReaderGetSchema(struct ArrowArrayStream* self, + struct ArrowSchema* out) { if (!self->release || !self->private_data) { return EINVAL; } - struct StatementReader* reader = (struct StatementReader*)self->private_data; + struct InternalSqliteStatementReader* reader = + (struct InternalSqliteStatementReader*)self->private_data; return ArrowSchemaDeepCopy(&reader->schema, out); } -void StatementReaderRelease(struct ArrowArrayStream* self) { +void InternalSqliteStatementReaderRelease(struct ArrowArrayStream* self) { if (self->private_data) { - struct StatementReader* reader = (struct StatementReader*)self->private_data; + struct InternalSqliteStatementReader* reader = + (struct InternalSqliteStatementReader*)self->private_data; if (reader->schema.release) { reader->schema.release(&reader->schema); } @@ -736,7 +779,7 @@ void StatementReaderRelease(struct 
ArrowArrayStream* self) { free(reader->types); } if (reader->binder) { - AdbcSqliteBinderRelease(reader->binder); + InternalAdbcSqliteBinderRelease(reader->binder); } free(self->private_data); @@ -772,12 +815,10 @@ void StatementReaderRelease(struct ArrowArrayStream* self) { /// Initialize buffers for the first (type-inferred) batch of data. /// Use raw buffers since the types may change. -AdbcStatusCode StatementReaderInitializeInfer(int num_columns, size_t infer_rows, - struct ArrowBitmap* validity, - struct ArrowBuffer* data, - struct ArrowBuffer* binary, - enum ArrowType* current_type, - struct AdbcError* error) { +AdbcStatusCode InternalSqliteStatementReaderInitializeInfer( + int num_columns, size_t infer_rows, struct ArrowBitmap* validity, + struct ArrowBuffer* data, struct ArrowBuffer* binary, enum ArrowType* current_type, + struct AdbcError* error) { for (int i = 0; i < num_columns; i++) { ArrowBitmapInit(&validity[i]); CHECK_NA(INTERNAL, ArrowBitmapReserve(&validity[i], infer_rows), error); @@ -790,10 +831,11 @@ AdbcStatusCode StatementReaderInitializeInfer(int num_columns, size_t infer_rows } // NOLINT(whitespace/indent) /// Finalize the first (type-inferred) batch of data. 
-AdbcStatusCode StatementReaderInferFinalize( - sqlite3_stmt* stmt, int num_columns, int64_t num_rows, struct StatementReader* reader, - struct ArrowBitmap* validity, struct ArrowBuffer* data, struct ArrowBuffer* binary, - enum ArrowType* current_type, struct AdbcError* error) { +AdbcStatusCode InternalSqliteStatementReaderInferFinalize( + sqlite3_stmt* stmt, int num_columns, int64_t num_rows, + struct InternalSqliteStatementReader* reader, struct ArrowBitmap* validity, + struct ArrowBuffer* data, struct ArrowBuffer* binary, enum ArrowType* current_type, + struct AdbcError* error) { ArrowSchemaInit(&reader->schema); CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(&reader->schema, num_columns), error); for (int col = 0; col < num_columns; col++) { @@ -811,7 +853,8 @@ AdbcStatusCode StatementReaderInferFinalize( if (current_type[col] == NANOARROW_TYPE_STRING || current_type[col] == NANOARROW_TYPE_BINARY) { if (binary[col].data == NULL) { - SetError(error, "INTERNAL: column has binary-like type but no backing buffer"); + InternalAdbcSetError( + error, "INTERNAL: column has binary-like type but no backing buffer"); return ADBC_STATUS_INTERNAL; } } @@ -835,8 +878,8 @@ AdbcStatusCode StatementReaderInferFinalize( } // Convert an int64 typed column to double. 
-AdbcStatusCode StatementReaderUpcastInt64ToDouble(struct ArrowBuffer* data, - struct AdbcError* error) { +AdbcStatusCode InternalSqliteStatementReaderUpcastInt64ToDouble(struct ArrowBuffer* data, + struct AdbcError* error) { struct ArrowBuffer doubles; ArrowBufferInit(&doubles); CHECK_NA(INTERNAL, ArrowBufferReserve(&doubles, data->capacity_bytes), error); @@ -844,7 +887,7 @@ AdbcStatusCode StatementReaderUpcastInt64ToDouble(struct ArrowBuffer* data, size_t num_elements = data->size_bytes / sizeof(int64_t); const int64_t* elements = (const int64_t*)data->data; for (size_t i = 0; i < num_elements; i++) { - double value = elements[i]; + double value = (double)elements[i]; ArrowBufferAppendUnsafe(&doubles, &value, sizeof(double)); } ArrowBufferReset(data); @@ -852,10 +895,9 @@ AdbcStatusCode StatementReaderUpcastInt64ToDouble(struct ArrowBuffer* data, return ADBC_STATUS_OK; } -AdbcStatusCode StatementReaderAppendInt64ToBinary(struct ArrowBuffer* offsets, - struct ArrowBuffer* binary, - int64_t value, int32_t* offset, - struct AdbcError* error) { +AdbcStatusCode InternalSqliteStatementReaderAppendInt64ToBinary( + struct ArrowBuffer* offsets, struct ArrowBuffer* binary, int64_t value, + int32_t* offset, struct AdbcError* error) { // Make sure we have at least 21 bytes available (19 digits + sign + null) // Presumably this is enough, but manpage for snprintf makes no guarantees // about whether locale may affect this, so check for truncation regardless @@ -867,13 +909,13 @@ AdbcStatusCode StatementReaderAppendInt64ToBinary(struct ArrowBuffer* offsets, while (1) { written = snprintf(output, buffer_size, "%" PRId64, value); if (written < 0) { - SetError(error, "Encoding error when upcasting double to string"); + InternalAdbcSetError(error, "Encoding error when upcasting double to string"); return ADBC_STATUS_INTERNAL; } else if (((size_t)written) >= buffer_size) { // Truncated, resize and try again // Check for overflow - presumably this can never happen...? 
if (UINT_MAX - buffer_size < buffer_size) { - SetError(error, "Overflow when upcasting double to string"); + InternalAdbcSetError(error, "Overflow when upcasting double to string"); return ADBC_STATUS_INTERNAL; } CHECK_NA(INTERNAL, ArrowBufferReserve(binary, buffer_size), error); @@ -888,10 +930,9 @@ AdbcStatusCode StatementReaderAppendInt64ToBinary(struct ArrowBuffer* offsets, return ADBC_STATUS_OK; } -AdbcStatusCode StatementReaderAppendDoubleToBinary(struct ArrowBuffer* offsets, - struct ArrowBuffer* binary, - double value, int32_t* offset, - struct AdbcError* error) { +AdbcStatusCode InternalSqliteStatementReaderAppendDoubleToBinary( + struct ArrowBuffer* offsets, struct ArrowBuffer* binary, double value, + int32_t* offset, struct AdbcError* error) { static const size_t kReserve = 64; size_t buffer_size = kReserve; CHECK_NA(INTERNAL, ArrowBufferReserve(binary, buffer_size), error); @@ -900,13 +941,13 @@ AdbcStatusCode StatementReaderAppendDoubleToBinary(struct ArrowBuffer* offsets, while (1) { written = snprintf(output, buffer_size, "%e", value); if (written < 0) { - SetError(error, "Encoding error when upcasting double to string"); + InternalAdbcSetError(error, "Encoding error when upcasting double to string"); return ADBC_STATUS_INTERNAL; } else if (((size_t)written) >= buffer_size) { // Truncated, resize and try again // Check for overflow - presumably this can never happen...? 
if (UINT_MAX - buffer_size < buffer_size) { - SetError(error, "Overflow when upcasting double to string"); + InternalAdbcSetError(error, "Overflow when upcasting double to string"); return ADBC_STATUS_INTERNAL; } CHECK_NA(INTERNAL, ArrowBufferReserve(binary, buffer_size), error); @@ -921,9 +962,8 @@ AdbcStatusCode StatementReaderAppendDoubleToBinary(struct ArrowBuffer* offsets, return ADBC_STATUS_OK; } -AdbcStatusCode StatementReaderUpcastInt64ToBinary(struct ArrowBuffer* data, - struct ArrowBuffer* binary, - struct AdbcError* error) { +AdbcStatusCode InternalSqliteStatementReaderUpcastInt64ToBinary( + struct ArrowBuffer* data, struct ArrowBuffer* binary, struct AdbcError* error) { struct ArrowBuffer offsets; ArrowBufferInit(&offsets); ArrowBufferInit(binary); @@ -936,8 +976,8 @@ AdbcStatusCode StatementReaderUpcastInt64ToBinary(struct ArrowBuffer* data, int32_t offset = 0; ArrowBufferAppendUnsafe(&offsets, &offset, sizeof(int32_t)); for (size_t i = 0; i < num_elements; i++) { - AdbcStatusCode status = - StatementReaderAppendInt64ToBinary(&offsets, binary, elements[i], &offset, error); + AdbcStatusCode status = InternalSqliteStatementReaderAppendInt64ToBinary( + &offsets, binary, elements[i], &offset, error); if (status != ADBC_STATUS_OK) return status; } ArrowBufferReset(data); @@ -945,9 +985,8 @@ AdbcStatusCode StatementReaderUpcastInt64ToBinary(struct ArrowBuffer* data, return ADBC_STATUS_OK; } -AdbcStatusCode StatementReaderUpcastDoubleToBinary(struct ArrowBuffer* data, - struct ArrowBuffer* binary, - struct AdbcError* error) { +AdbcStatusCode InternalSqliteStatementReaderUpcastDoubleToBinary( + struct ArrowBuffer* data, struct ArrowBuffer* binary, struct AdbcError* error) { struct ArrowBuffer offsets; ArrowBufferInit(&offsets); ArrowBufferInit(binary); @@ -960,7 +999,7 @@ AdbcStatusCode StatementReaderUpcastDoubleToBinary(struct ArrowBuffer* data, int32_t offset = 0; ArrowBufferAppendUnsafe(&offsets, &offset, sizeof(int32_t)); for (size_t i = 0; i < 
num_elements; i++) { - AdbcStatusCode status = StatementReaderAppendDoubleToBinary( + AdbcStatusCode status = InternalSqliteStatementReaderAppendDoubleToBinary( &offsets, binary, elements[i], &offset, error); if (status != ADBC_STATUS_OK) return status; } @@ -970,7 +1009,7 @@ AdbcStatusCode StatementReaderUpcastDoubleToBinary(struct ArrowBuffer* data, } /// Append a single value to a single column. -AdbcStatusCode StatementReaderInferOneValue( +AdbcStatusCode InternalSqliteStatementReaderInferOneValue( sqlite3_stmt* stmt, int col, struct ArrowBitmap* validity, struct ArrowBuffer* data, struct ArrowBuffer* binary, enum ArrowType* current_type, struct AdbcError* error) { // TODO: static_assert sizeof(int64) == sizeof(double) @@ -1018,7 +1057,7 @@ AdbcStatusCode StatementReaderInferOneValue( case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_BINARY: { int32_t offset = ((int32_t*)data->data)[data->size_bytes / 4 - 1]; - return StatementReaderAppendInt64ToBinary( + return InternalSqliteStatementReaderAppendInt64ToBinary( data, binary, sqlite3_column_int64(stmt, col), &offset, error); } default: @@ -1031,7 +1070,8 @@ AdbcStatusCode StatementReaderInferOneValue( switch (*current_type) { case NANOARROW_TYPE_INT64: { - AdbcStatusCode status = StatementReaderUpcastInt64ToDouble(data, error); + AdbcStatusCode status = + InternalSqliteStatementReaderUpcastInt64ToDouble(data, error); if (status != ADBC_STATUS_OK) return status; *current_type = NANOARROW_TYPE_DOUBLE; double value = sqlite3_column_double(stmt, col); @@ -1046,7 +1086,7 @@ AdbcStatusCode StatementReaderInferOneValue( case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_BINARY: { int32_t offset = ((int32_t*)data->data)[data->size_bytes / 4 - 1]; - return StatementReaderAppendDoubleToBinary( + return InternalSqliteStatementReaderAppendDoubleToBinary( data, binary, sqlite3_column_double(stmt, col), &offset, error); } default: @@ -1059,14 +1099,15 @@ AdbcStatusCode StatementReaderInferOneValue( switch (*current_type) { case 
NANOARROW_TYPE_INT64: { - AdbcStatusCode status = StatementReaderUpcastInt64ToBinary(data, binary, error); + AdbcStatusCode status = + InternalSqliteStatementReaderUpcastInt64ToBinary(data, binary, error); if (status != ADBC_STATUS_OK) return status; *current_type = NANOARROW_TYPE_STRING; break; } case NANOARROW_TYPE_DOUBLE: { AdbcStatusCode status = - StatementReaderUpcastDoubleToBinary(data, binary, error); + InternalSqliteStatementReaderUpcastDoubleToBinary(data, binary, error); if (status != ADBC_STATUS_OK) return status; *current_type = NANOARROW_TYPE_STRING; break; @@ -1090,14 +1131,15 @@ AdbcStatusCode StatementReaderInferOneValue( switch (*current_type) { case NANOARROW_TYPE_INT64: { - AdbcStatusCode status = StatementReaderUpcastInt64ToBinary(data, binary, error); + AdbcStatusCode status = + InternalSqliteStatementReaderUpcastInt64ToBinary(data, binary, error); if (status != ADBC_STATUS_OK) return status; *current_type = NANOARROW_TYPE_BINARY; break; } case NANOARROW_TYPE_DOUBLE: { AdbcStatusCode status = - StatementReaderUpcastDoubleToBinary(data, binary, error); + InternalSqliteStatementReaderUpcastDoubleToBinary(data, binary, error); if (status != ADBC_STATUS_OK) return status; *current_type = NANOARROW_TYPE_BINARY; break; @@ -1125,21 +1167,23 @@ AdbcStatusCode StatementReaderInferOneValue( return ADBC_STATUS_OK; } // NOLINT(whitespace/indent) -AdbcStatusCode AdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, - struct AdbcSqliteBinder* binder, size_t batch_size, - struct ArrowArrayStream* stream, - struct AdbcError* error) { - struct StatementReader* reader = malloc(sizeof(struct StatementReader)); - memset(reader, 0, sizeof(struct StatementReader)); +AdbcStatusCode InternalAdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, + struct AdbcSqliteBinder* binder, + size_t batch_size, + struct ArrowArrayStream* stream, + struct AdbcError* error) { + struct InternalSqliteStatementReader* reader = + malloc(sizeof(struct 
InternalSqliteStatementReader)); + memset(reader, 0, sizeof(struct InternalSqliteStatementReader)); reader->db = db; reader->stmt = stmt; - reader->batch_size = batch_size; + reader->batch_size = (int)batch_size; stream->private_data = reader; - stream->release = StatementReaderRelease; - stream->get_last_error = StatementReaderGetLastError; - stream->get_next = StatementReaderGetNext; - stream->get_schema = StatementReaderGetSchema; + stream->release = InternalSqliteStatementReaderRelease; + stream->get_last_error = InternalSqliteStatementReaderGetLastError; + stream->get_next = InternalSqliteStatementReaderGetNext; + stream->get_schema = InternalSqliteStatementReaderGetSchema; sqlite3_mutex_enter(sqlite3_db_mutex(db)); @@ -1149,12 +1193,12 @@ AdbcStatusCode AdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, struct ArrowBuffer* binary = malloc(num_columns * sizeof(struct ArrowBuffer)); enum ArrowType* current_type = malloc(num_columns * sizeof(enum ArrowType)); - AdbcStatusCode status = StatementReaderInitializeInfer( + AdbcStatusCode status = InternalSqliteStatementReaderInitializeInfer( num_columns, batch_size, validity, data, binary, current_type, error); if (binder) { char finished = 0; - status = AdbcSqliteBinderBindNext(binder, db, stmt, &finished, error); + status = InternalAdbcSqliteBinderBindNext(binder, db, stmt, &finished, error); if (finished) { reader->done = 1; } @@ -1170,7 +1214,7 @@ AdbcStatusCode AdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, break; } else { char finished = 0; - status = AdbcSqliteBinderBindNext(binder, db, stmt, &finished, error); + status = InternalAdbcSqliteBinderBindNext(binder, db, stmt, &finished, error); if (status != ADBC_STATUS_OK) break; if (finished) { reader->done = 1; @@ -1179,7 +1223,7 @@ AdbcStatusCode AdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, } continue; } else if (rc == SQLITE_ERROR) { - SetError(error, "Failed to step query: %s", sqlite3_errmsg(db)); + InternalAdbcSetError(error, 
"Failed to step query: %s", sqlite3_errmsg(db)); status = ADBC_STATUS_IO; // Reset here so that we don't get an error again in StatementRelease (void)sqlite3_reset(stmt); @@ -1190,8 +1234,9 @@ AdbcStatusCode AdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, } for (int col = 0; col < num_columns; col++) { - status = StatementReaderInferOneValue(stmt, col, &validity[col], &data[col], - &binary[col], ¤t_type[col], error); + status = InternalSqliteStatementReaderInferOneValue(stmt, col, &validity[col], + &data[col], &binary[col], + ¤t_type[col], error); if (status != ADBC_STATUS_OK) break; } if (status != ADBC_STATUS_OK) break; @@ -1199,8 +1244,9 @@ AdbcStatusCode AdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, } if (status == ADBC_STATUS_OK) { - status = StatementReaderInferFinalize(stmt, num_columns, num_rows, reader, validity, - data, binary, current_type, error); + status = InternalSqliteStatementReaderInferFinalize(stmt, num_columns, num_rows, + reader, validity, data, binary, + current_type, error); } } diff --git a/c/driver/sqlite/statement_reader.h b/c/driver/sqlite/statement_reader.h index 2e6b19086c..99be501336 100644 --- a/c/driver/sqlite/statement_reader.h +++ b/c/driver/sqlite/statement_reader.h @@ -19,6 +19,8 @@ #pragma once +#include + #include #include #include @@ -33,6 +35,7 @@ struct ADBC_EXPORT AdbcSqliteBinder { struct ArrowSchema schema; struct ArrowArrayStream params; enum ArrowType* types; + int* param_indices; // Scratch space struct ArrowArray array; @@ -41,28 +44,30 @@ struct ADBC_EXPORT AdbcSqliteBinder { }; ADBC_EXPORT -AdbcStatusCode AdbcSqliteBinderSetArrayStream(struct AdbcSqliteBinder* binder, - struct ArrowArrayStream* values, - struct AdbcError* error); +AdbcStatusCode InternalAdbcSqliteBinderSetArrayStream(struct AdbcSqliteBinder* binder, + struct ArrowArrayStream* values, + bool bind_by_name, + struct AdbcError* error); ADBC_EXPORT -AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3* conn, 
- sqlite3_stmt* stmt, char* finished, - struct AdbcError* error); +AdbcStatusCode InternalAdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, + sqlite3* conn, sqlite3_stmt* stmt, + char* finished, struct AdbcError* error); ADBC_EXPORT -void AdbcSqliteBinderRelease(struct AdbcSqliteBinder* binder); +void InternalAdbcSqliteBinderRelease(struct AdbcSqliteBinder* binder); /// \brief Initialize an ArrowArrayStream from a sqlite3_stmt. /// \param[in] db The SQLite connection. /// \param[in] stmt The SQLite statement. /// \param[in] binder Query parameters to bind, if provided. -/// \param[in] infer_rows How many rows to read to infer the Arrow schema. +/// \param[in] batch_size How many rows to read to infer the Arrow schema. /// \param[out] stream The stream to export to. /// \param[out] error Error details, if needed. ADBC_EXPORT -AdbcStatusCode AdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, - struct AdbcSqliteBinder* binder, size_t batch_size, - struct ArrowArrayStream* stream, - struct AdbcError* error); +AdbcStatusCode InternalAdbcSqliteExportReader(sqlite3* db, sqlite3_stmt* stmt, + struct AdbcSqliteBinder* binder, + size_t batch_size, + struct ArrowArrayStream* stream, + struct AdbcError* error); #ifdef __cplusplus } diff --git a/c/driver_manager/CMakeLists.txt b/c/driver_manager/CMakeLists.txt index 0eb17f0c8d..7057cc78e8 100644 --- a/c/driver_manager/CMakeLists.txt +++ b/c/driver_manager/CMakeLists.txt @@ -15,6 +15,34 @@ # specific language governing permissions and limitations # under the License. +set(ADBC_DRIVER_MANAGER_SHARED_LINK_LIBS ${CMAKE_DL_LIBS}) +set(ADBC_DRIVER_MANAGER_STATIC_LINK_LIBS ${CMAKE_DL_LIBS}) +# std::filesystem with old g++/clang++ require libstdc++fs/libc++fs. 
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9") + list(APPEND ADBC_DRIVER_MANAGER_SHARED_LINK_LIBS stdc++fs) + list(APPEND ADBC_DRIVER_MANAGER_STATIC_LINK_LIBS stdc++fs) + endif() +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8") + list(APPEND ADBC_DRIVER_MANAGER_SHARED_LINK_LIBS c++fs) + list(APPEND ADBC_DRIVER_MANAGER_STATIC_LINK_LIBS c++fs) + endif() +endif() + +if(WIN32) + list(APPEND + ADBC_DRIVER_MANAGER_SHARED_LINK_LIBS + advapi32 + uuid + shell32) + list(APPEND + ADBC_DRIVER_MANAGER_STATIC_LINK_LIBS + advapi32 + uuid + shell32) +endif() + add_arrow_lib(adbc_driver_manager SOURCES adbc_driver_manager.cc @@ -25,22 +53,28 @@ add_arrow_lib(adbc_driver_manager PKG_CONFIG_NAME adbc-driver-manager SHARED_LINK_LIBS - ${CMAKE_DL_LIBS} + ${ADBC_DRIVER_MANAGER_SHARED_LINK_LIBS} STATIC_LINK_LIBS - ${CMAKE_DL_LIBS} + ${ADBC_DRIVER_MANAGER_STATIC_LINK_LIBS} SHARED_LINK_FLAGS ${ADBC_LINK_FLAGS}) -include_directories(SYSTEM ${REPOSITORY_ROOT}/c/) -include_directories(SYSTEM ${REPOSITORY_ROOT}/c/include/) -include_directories(SYSTEM ${REPOSITORY_ROOT}/c/vendor) -include_directories(SYSTEM ${REPOSITORY_ROOT}/c/driver) install(FILES "${REPOSITORY_ROOT}/c/include/adbc.h" DESTINATION include) install(FILES "${REPOSITORY_ROOT}/c/include/arrow-adbc/adbc.h" DESTINATION include/arrow-adbc) foreach(LIB_TARGET ${ADBC_LIBRARIES}) + target_include_directories(${LIB_TARGET} SYSTEM + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ + ${REPOSITORY_ROOT}/c/vendor + ${REPOSITORY_ROOT}/c/driver) target_compile_definitions(${LIB_TARGET} PRIVATE ADBC_EXPORTING) + # We have to match R Makevars which can only set to 1 or 0 (not define/undefine) + if("$ENV{CONDA_BUILD}" STREQUAL "1") + target_compile_definitions(${LIB_TARGET} PRIVATE ADBC_CONDA_BUILD=1) + else() + target_compile_definitions(${LIB_TARGET} PRIVATE ADBC_CONDA_BUILD=0) + endif() endforeach() if(ADBC_BUILD_TESTS) @@ -60,11 +94,30 @@ 
if(ADBC_BUILD_TESTS) EXTRA_LINK_LIBS adbc_driver_common adbc_validation - nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-driver-manager-test PRIVATE cxx_std_17) + + if(ADBC_DRIVER_SQLITE) + target_compile_definitions(adbc-driver-manager-test + PRIVATE ADBC_DRIVER_MANAGER_TEST_LIB="${CMAKE_BINARY_DIR}/driver/sqlite/libadbc_driver_sqlite${CMAKE_SHARED_LIBRARY_SUFFIX}" + ) + endif() + if(ADBC_DRIVER_MANAGER_TEST_MANIFEST_USER_LEVEL) + target_compile_definitions(adbc-driver-manager-test + PRIVATE ADBC_DRIVER_MANAGER_TEST_MANIFEST_USER_LEVEL=1) + endif() + if(ADBC_DRIVER_MANAGER_TEST_MANIFEST_SYSTEM_LEVEL) + target_compile_definitions(adbc-driver-manager-test + PRIVATE ADBC_DRIVER_MANAGER_TEST_MANIFEST_SYSTEM_LEVEL=1) + endif() + if("$ENV{CONDA_BUILD}" STREQUAL "1") + target_compile_definitions(adbc-driver-manager-test PRIVATE ADBC_CONDA_BUILD=1) + else() + target_compile_definitions(adbc-driver-manager-test PRIVATE ADBC_CONDA_BUILD=0) + endif() target_include_directories(adbc-driver-manager-test SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/vendor/nanoarrow/) + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ + ${LIBPQ_INCLUDE_DIRS} ${REPOSITORY_ROOT}/c/driver) add_test_case(version_100_compatibility_test PREFIX @@ -76,9 +129,9 @@ if(ADBC_BUILD_TESTS) adbc_version_100_compatibility_test.cc EXTRA_LINK_LIBS adbc_validation_util - nanoarrow ${TEST_LINK_LIBS}) target_compile_features(adbc-version-100-compatibility-test PRIVATE cxx_std_17) target_include_directories(adbc-version-100-compatibility-test SYSTEM - PRIVATE ${REPOSITORY_ROOT}/c/vendor/nanoarrow/) + PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ + ${LIBPQ_INCLUDE_DIRS} ${REPOSITORY_ROOT}/c/driver) endif() diff --git a/c/driver_manager/adbc_driver_manager.cc b/c/driver_manager/adbc_driver_manager.cc index 0ce173a888..5840b900a0 100644 --- a/c/driver_manager/adbc_driver_manager.cc +++ b/c/driver_manager/adbc_driver_manager.cc @@ -15,29 +15,107 @@ // specific language governing permissions 
and limitations // under the License. -#include "arrow-adbc/adbc_driver_manager.h" +#if defined(_WIN32) +#define NOMINMAX +#include // Must come first + +#ifndef NTDDI_VERSION +#define NTDDI_VERSION 0x0A00000C // For SHGetKnownFolderPath in ShlObj_core.h in ShlObj.h +#endif + +#include +#include +#include +#include // _wcsnicmp +#include +#include +#else +#include +#endif // defined(_WIN32) + +#include #include "arrow-adbc/adbc.h" +#include "arrow-adbc/adbc_driver_manager.h" +#include "current_arch.h" #include #include #include #include #include +#include #include #include #include +#include -#if defined(_WIN32) -#include // Must come first +using namespace std::string_literals; // NOLINT [build/namespaces] -#include -#include -#else -#include -#endif // defined(_WIN32) +ADBC_EXPORT +std::vector InternalAdbcParsePath(const std::string_view path); +ADBC_EXPORT +std::filesystem::path InternalAdbcUserConfigDir(); namespace { +/// \brief Where a search path came from (for error reporting) +enum class SearchPathSource { + kEnv, + kUser, + kRegistry, + kSystem, + kAdditional, + kConda, + kUnset, + kDoesNotExist, + kDisabled, + kOtherError, +}; + +using SearchPaths = std::vector>; + +void AddSearchPathsToError(const SearchPaths& search_paths, std::string& error_message) { + if (!search_paths.empty()) { + error_message += "\nAlso searched these paths for manifests:"; + for (const auto& [source, path] : search_paths) { + error_message += "\n\t"; + switch (source) { + case SearchPathSource::kEnv: + error_message += "ADBC_DRIVER_PATH: "; + break; + case SearchPathSource::kUser: + error_message += "user config dir: "; + break; + case SearchPathSource::kRegistry: + error_message += "Registry: "; + break; + case SearchPathSource::kSystem: + error_message += "system config dir: "; + break; + case SearchPathSource::kAdditional: + error_message += "additional search path: "; + break; + case SearchPathSource::kConda: + error_message += "Conda prefix: "; + break; + case 
SearchPathSource::kUnset: + error_message += "not set: "; + break; + case SearchPathSource::kDoesNotExist: + error_message += "does not exist: "; + break; + case SearchPathSource::kDisabled: + error_message += "not enabled at build time: "; + break; + case SearchPathSource::kOtherError: + // Don't add any prefix + break; + } + error_message += path.string(); + } + } +} + // Platform-specific helpers #if defined(_WIN32) @@ -114,7 +192,327 @@ struct OwnedError { } }; -// Driver state +#ifdef _WIN32 +using char_type = wchar_t; + +std::string Utf8Encode(const std::wstring& wstr) { + if (wstr.empty()) return std::string(); + int size_needed = WideCharToMultiByte( + CP_UTF8, 0, wstr.data(), static_cast(wstr.size()), NULL, 0, NULL, NULL); + std::string str_to(size_needed, 0); + WideCharToMultiByte(CP_UTF8, 0, wstr.data(), static_cast(wstr.size()), + str_to.data(), size_needed, NULL, NULL); + return str_to; +} + +std::wstring Utf8Decode(const std::string& str) { + if (str.empty()) return std::wstring(); + int size_needed = + MultiByteToWideChar(CP_UTF8, 0, str.data(), static_cast(str.size()), NULL, 0); + std::wstring wstr_to(size_needed, 0); + MultiByteToWideChar(CP_UTF8, 0, str.data(), static_cast(str.size()), + wstr_to.data(), size_needed); + return wstr_to; +} + +#else +using char_type = char; +#endif // _WIN32 + +/// \brief The location and entrypoint of a resolved driver. 
+struct DriverInfo { + std::string manifest_file; + int64_t manifest_version; + std::string driver_name; + std::filesystem::path lib_path; + std::string entrypoint; + + std::string version; + std::string source; +}; + +#ifdef _WIN32 +class RegistryKey { + public: + RegistryKey(HKEY root, const std::wstring_view subkey) noexcept + : root_(root), key_(nullptr) { + status_ = RegOpenKeyExW(root_, subkey.data(), 0, KEY_READ, &key_); + } + + ~RegistryKey() { + if (is_open() && key_ != nullptr) { + RegCloseKey(key_); + key_ = nullptr; + status_ = ERROR_REGISTRY_IO_FAILED; + } + } + + HKEY key() const { return key_; } + bool is_open() const { return status_ == ERROR_SUCCESS; } + LSTATUS status() const { return status_; } + + std::wstring GetString(const std::wstring& name, std::wstring default_value) { + if (!is_open()) return default_value; + + DWORD type = REG_SZ; + DWORD size = 0; + auto result = RegQueryValueExW(key_, name.data(), nullptr, &type, nullptr, &size); + if (result != ERROR_SUCCESS) return default_value; + if (type != REG_SZ) return default_value; + + std::wstring value(size, '\0'); + result = RegQueryValueExW(key_, name.data(), nullptr, &type, + reinterpret_cast(value.data()), &size); + if (result != ERROR_SUCCESS) return default_value; + return value; + } + + int32_t GetInt(const std::wstring& name, const int32_t default_value) { + if (!is_open()) return default_value; + + DWORD dwValue; + DWORD dataSize = sizeof(dwValue); + DWORD valueType; + auto result = RegQueryValueExW(key_, name.data(), nullptr, &valueType, + (LPBYTE)&dwValue, &dataSize); + if (result != ERROR_SUCCESS) return default_value; + if (valueType != REG_DWORD) return default_value; + return static_cast(dwValue); + } + + private: + HKEY root_; + HKEY key_; + LSTATUS status_; +}; + +AdbcStatusCode LoadDriverFromRegistry(HKEY root, const std::wstring& driver_name, + DriverInfo& info, struct AdbcError* error) { + // N.B. 
start all error messages with the subkey so that the calling code + // can prepend the name of 'root' to the error message (easier than trying + // to invoke win32 API to get the name of the HKEY) + static const LPCWSTR kAdbcDriverRegistry = L"SOFTWARE\\ADBC\\Drivers"; + RegistryKey drivers_key(root, kAdbcDriverRegistry); + if (!drivers_key.is_open()) { + std::string error_message = "SOFTWARE\\ADBC\\DRIVERS not found"s; + SetError(error, std::move(error_message)); + return ADBC_STATUS_NOT_FOUND; + } + + RegistryKey dkey(drivers_key.key(), driver_name); + if (!dkey.is_open()) { + std::string error_message = "SOFTWARE\\ADBC\\DRIVERS has no entry for driver \""s; + error_message += Utf8Encode(driver_name); + error_message += "\""s; + SetError(error, std::move(error_message)); + return ADBC_STATUS_NOT_FOUND; + } + + info.driver_name = Utf8Encode(dkey.GetString(L"name", L"")); + info.manifest_version = int64_t(dkey.GetInt(L"manifest_version", 1)); + if (info.manifest_version != 1) { + SetError(error, "Driver manifest version '" + std::to_string(info.manifest_version) + + "' is not supported by this driver manager."); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + info.entrypoint = Utf8Encode(dkey.GetString(L"entrypoint", L"")); + info.version = Utf8Encode(dkey.GetString(L"version", L"")); + info.source = Utf8Encode(dkey.GetString(L"source", L"")); + info.lib_path = std::filesystem::path(dkey.GetString(L"driver", L"")); + if (info.lib_path.empty()) { + std::string error_message = "SOFTWARE\\ADBC\\DRIVERS\\"s; + error_message += Utf8Encode(driver_name); + error_message += " has no driver path"s; + SetError(error, std::move(error_message)); + return ADBC_STATUS_NOT_FOUND; + } + return ADBC_STATUS_OK; +} +#endif // _WIN32 + +/// \return ADBC_STATUS_NOT_FOUND if the manifest does not contain a driver +/// path for this platform, ADBC_STATUS_INVALID_ARGUMENT if the manifest +/// could not be parsed, ADBC_STATUS_OK otherwise (`info` will be populated) +AdbcStatusCode 
LoadDriverManifest(const std::filesystem::path& driver_manifest, + DriverInfo& info, struct AdbcError* error) { + toml::table config; + try { + config = toml::parse_file(driver_manifest.native()); + } catch (const toml::parse_error& err) { + // Despite the name, this exception covers IO errors too. Hence, we can't + // differentiate between bad syntax and other I/O error. + std::string message = "Could not open manifest. "; + message += err.what(); + message += ". Manifest: "; + message += driver_manifest.string(); + SetError(error, std::move(message)); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + info.manifest_file = driver_manifest.string(); + info.driver_name = config["name"].value_or(""s); + info.manifest_version = config["manifest_version"].value_or(int64_t(1)); + if (info.manifest_version != 1) { + SetError(error, "Driver manifest version '" + std::to_string(info.manifest_version) + + "' is not supported by this driver manager."); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + info.entrypoint = config.at_path("Driver.entrypoint").value_or(""s); + info.version = config["version"].value_or(""s); + info.source = config["source"].value_or(""s); + + auto entrypoint = config.at_path("Driver.entrypoint"); + if (entrypoint) { + if (auto* ep = entrypoint.as_string()) { + info.entrypoint = ep->get(); + } else { + SetError(error, "Driver entrypoint not a string in manifest '"s + + driver_manifest.string() + "'"s); + return ADBC_STATUS_INVALID_ARGUMENT; + } + } + + auto driver = config.at_path("Driver.shared"); + if (toml::table* platforms = driver.as_table()) { + auto view = platforms->at_path(adbc::CurrentArch()); + if (!view) { + std::string message = "Driver path not found in manifest '"; + message += driver_manifest.string(); + message += "' for current architecture '"; + message += adbc::CurrentArch(); + message += "'. 
Architectures found:"; + for (const auto& [key, val] : *platforms) { + message += " "; + message += key; + } + SetError(error, std::move(message)); + return ADBC_STATUS_NOT_FOUND; + } else if (auto* path = view.as_string()) { + if (path->get().empty()) { + std::string message = "Driver path is an empty string in manifest '"; + message += driver_manifest.string(); + message += "' for current architecture '"; + message += adbc::CurrentArch(); + message += "'"; + SetError(error, std::move(message)); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + info.lib_path = path->get(); + return ADBC_STATUS_OK; + } else { + std::string message = "Driver path not found in manifest '"; + message += driver_manifest.string(); + message += "' for current architecture '"; + message += adbc::CurrentArch(); + message += "'. Value was not a string"; + SetError(error, std::move(message)); + return ADBC_STATUS_INVALID_ARGUMENT; + } + return ADBC_STATUS_OK; + } else if (auto* path = driver.as_string()) { + info.lib_path = path->get(); + if (info.lib_path.empty()) { + SetError(error, "Driver path is an empty string in manifest '"s + + driver_manifest.string() + "'"s); + return ADBC_STATUS_INVALID_ARGUMENT; + } + return ADBC_STATUS_OK; + } + SetError(error, "Driver path not defined in manifest '"s + driver_manifest.string() + + "'. 
`Driver.shared` must be a string or table"s); + return ADBC_STATUS_INVALID_ARGUMENT; +} + +SearchPaths GetEnvPaths(const char_type* env_var) { +#ifdef _WIN32 + size_t required_size; + + _wgetenv_s(&required_size, NULL, 0, env_var); + if (required_size == 0) { + return {}; + } + + std::wstring path_var; + path_var.resize(required_size); + _wgetenv_s(&required_size, path_var.data(), required_size, env_var); + // Remove null terminator + path_var.resize(required_size - 1); + auto path = Utf8Encode(path_var); +#else + const char* path_var = std::getenv(env_var); + if (!path_var) { + return {}; + } + std::string path(path_var); +#endif // _WIN32 + SearchPaths paths; + for (auto path : InternalAdbcParsePath(path)) { + paths.emplace_back(SearchPathSource::kEnv, path); + } + return paths; +} + +#ifdef _WIN32 +static const wchar_t* kAdbcDriverPath = L"ADBC_DRIVER_PATH"; +#else +static const char* kAdbcDriverPath = "ADBC_DRIVER_PATH"; +#endif // _WIN32 + +SearchPaths GetSearchPaths(const AdbcLoadFlags levels) { + SearchPaths paths; + if (levels & ADBC_LOAD_FLAG_SEARCH_ENV) { + // Check the ADBC_DRIVER_PATH environment variable + paths = GetEnvPaths(kAdbcDriverPath); + } + + if (levels & ADBC_LOAD_FLAG_SEARCH_USER) { + // Check the user configuration directory + std::filesystem::path user_config_dir = InternalAdbcUserConfigDir(); + if (!user_config_dir.empty() && std::filesystem::exists(user_config_dir)) { + paths.emplace_back(SearchPathSource::kUser, std::move(user_config_dir)); + } else { + paths.emplace_back(SearchPathSource::kDoesNotExist, std::move(user_config_dir)); + } + } + + if (levels & ADBC_LOAD_FLAG_SEARCH_SYSTEM) { + // System level behavior for Windows is to search the registry keys so we + // only need to check for macOS and fall back to Unix-like behavior as long + // as we're not on Windows +#if defined(__APPLE__) + const std::filesystem::path system_config_dir( + "/Library/Application Support/ADBC/Drivers"); + if (std::filesystem::exists(system_config_dir)) 
{ + paths.emplace_back(SearchPathSource::kSystem, std::move(system_config_dir)); + } else { + paths.emplace_back(SearchPathSource::kDoesNotExist, std::move(system_config_dir)); + } +#elif !defined(_WIN32) + const std::filesystem::path system_config_dir("/etc/adbc/drivers"); + if (std::filesystem::exists(system_config_dir)) { + paths.emplace_back(SearchPathSource::kSystem, std::move(system_config_dir)); + } else { + paths.emplace_back(SearchPathSource::kDoesNotExist, std::move(system_config_dir)); + } +#endif // defined(__APPLE__) + } + + return paths; +} + +bool HasExtension(const std::filesystem::path& path, const std::string& ext) { +#ifdef _WIN32 + auto wext = Utf8Decode(ext); + auto path_ext = path.extension().native(); + return path_ext.size() == wext.size() && + _wcsnicmp(path_ext.data(), wext.data(), wext.size()) == 0; +#else + return path.extension() == ext; +#endif // _WIN32 +} /// A driver DLL. struct ManagedLibrary { @@ -133,33 +531,322 @@ struct ManagedLibrary { ~ManagedLibrary() { Release(); } void Release() { - // TODO(apache/arrow-adbc#204): causes tests to segfault - // Need to refcount the driver DLL; also, errors may retain a reference to - // release() from the DLL - how to handle this? + // TODO(apache/arrow-adbc#204): causes tests to segfault. Need to + // refcount the driver DLL; also, errors may retain a reference to + // release() from the DLL - how to handle this? It's unlikely we can + // actually do this - in general shared libraries are not safe to unload. + } + + /// \brief Resolve the driver name to a concrete location. 
+ AdbcStatusCode GetDriverInfo( + const std::string_view driver_name, const AdbcLoadFlags load_options, + const std::vector& additional_search_paths, DriverInfo& info, + struct AdbcError* error) { + if (driver_name.empty()) { + SetError(error, "Driver name is empty"); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + // First try to treat the given driver name as a path to a manifest or shared library + std::filesystem::path driver_path(driver_name); + const bool allow_relative_paths = load_options & ADBC_LOAD_FLAG_ALLOW_RELATIVE_PATHS; + if (driver_path.has_extension()) { + if (driver_path.is_relative() && !allow_relative_paths) { + SetError(error, "Driver path is relative and relative paths are not allowed"); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + if (HasExtension(driver_path, ".toml")) { + // if the extension is .toml, attempt to load the manifest + // erroring if we fail + + auto status = LoadDriverManifest(driver_path, info, error); + if (status == ADBC_STATUS_OK) { + return Load(info.lib_path.c_str(), {}, error); + } + return status; + } + + // if the extension is not .toml, then just try to load the provided + // path as if it was an absolute path to a driver library + return Load(driver_path.c_str(), {}, error); + } + + if (driver_path.is_absolute()) { + // if we have an absolute path without an extension, first see if there's a + // toml file with the same name. 
+ driver_path.replace_extension(".toml"); + if (std::filesystem::exists(driver_path)) { + auto status = LoadDriverManifest(driver_path, info, error); + if (status == ADBC_STATUS_OK) { + return Load(info.lib_path.c_str(), {}, error); + } + } + + driver_path.replace_extension(""); + info.lib_path = driver_path; + // otherwise just try to load the provided path as if it was an absolute path + return Load(driver_path.c_str(), {}, error); + } + + if (driver_path.has_extension()) { + if (driver_path.is_relative() && !allow_relative_paths) { + SetError(error, "Driver path is relative and relative paths are not allowed"); + return ADBC_STATUS_INVALID_ARGUMENT; + } + +#if defined(_WIN32) + static const std::string kPlatformLibrarySuffix = ".dll"; +#elif defined(__APPLE__) + static const std::string kPlatformLibrarySuffix = ".dylib"; +#else + static const std::string kPlatformLibrarySuffix = ".so"; +#endif // defined(_WIN32) + if (HasExtension(driver_path, kPlatformLibrarySuffix)) { + info.lib_path = driver_path; + return Load(driver_path.c_str(), {}, error); + } + + SetError(error, "Driver name has unrecognized extension: " + + driver_path.extension().string()); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + // not an absolute path, no extension. 
Let's search the configured paths + // based on the options + return FindDriver(driver_path, load_options, additional_search_paths, info, error); + } + + /// \return ADBC_STATUS_NOT_FOUND if the driver shared library could not be + /// found (via dlopen) or if a manifest was found but did not contain a + /// path for the current platform, ADBC_STATUS_INVALID_ARGUMENT if a + /// manifest was found but could not be parsed, ADBC_STATUS_OK otherwise + /// + /// May modify search_paths to add error info + AdbcStatusCode SearchPathsForDriver(const std::filesystem::path& driver_path, + SearchPaths& search_paths, DriverInfo& info, + struct AdbcError* error) { + SearchPaths extra_debug_info; + for (const auto& [source, search_path] : search_paths) { + if (source == SearchPathSource::kRegistry || source == SearchPathSource::kUnset || + source == SearchPathSource::kDoesNotExist || + source == SearchPathSource::kDisabled || + source == SearchPathSource::kOtherError) { + continue; + } + std::filesystem::path full_path = search_path / driver_path; + + // check for toml first, then dll + full_path.replace_extension(".toml"); + if (std::filesystem::exists(full_path)) { + OwnedError intermediate_error; + + auto status = LoadDriverManifest(full_path, info, &intermediate_error.error); + if (status == ADBC_STATUS_OK) { + // Don't pass attempted_paths here; we'll generate the error at a higher level + status = Load(info.lib_path.c_str(), {}, &intermediate_error.error); + if (status == ADBC_STATUS_OK) { + return status; + } + std::string message = "found "; + message += full_path.string(); + if (intermediate_error.error.message) { + message += " but: "; + message += intermediate_error.error.message; + } else { + message += " could not load the driver it specified"; + } + extra_debug_info.emplace_back(SearchPathSource::kOtherError, + std::move(message)); + search_paths.insert(search_paths.end(), extra_debug_info.begin(), + extra_debug_info.end()); + return status; + } else if (status == 
ADBC_STATUS_INVALID_ARGUMENT) { + // The manifest was invalid. Don't ignore that! + search_paths.insert(search_paths.end(), extra_debug_info.begin(), + extra_debug_info.end()); + if (intermediate_error.error.message) { + std::string error_message = intermediate_error.error.message; + AddSearchPathsToError(search_paths, error_message); + SetError(error, std::move(error_message)); + } + return status; + } + // Should be NOT_FOUND otherwise + std::string message = "found "; + message += full_path.string(); + if (intermediate_error.error.message) { + message += " but: "; + message += intermediate_error.error.message; + } else { + message += " which did not define a driver for this platform"; + } + + extra_debug_info.emplace_back(SearchPathSource::kOtherError, std::move(message)); + } + + // remove the .toml extension; Load will add the DLL/SO/DYLIB suffix + full_path.replace_extension(""); + // Don't pass error here - it'll be suppressed anyways + auto status = Load(full_path.c_str(), {}, nullptr); + if (status == ADBC_STATUS_OK) { + return status; + } + } + + search_paths.insert(search_paths.end(), extra_debug_info.begin(), + extra_debug_info.end()); + return ADBC_STATUS_NOT_FOUND; } - AdbcStatusCode Load(const char* library, struct AdbcError* error) { + AdbcStatusCode FindDriver( + const std::filesystem::path& driver_path, const AdbcLoadFlags load_options, + const std::vector& additional_search_paths, DriverInfo& info, + struct AdbcError* error) { + if (driver_path.empty()) { + SetError(error, "Driver path is empty"); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + SearchPaths search_paths; + { + // First search the paths in the env var `ADBC_DRIVER_PATH`. + // Then search the runtime application-defined additional search paths. 
+ search_paths = GetSearchPaths(load_options & ADBC_LOAD_FLAG_SEARCH_ENV); + if (search_paths.empty()) { + search_paths.emplace_back(SearchPathSource::kUnset, "ADBC_DRIVER_PATH"); + } + for (const auto& path : additional_search_paths) { + search_paths.emplace_back(SearchPathSource::kAdditional, path); + } + +#if ADBC_CONDA_BUILD + // Then, if this is a conda build, search in the conda environment if + // it is activated. + if (load_options & ADBC_LOAD_FLAG_SEARCH_ENV) { +#ifdef _WIN32 + const wchar_t* conda_name = L"CONDA_PREFIX"; +#else + const char* conda_name = "CONDA_PREFIX"; +#endif // _WIN32 + auto venv = GetEnvPaths(conda_name); + if (!venv.empty()) { + for (const auto& [_, venv_path] : venv) { + search_paths.emplace_back(SearchPathSource::kConda, + venv_path / "etc" / "adbc" / "drivers"); + } + } + } +#else + if (load_options & ADBC_LOAD_FLAG_SEARCH_ENV) { + search_paths.emplace_back(SearchPathSource::kDisabled, "Conda prefix"); + } +#endif // ADBC_CONDA_BUILD + + auto status = SearchPathsForDriver(driver_path, search_paths, info, error); + if (status != ADBC_STATUS_NOT_FOUND) { + // If NOT_FOUND, then keep searching; if OK or INVALID_ARGUMENT, stop + return status; + } + } + + // We searched environment paths and additional search paths (if they + // exist), so now search the rest. +#ifdef _WIN32 + // On Windows, check registry keys, not just search paths. + if (load_options & ADBC_LOAD_FLAG_SEARCH_USER) { + // Check the user registry for the driver. 
+ auto status = + LoadDriverFromRegistry(HKEY_CURRENT_USER, driver_path.native(), info, error); + if (status == ADBC_STATUS_OK) { + return Load(info.lib_path.c_str(), {}, error); + } + if (error && error->message) { + std::string message = "HKEY_CURRENT_USER\\"s; + message += error->message; + search_paths.emplace_back(SearchPathSource::kRegistry, std::move(message)); + } else { + search_paths.emplace_back(SearchPathSource::kRegistry, + "not found in HKEY_CURRENT_USER"); + } + + auto user_paths = GetSearchPaths(ADBC_LOAD_FLAG_SEARCH_USER); + status = SearchPathsForDriver(driver_path, user_paths, info, error); + if (status != ADBC_STATUS_NOT_FOUND) { + return status; + } + search_paths.insert(search_paths.end(), user_paths.begin(), user_paths.end()); + } + + if (load_options & ADBC_LOAD_FLAG_SEARCH_SYSTEM) { + // Check the system registry for the driver. + auto status = + LoadDriverFromRegistry(HKEY_LOCAL_MACHINE, driver_path.native(), info, error); + if (status == ADBC_STATUS_OK) { + return Load(info.lib_path.c_str(), {}, error); + } + if (error && error->message) { + std::string message = "HKEY_LOCAL_MACHINE\\"s; + message += error->message; + search_paths.emplace_back(SearchPathSource::kRegistry, std::move(message)); + } else { + search_paths.emplace_back(SearchPathSource::kRegistry, + "not found in HKEY_LOCAL_MACHINE"); + } + + auto system_paths = GetSearchPaths(ADBC_LOAD_FLAG_SEARCH_SYSTEM); + status = SearchPathsForDriver(driver_path, system_paths, info, error); + if (status != ADBC_STATUS_NOT_FOUND) { + return status; + } + search_paths.insert(search_paths.end(), system_paths.begin(), system_paths.end()); + } + + info.lib_path = driver_path; + return Load(driver_path.c_str(), search_paths, error); +#else + // Otherwise, search the configured paths. 
+ SearchPaths more_search_paths = + GetSearchPaths(load_options & ~ADBC_LOAD_FLAG_SEARCH_ENV); + auto status = SearchPathsForDriver(driver_path, more_search_paths, info, error); + if (status == ADBC_STATUS_NOT_FOUND) { + // If we reach here, we didn't find the driver in any of the paths + // so let's just attempt to load it as default behavior. + search_paths.insert(search_paths.end(), more_search_paths.begin(), + more_search_paths.end()); + info.lib_path = driver_path; + return Load(driver_path.c_str(), search_paths, error); + } + return status; +#endif // _WIN32 + } + + /// \return ADBC_STATUS_NOT_FOUND if the driver shared library could not be + /// found, ADBC_STATUS_OK otherwise + AdbcStatusCode Load(const char_type* library, const SearchPaths& attempted_paths, + struct AdbcError* error) { std::string error_message; #if defined(_WIN32) - HMODULE handle = LoadLibraryExA(library, NULL, 0); + HMODULE handle = LoadLibraryExW(library, NULL, 0); if (!handle) { - error_message += library; - error_message += ": LoadLibraryExA() failed: "; + error_message += Utf8Encode(library); + error_message += ": LoadLibraryExW() failed: "; GetWinError(&error_message); - std::string full_driver_name = library; - full_driver_name += ".dll"; - handle = LoadLibraryExA(full_driver_name.c_str(), NULL, 0); + std::wstring full_driver_name = library; + full_driver_name += L".dll"; + handle = LoadLibraryExW(full_driver_name.c_str(), NULL, 0); if (!handle) { error_message += '\n'; - error_message += full_driver_name; - error_message += ": LoadLibraryExA() failed: "; + error_message += Utf8Encode(full_driver_name); + error_message += ": LoadLibraryExW() failed: "; GetWinError(&error_message); } } if (!handle) { + AddSearchPathsToError(attempted_paths, error_message); SetError(error, error_message); - return ADBC_STATUS_INTERNAL; + return ADBC_STATUS_NOT_FOUND; } else { this->handle = handle; } @@ -173,7 +860,7 @@ struct ManagedLibrary { void* handle = dlopen(library, RTLD_NOW | RTLD_LOCAL); 
 if (!handle) { - error_message = "[DriverManager] dlopen() failed: "; + error_message = "dlopen() failed: "; error_message += dlerror(); // If applicable, append the shared library prefix/extension and @@ -203,8 +890,9 @@ struct ManagedLibrary { if (handle) { this->handle = handle; } else { + AddSearchPathsToError(attempted_paths, error_message); SetError(error, error_message); - return ADBC_STATUS_INTERNAL; + return ADBC_STATUS_NOT_FOUND; } #endif // defined(_WIN32) return ADBC_STATUS_OK; @@ -567,6 +1255,13 @@ AdbcStatusCode StatementGetParameterSchema(struct AdbcStatement* statement, return ADBC_STATUS_NOT_IMPLEMENTED; } +AdbcStatusCode StatementNextResult(struct AdbcStatement*, struct ArrowSchema*, + struct ArrowArrayStream*, struct AdbcPartitions*, + int64_t*, struct AdbcError* error) { + SetError(error, "AdbcStatementNextResult not implemented"); + return ADBC_STATUS_NOT_IMPLEMENTED; +} + AdbcStatusCode StatementPrepare(struct AdbcStatement*, struct AdbcError* error) { SetError(error, "AdbcStatementPrepare not implemented"); return ADBC_STATUS_NOT_IMPLEMENTED; @@ -617,6 +1312,8 @@ struct TempDatabase { std::string driver; std::string entrypoint; AdbcDriverInitFunc init_func = nullptr; + AdbcLoadFlags load_flags = ADBC_LOAD_FLAG_ALLOW_RELATIVE_PATHS; + std::string additional_search_path_list; }; /// Temporary state while the database is being configured. @@ -631,9 +1328,95 @@ static const char kDefaultEntrypoint[] = "AdbcDriverInit"; } // namespace // Other helpers (intentionally not in an anonymous namespace so they can be tested) +ADBC_EXPORT +std::filesystem::path InternalAdbcUserConfigDir() { + std::filesystem::path config_dir; +#if defined(_WIN32) + // SHGetFolderPath is just an alias to SHGetKnownFolderPath since Vista + // so let's just call the updated function. 
+ PWSTR path = nullptr; + auto hres = SHGetKnownFolderPath(FOLDERID_LocalAppData, 0, nullptr, &path); + if (!SUCCEEDED(hres)) { + return config_dir; + } + + std::wstring wpath(path); + std::filesystem::path dir(std::move(wpath)); + if (!dir.empty()) { + config_dir = std::filesystem::path(dir); + config_dir /= "ADBC/Drivers"; + } +#elif defined(__APPLE__) + auto dir = std::getenv("HOME"); + if (dir) { + config_dir = std::filesystem::path(dir); + config_dir /= "Library/Application Support/ADBC/Drivers"; + } +#elif defined(__linux__) + auto dir = std::getenv("XDG_CONFIG_HOME"); + if (!dir) { + dir = std::getenv("HOME"); + if (dir) { + config_dir = std::filesystem::path(dir) /= ".config"; + } + } else { + config_dir = std::filesystem::path(dir); + } + + if (!config_dir.empty()) { + config_dir = config_dir / "adbc" / "drivers"; + } +#endif // defined(_WIN32) + + return config_dir; +} + +std::vector InternalAdbcParsePath(const std::string_view path) { + std::vector result; + if (path.empty()) { + return result; + } + +#ifdef _WIN32 + constexpr char delimiter = ';'; + + // pulling the logic from Go's filepath.SplitList function + // where windows checks for quoted/escaped sections while splitting + // but unix doesn't. 
+ // see + // https://cs.opensource.google/go/go/+/refs/tags/go1.24.3:src/path/filepath/path_windows.go + bool in_quotes = false; + size_t start = 0; + for (size_t i = 0; i < path.size(); ++i) { + if (path[i] == '"') { + in_quotes = !in_quotes; + } else if (path[i] == delimiter && !in_quotes) { + result.emplace_back(path.substr(start, i - start)); + start = i + 1; + } + } + result.emplace_back(path.substr(start)); +#else + constexpr char delimiter = ':'; + + size_t start = 0; + size_t end = 0; + while ((end = path.find(delimiter, start)) != std::string::npos) { + result.emplace_back(path.substr(start, end - start)); + start = end + 1; + } + result.emplace_back(path.substr(start)); +#endif // _WIN32 + + // remove empty paths + result.erase(std::remove_if(result.begin(), result.end(), + [](const auto& p) { return p.empty(); }), + result.end()); + return result; +} ADBC_EXPORT -std::string AdbcDriverManagerDefaultEntrypoint(const std::string& driver) { +std::string InternalAdbcDriverManagerDefaultEntrypoint(const std::string& driver) { /// - libadbc_driver_sqlite.so.2.0.0 -> AdbcDriverSqliteInit /// - adbc_driver_sqlite.dll -> AdbcDriverSqliteInit /// - proprietary_driver.dll -> AdbcProprietaryDriverInit @@ -842,7 +1625,15 @@ AdbcStatusCode AdbcDatabaseSetOption(struct AdbcDatabase* database, const char* TempDatabase* args = reinterpret_cast(database->private_data); if (std::strcmp(key, "driver") == 0) { - args->driver = value; + std::string_view v{value}; + std::string::size_type pos = v.find("://"); + if (pos != std::string::npos) { + std::string_view d = v.substr(0, pos); + args->driver = std::string{d}; + args->options["uri"] = std::string{v}; + } else { + args->driver = value; + } } else if (std::strcmp(key, "entrypoint") == 0) { args->entrypoint = value; } else { @@ -889,6 +1680,35 @@ AdbcStatusCode AdbcDatabaseSetOptionDouble(struct AdbcDatabase* database, const return ADBC_STATUS_OK; } +AdbcStatusCode AdbcDriverManagerDatabaseSetLoadFlags(struct AdbcDatabase* 
database, + AdbcLoadFlags flags, + struct AdbcError* error) { + if (database->private_driver) { + SetError(error, "Cannot SetLoadFlags after AdbcDatabaseInit"); + return ADBC_STATUS_INVALID_STATE; + } + + TempDatabase* args = reinterpret_cast(database->private_data); + args->load_flags = flags; + return ADBC_STATUS_OK; +} + +AdbcStatusCode AdbcDriverManagerDatabaseSetAdditionalSearchPathList( + struct AdbcDatabase* database, const char* path_list, struct AdbcError* error) { + if (database->private_driver) { + SetError(error, "Cannot SetAdditionalSearchPathList after AdbcDatabaseInit"); + return ADBC_STATUS_INVALID_STATE; + } + + TempDatabase* args = reinterpret_cast(database->private_data); + if (path_list) { + args->additional_search_path_list.assign(path_list); + } else { + args->additional_search_path_list.clear(); + } + return ADBC_STATUS_OK; +} + AdbcStatusCode AdbcDriverManagerDatabaseSetInitFunc(struct AdbcDatabase* database, AdbcDriverInitFunc init_func, struct AdbcError* error) { @@ -921,14 +1741,17 @@ AdbcStatusCode AdbcDatabaseInit(struct AdbcDatabase* database, struct AdbcError* // So we don't confuse a driver into thinking it's initialized already database->private_data = nullptr; if (args->init_func) { - status = AdbcLoadDriverFromInitFunc(args->init_func, ADBC_VERSION_1_1_0, + status = AdbcLoadDriverFromInitFunc(args->init_func, ADBC_VERSION_1_2_0, database->private_driver, error); } else if (!args->entrypoint.empty()) { - status = AdbcLoadDriver(args->driver.c_str(), args->entrypoint.c_str(), - ADBC_VERSION_1_1_0, database->private_driver, error); + status = AdbcFindLoadDriver(args->driver.c_str(), args->entrypoint.c_str(), + ADBC_VERSION_1_2_0, args->load_flags, + args->additional_search_path_list.data(), + database->private_driver, error); } else { - status = AdbcLoadDriver(args->driver.c_str(), nullptr, ADBC_VERSION_1_1_0, - database->private_driver, error); + status = AdbcFindLoadDriver( + args->driver.c_str(), nullptr, ADBC_VERSION_1_2_0, 
args->load_flags, + args->additional_search_path_list.data(), database->private_driver, error); } if (status != ADBC_STATUS_OK) { @@ -1658,39 +2481,57 @@ const char* AdbcStatusCodeMessage(AdbcStatusCode code) { #undef CASE } -AdbcStatusCode AdbcLoadDriver(const char* driver_name, const char* entrypoint, - int version, void* raw_driver, struct AdbcError* error) { - AdbcDriverInitFunc init_func; +AdbcStatusCode AdbcFindLoadDriver(const char* driver_name, const char* entrypoint, + const int version, const AdbcLoadFlags load_options, + const char* additional_search_path_list, + void* raw_driver, struct AdbcError* error) { + AdbcDriverInitFunc init_func = nullptr; std::string error_message; switch (version) { case ADBC_VERSION_1_0_0: case ADBC_VERSION_1_1_0: + case ADBC_VERSION_1_2_0: break; default: - SetError(error, "Only ADBC 1.0.0 and 1.1.0 are supported"); + SetError(error, "Only ADBC 1.0.0, 1.1.0 and 1.2.0 are supported"); return ADBC_STATUS_NOT_IMPLEMENTED; } if (!raw_driver) { - SetError(error, "Must provide non-NULL raw_driver"); + SetError(error, "Driver pointer is null"); + return ADBC_STATUS_INVALID_ARGUMENT; + } + if (!driver_name) { + SetError(error, "Driver name is null"); return ADBC_STATUS_INVALID_ARGUMENT; } - auto* driver = reinterpret_cast(raw_driver); ManagedLibrary library; - AdbcStatusCode status = library.Load(driver_name, error); + DriverInfo info; + if (entrypoint) { + info.entrypoint = entrypoint; + } + + std::vector additional_paths; + if (additional_search_path_list) { + additional_paths = InternalAdbcParsePath(additional_search_path_list); + } + + auto* driver = reinterpret_cast(raw_driver); + + AdbcStatusCode status = + library.GetDriverInfo(driver_name, load_options, additional_paths, info, error); if (status != ADBC_STATUS_OK) { - // AdbcDatabaseInit tries to call this if set driver->release = nullptr; return status; } void* load_handle = nullptr; - if (entrypoint) { - status = library.Lookup(entrypoint, &load_handle, error); + if 
(!info.entrypoint.empty()) { + status = library.Lookup(info.entrypoint.c_str(), &load_handle, error); } else { - auto name = AdbcDriverManagerDefaultEntrypoint(driver_name); + auto name = InternalAdbcDriverManagerDefaultEntrypoint(info.lib_path.string()); status = library.Lookup(name.c_str(), &load_handle, error); if (status != ADBC_STATUS_OK) { status = library.Lookup(kDefaultEntrypoint, &load_handle, error); @@ -1716,11 +2557,22 @@ AdbcStatusCode AdbcLoadDriver(const char* driver_name, const char* entrypoint, return status; } +AdbcStatusCode AdbcLoadDriver(const char* driver_name, const char* entrypoint, + int version, void* raw_driver, struct AdbcError* error) { + // maintain old behavior of allowing relative paths (because dlopen allows it) + // but don't enable searching for manifests by default. It will need to be explicitly + // enabled by calling AdbcFindLoadDriver directly. + return AdbcFindLoadDriver(driver_name, entrypoint, version, + ADBC_LOAD_FLAG_ALLOW_RELATIVE_PATHS, nullptr, raw_driver, + error); +} + AdbcStatusCode AdbcLoadDriverFromInitFunc(AdbcDriverInitFunc init_func, int version, void* raw_driver, struct AdbcError* error) { constexpr std::array kSupportedVersions = { ADBC_VERSION_1_1_0, ADBC_VERSION_1_0_0, + ADBC_VERSION_1_2_0, }; if (!raw_driver) { @@ -1731,9 +2583,10 @@ AdbcStatusCode AdbcLoadDriverFromInitFunc(AdbcDriverInitFunc init_func, int vers switch (version) { case ADBC_VERSION_1_0_0: case ADBC_VERSION_1_1_0: + case ADBC_VERSION_1_2_0: break; default: - SetError(error, "Only ADBC 1.0.0 and 1.1.0 are supported"); + SetError(error, "Only ADBC 1.0.0, 1.1.0 and 1.2.0 are supported"); return ADBC_STATUS_NOT_IMPLEMENTED; } @@ -1826,6 +2679,10 @@ AdbcStatusCode AdbcLoadDriverFromInitFunc(AdbcDriverInitFunc init_func, int vers FILL_DEFAULT(driver, StatementSetOptionDouble); FILL_DEFAULT(driver, StatementSetOptionInt); } + if (version >= ADBC_VERSION_1_2_0) { + auto* driver = reinterpret_cast(raw_driver); + FILL_DEFAULT(driver, 
StatementNextResult); + } return ADBC_STATUS_OK; diff --git a/c/driver_manager/adbc_driver_manager_test.cc b/c/driver_manager/adbc_driver_manager_test.cc index c2342ebae2..18f5f74891 100644 --- a/c/driver_manager/adbc_driver_manager_test.cc +++ b/c/driver_manager/adbc_driver_manager_test.cc @@ -15,19 +15,30 @@ // specific language governing permissions and limitations // under the License. +#if defined(_WIN32) +#include +#endif + #include #include -#include +#include +#include +#include // NOLINT [build/c++17] +#include #include +#include #include #include "arrow-adbc/adbc.h" #include "arrow-adbc/adbc_driver_manager.h" +#include "current_arch.h" #include "validation/adbc_validation.h" #include "validation/adbc_validation_util.h" -std::string AdbcDriverManagerDefaultEntrypoint(const std::string& filename); +std::string InternalAdbcDriverManagerDefaultEntrypoint(const std::string& filename); +std::vector InternalAdbcParsePath(const std::string_view path); +std::filesystem::path InternalAdbcUserConfigDir(); // Tests of the SQLite example driver, except using the driver manager @@ -299,7 +310,7 @@ class SqliteStatementTest : public ::testing::Test, }; ADBCV_TEST_STATEMENT(SqliteStatementTest) -TEST(AdbcDriverManagerInternal, AdbcDriverManagerDefaultEntrypoint) { +TEST(AdbcDriverManagerInternal, InternalAdbcDriverManagerDefaultEntrypoint) { for (const auto& driver : { "adbc_driver_sqlite", "adbc_driver_sqlite.dll", @@ -312,7 +323,8 @@ TEST(AdbcDriverManagerInternal, AdbcDriverManagerDefaultEntrypoint) { "C:\\System32\\adbc_driver_sqlite.dll", }) { SCOPED_TRACE(driver); - EXPECT_EQ("AdbcDriverSqliteInit", ::AdbcDriverManagerDefaultEntrypoint(driver)); + EXPECT_EQ("AdbcDriverSqliteInit", + ::InternalAdbcDriverManagerDefaultEntrypoint(driver)); } for (const auto& driver : { @@ -322,7 +334,7 @@ TEST(AdbcDriverManagerInternal, AdbcDriverManagerDefaultEntrypoint) { "C:\\System32\\sqlite.dll", }) { SCOPED_TRACE(driver); - EXPECT_EQ("AdbcSqliteInit", 
::AdbcDriverManagerDefaultEntrypoint(driver)); + EXPECT_EQ("AdbcSqliteInit", ::InternalAdbcDriverManagerDefaultEntrypoint(driver)); } for (const auto& driver : { @@ -332,7 +344,721 @@ TEST(AdbcDriverManagerInternal, AdbcDriverManagerDefaultEntrypoint) { "C:\\System32\\proprietary_engine.dll", }) { SCOPED_TRACE(driver); - EXPECT_EQ("AdbcProprietaryEngineInit", ::AdbcDriverManagerDefaultEntrypoint(driver)); + EXPECT_EQ("AdbcProprietaryEngineInit", + ::InternalAdbcDriverManagerDefaultEntrypoint(driver)); + } + + for (const auto& driver : { + "driver_example", + "libdriver_example.so", + }) { + SCOPED_TRACE(driver); + EXPECT_EQ("AdbcDriverExampleInit", + ::InternalAdbcDriverManagerDefaultEntrypoint(driver)); + } +} + +TEST(AdbcDriverManagerInternal, InternalAdbcParsePath) { + // Test parsing a path of directories +#ifdef _WIN32 + static const char* const delimiter = ";"; +#else + static const char* const delimiter = ":"; +#endif + + std::vector paths = { + "/usr/lib/adbc/drivers", "/usr/local/lib/adbc/drivers", + "/opt/adbc/drivers", "/home/user/.config/adbc/drivers", +#ifdef _WIN32 + "/home/\":foo:\"/bar", +#endif + }; + + std::ostringstream joined; + std::copy(paths.begin(), paths.end(), + std::ostream_iterator(joined, delimiter)); + + auto output = InternalAdbcParsePath(joined.str()); + EXPECT_THAT(output, ::testing::ElementsAreArray(paths)); +} + +class DriverManifest : public ::testing::Test { + public: + void SetUp() override { + std::memset(&driver, 0, sizeof(driver)); + std::memset(&error, 0, sizeof(error)); + +#ifndef ADBC_DRIVER_MANAGER_TEST_LIB + GTEST_SKIP() << "ADBC_DRIVER_MANAGER_TEST_LIB is not defined. 
" + "This test requires a driver library to be specified."; +#else + driver_path = std::filesystem::path(ADBC_DRIVER_MANAGER_TEST_LIB); + if (!std::filesystem::exists(driver_path)) { + GTEST_SKIP() << "Driver library does not exist: " << driver_path; + } + + simple_manifest = toml::table{ + {"name", "SQLite3"}, + {"publisher", "arrow-adbc"}, + {"version", "X.Y.Z"}, + {"ADBC", + toml::table{ + {"version", "1.1.0"}, + }}, + {"Driver", + toml::table{ + {"shared", + toml::table{ + {adbc::CurrentArch(), driver_path.string()}, + }}, + }}, + }; + + temp_dir = std::filesystem::temp_directory_path() / "adbc_driver_manager_test"; + std::filesystem::create_directories(temp_dir); +#endif + } + + void TearDown() override { + if (error.release) { + error.release(&error); + } + + if (driver.release) { + ASSERT_THAT(driver.release(&driver, &error), IsOkStatus(&error)); + ASSERT_EQ(driver.private_data, nullptr); + ASSERT_EQ(driver.private_manager, nullptr); + } + + driver_path.clear(); + if (std::filesystem::exists(temp_dir)) { + std::filesystem::remove_all(temp_dir); + } } + + protected: + void SetConfigPath(const char* path) { +#ifdef _WIN32 + ASSERT_TRUE(SetEnvironmentVariable("ADBC_DRIVER_PATH", path)); +#else + setenv("ADBC_DRIVER_PATH", path, 1); +#endif + } + + void UnsetConfigPath() { SetConfigPath(""); } + + struct AdbcDriver driver = {}; + struct AdbcError error = {}; + + std::filesystem::path driver_path; + std::filesystem::path temp_dir; + toml::table simple_manifest; +}; + +TEST_F(DriverManifest, LoadDriverEnv) { + ASSERT_THAT(AdbcFindLoadDriver("sqlite", nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + Not(IsOkStatus(&error))); + + std::ofstream test_manifest_file(temp_dir / "sqlite.toml"); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + test_manifest_file.close(); + + SetConfigPath(temp_dir.string().c_str()); + + ASSERT_THAT(AdbcFindLoadDriver("sqlite", nullptr, ADBC_VERSION_1_1_0, + 
ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsOkStatus(&error)); + + ASSERT_TRUE(std::filesystem::remove(temp_dir / "sqlite.toml")); + + UnsetConfigPath(); +} + +TEST_F(DriverManifest, LoadNonAsciiPath) { + ASSERT_THAT(AdbcFindLoadDriver("sqlite", nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + Not(IsOkStatus(&error))); + +#ifdef _WIN32 + std::filesystem::path non_ascii_dir = temp_dir / L"majestik møøse"; +#else + std::filesystem::path non_ascii_dir = temp_dir / "majestik møøse"; +#endif + + ASSERT_TRUE(std::filesystem::create_directories(non_ascii_dir)); + + std::ofstream test_manifest_file(non_ascii_dir / "sqlite.toml"); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + test_manifest_file.close(); + + SetConfigPath(non_ascii_dir.string().c_str()); + + ASSERT_THAT(AdbcFindLoadDriver("sqlite", nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsOkStatus(&error)); + + ASSERT_TRUE(std::filesystem::remove(non_ascii_dir / "sqlite.toml")); + + UnsetConfigPath(); +} + +TEST_F(DriverManifest, DisallowEnvConfig) { + std::ofstream test_manifest_file(temp_dir / "sqlite.toml"); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + test_manifest_file.close(); + + SetConfigPath(temp_dir.string().c_str()); + + auto load_options = ADBC_LOAD_FLAG_DEFAULT & ~ADBC_LOAD_FLAG_SEARCH_ENV; + ASSERT_THAT(AdbcFindLoadDriver("sqlite", nullptr, ADBC_VERSION_1_1_0, load_options, + nullptr, &driver, &error), + Not(IsOkStatus(&error))); + + ASSERT_TRUE(std::filesystem::remove(temp_dir / "sqlite.toml")); + + UnsetConfigPath(); +} + +TEST_F(DriverManifest, ConfigEntrypoint) { + auto manifest_with_bad_entrypoint = simple_manifest; + // Override the entrypoint in the manifest + manifest_with_bad_entrypoint.erase("Driver"); + manifest_with_bad_entrypoint.insert( + "Driver", toml::table{ + {"entrypoint", "BadEntrypointSymbolName"}, + {"shared", + 
toml::table{ + {adbc::CurrentArch(), driver_path.string()}, + }}, + }); + + auto filepath = temp_dir / "sqlite.toml"; + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_with_bad_entrypoint; + test_manifest_file.close(); + + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + Not(IsOkStatus(&error))); + + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, LoadAbsolutePath) { + auto filepath = temp_dir / "sqlite.toml"; + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + test_manifest_file.close(); + + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsOkStatus(&error)); + + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, LoadAbsolutePathNoExtension) { + auto filepath = temp_dir / "sqlite.toml"; + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + test_manifest_file.close(); + + auto noext = filepath; + noext.replace_extension(); // Remove the .toml extension + ASSERT_THAT(AdbcFindLoadDriver(noext.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsOkStatus(&error)); + + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, LoadRelativePath) { + std::ofstream test_manifest_file("sqlite.toml"); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + test_manifest_file.close(); + + ASSERT_THAT(AdbcFindLoadDriver("sqlite.toml", nullptr, ADBC_VERSION_1_1_0, 0, nullptr, + &driver, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + + ASSERT_THAT( + AdbcFindLoadDriver("sqlite.toml", nullptr, 
ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_ALLOW_RELATIVE_PATHS, nullptr, &driver, &error), + IsOkStatus(&error)); + + ASSERT_TRUE(std::filesystem::remove("sqlite.toml")); } + +TEST_F(DriverManifest, NotFound) { + ASSERT_THAT(AdbcFindLoadDriver("nosuchdriver", nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_NOT_FOUND, &error)); + ASSERT_THAT(error.message, + ::testing::HasSubstr("Also searched these paths for manifests:\n\tnot " + "set: ADBC_DRIVER_PATH")); +} + +TEST_F(DriverManifest, ManifestDriverMissing) { + // Create a manifest without the "Driver" section + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + // Attempt to load the driver + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + + ASSERT_THAT(error.message, ::testing::HasSubstr("Driver path not defined in manifest")); + ASSERT_THAT(error.message, + ::testing::HasSubstr("`Driver.shared` must be a string or table")); + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestDriverMissingAdbcDatabase) { + // Similar test as above but with AdbcDatabaseInit path and using the + // additional search path. 
+ // Create a manifest without the "Driver" section + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + adbc_validation::Handle database; + ASSERT_THAT(AdbcDatabaseNew(&database.value, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseSetOption(&database.value, "driver", "sqlite", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcDriverManagerDatabaseSetLoadFlags(&database.value, + ADBC_LOAD_FLAG_DEFAULT, &error), + IsOkStatus(&error)); + std::string search_path = temp_dir.string(); + ASSERT_THAT(AdbcDriverManagerDatabaseSetAdditionalSearchPathList( + &database.value, search_path.data(), &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseInit(&database.value, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + ASSERT_THAT(error.message, ::testing::HasSubstr("Driver path not defined in manifest")); + ASSERT_THAT(error.message, + ::testing::HasSubstr("`Driver.shared` must be a string or table")); + + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestDriverInvalid) { + // "Driver" section is not a table + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + manifest_without_driver.insert("Driver", toml::table{{"shared", true}}); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + // Attempt to load the driver + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + + ASSERT_THAT(error.message, 
::testing::HasSubstr("Driver path not defined in manifest")); + ASSERT_THAT(error.message, + ::testing::HasSubstr("`Driver.shared` must be a string or table")); + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestDriverEmpty) { + // "Driver" section is not a table + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + manifest_without_driver.insert("Driver", toml::table{{"shared", ""}}); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + // Attempt to load the driver + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + + ASSERT_THAT(error.message, + ::testing::HasSubstr("Driver path is an empty string in manifest")); + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestWrongArch) { + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + manifest_without_driver.insert("Driver", + toml::table{ + {"shared", + toml::table{ + {"non-existent", "path/to/bad/driver.so"}, + {"windows-alpha64", "path/to/bad/driver.so"}, + }}, + }); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + // Attempt to load the driver + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_NOT_FOUND, &error)); + + ASSERT_THAT(error.message, ::testing::HasSubstr("Driver path not found in manifest")); + ASSERT_THAT(error.message, + 
::testing::HasSubstr("Architectures found: non-existent windows-alpha64")); + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestDriverMissingArchAdbcDatabase) { + // Similar test as above but with AdbcDatabaseInit path and using the + // additional search path. + // Create a manifest without the "Driver" section + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + manifest_without_driver.insert("Driver", + toml::table{ + {"shared", + toml::table{ + {"non-existent", "path/to/bad/driver.so"}, + {"windows-alpha64", "path/to/bad/driver.so"}, + }}, + }); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + adbc_validation::Handle database; + ASSERT_THAT(AdbcDatabaseNew(&database.value, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseSetOption(&database.value, "driver", "sqlite", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcDriverManagerDatabaseSetLoadFlags(&database.value, + ADBC_LOAD_FLAG_DEFAULT, &error), + IsOkStatus(&error)); + std::string search_path = temp_dir.string(); + ASSERT_THAT(AdbcDriverManagerDatabaseSetAdditionalSearchPathList( + &database.value, search_path.data(), &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseInit(&database.value, &error), + IsStatus(ADBC_STATUS_NOT_FOUND, &error)); + ASSERT_THAT(error.message, ::testing::HasSubstr("sqlite.toml but:")); + ASSERT_THAT(error.message, + ::testing::HasSubstr("Architectures found: non-existent windows-alpha64")); + + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestDriverPointsNowhere) { + // Similar test as above but with AdbcDatabaseInit path and using the + // additional search path. 
+ // Create a manifest without the "Driver" section + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + // The idea is that we can find the manifest, but not the driver it points to. + manifest_without_driver.insert("Driver", toml::table{ + {"shared", + toml::table{ + {"linux_arm64", "adbc-goosedb"}, + {"linux_amd64", "adbc-goosedb"}, + {"macos_arm64", "adbc-goosedb"}, + {"macos_amd64", "adbc-goosedb"}, + {"windows_arm64", "adbc-goosedb"}, + {"windows_amd64", "adbc-goosedb"}, + }}, + }); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + adbc_validation::Handle database; + ASSERT_THAT(AdbcDatabaseNew(&database.value, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseSetOption(&database.value, "driver", "sqlite", &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcDriverManagerDatabaseSetLoadFlags(&database.value, + ADBC_LOAD_FLAG_DEFAULT, &error), + IsOkStatus(&error)); + std::string search_path = temp_dir.string(); + ASSERT_THAT(AdbcDriverManagerDatabaseSetAdditionalSearchPathList( + &database.value, search_path.data(), &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseInit(&database.value, &error), + IsStatus(ADBC_STATUS_NOT_FOUND, &error)); + ASSERT_THAT(error.message, ::testing::HasSubstr("sqlite.toml but:")); + // Message is platform-specific but something like "dlopen() failed: + // adbc-goosedb: cannot open shared object file..." 
+ ASSERT_THAT(error.message, ::testing::HasSubstr("adbc-goosedb")); + + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestArchPathEmpty) { + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + manifest_without_driver.insert("Driver", toml::table{ + {"shared", + toml::table{ + {"linux_arm64", ""}, + {"linux_amd64", ""}, + {"macos_arm64", ""}, + {"macos_amd64", ""}, + {"windows_arm64", ""}, + {"windows_amd64", ""}, + }}, + }); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + // Attempt to load the driver + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + + ASSERT_THAT(error.message, + ::testing::HasSubstr("Driver path is an empty string in manifest")); + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestArchPathInvalid) { + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + manifest_without_driver.insert("Driver", toml::table{ + {"shared", + toml::table{ + {"linux_arm64", 42}, + {"linux_amd64", 42}, + {"macos_arm64", 42}, + {"macos_amd64", 42}, + {"windows_arm64", 42}, + {"windows_amd64", 42}, + }}, + }); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + // Attempt to load the driver + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + + ASSERT_THAT(error.message, 
::testing::HasSubstr("Driver path not found in manifest")); + ASSERT_THAT(error.message, ::testing::HasSubstr("Value was not a string")); + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestEntrypointInvalid) { + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_without_driver = simple_manifest; + manifest_without_driver.erase("Driver"); + manifest_without_driver.insert("Driver", toml::table{ + {"shared", "foobar"}, + {"entrypoint", 42}, + }); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_without_driver; + test_manifest_file.close(); + + // Attempt to load the driver + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + + ASSERT_THAT(error.message, + ::testing::HasSubstr("Driver entrypoint not a string in manifest")); + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +TEST_F(DriverManifest, ManifestBadVersion) { + auto filepath = temp_dir / "sqlite.toml"; + toml::table manifest_with_bad_version = simple_manifest; + manifest_with_bad_version.insert("manifest_version", 2); + + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << manifest_with_bad_version; + test_manifest_file.close(); + + // Attempt to load the driver + ASSERT_THAT(AdbcFindLoadDriver(filepath.string().data(), nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsStatus(ADBC_STATUS_INVALID_ARGUMENT, &error)); + + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + +// only build and run test that puts files in the users home directory if +// it's been enabled via the build system setting this compile def +#ifdef ADBC_DRIVER_MANAGER_TEST_MANIFEST_USER_LEVEL +TEST_F(DriverManifest, LoadUserLevelManifest) { + 
ASSERT_THAT(AdbcFindLoadDriver("adbc-test-sqlite", nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + Not(IsOkStatus(&error))); + + auto user_config_dir = InternalAdbcUserConfigDir(); + bool created = false; + if (!std::filesystem::exists(user_config_dir)) { + ASSERT_TRUE(std::filesystem::create_directories(user_config_dir)); + created = true; + } + + std::ofstream test_manifest_file(user_config_dir / "adbc-test-sqlite.toml"); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + test_manifest_file.close(); + + // fail to load if flag doesn't have ADBC_LOAD_FLAG_SEARCH_USER + ASSERT_THAT(AdbcFindLoadDriver("adbc-test-sqlite", nullptr, ADBC_VERSION_1_1_0, 0, + nullptr, &driver, &error), + Not(IsOkStatus(&error))); + + // succeed with default load options + ASSERT_THAT(AdbcFindLoadDriver("adbc-test-sqlite", nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsOkStatus(&error)); + + ASSERT_TRUE(std::filesystem::remove(user_config_dir / "adbc-test-sqlite.toml")); + if (created) { + std::filesystem::remove_all(user_config_dir); + } +} +#endif + +// only build and run test that creates / adds a file to /etc/adbc/drivers if +// it's been enabled via the build system setting this compile def +#ifdef ADBC_DRIVER_MANAGER_TEST_MANIFEST_SYSTEM_LEVEL +TEST_F(DriverManifest, LoadSystemLevelManifest) { + ASSERT_THAT(AdbcFindLoadDriver("adbc-test-sqlite", nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + Not(IsOkStatus(&error))); + + auto system_config_dir = std::filesystem::path("/etc/adbc/drivers"); + bool created = false; + if (!std::filesystem::exists(system_config_dir)) { + ASSERT_TRUE(std::filesystem::create_directories(system_config_dir)); + created = true; + } + + std::ofstream test_manifest_file(system_config_dir / "adbc-test-sqlite.toml"); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + 
test_manifest_file.close(); + + // fail to load if flag doesn't have ADBC_LOAD_FLAG_SEARCH_SYSTEM + ASSERT_THAT(AdbcFindLoadDriver("adbc-test-sqlite", nullptr, ADBC_VERSION_1_1_0, 0, + nullptr, &driver, &error), + Not(IsOkStatus(&error))); + + // succeed with default load options + ASSERT_THAT(AdbcFindLoadDriver("adbc-test-sqlite", nullptr, ADBC_VERSION_1_1_0, + ADBC_LOAD_FLAG_DEFAULT, nullptr, &driver, &error), + IsOkStatus(&error)); + + ASSERT_TRUE(std::filesystem::remove(system_config_dir / "adbc-test-sqlite.toml")); + if (created) { + std::filesystem::remove_all(system_config_dir); + } +} +#endif + +TEST_F(DriverManifest, CondaPrefix) { +#if ADBC_CONDA_BUILD + constexpr bool is_conda_build = true; +#else + constexpr bool is_conda_build = false; +#endif // ADBC_CONDA_BUILD + + std::cerr << "ADBC_CONDA_BUILD: " << (is_conda_build ? "defined" : "not defined") + << std::endl; + + auto filepath = temp_dir / "etc" / "adbc" / "drivers" / "sqlite.toml"; + std::filesystem::create_directories(filepath.parent_path()); + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << simple_manifest; + test_manifest_file.close(); + +#ifdef _WIN32 + ASSERT_EQ(0, ::_wputenv_s(L"CONDA_PREFIX", temp_dir.native().c_str())); +#else + ASSERT_EQ(0, ::setenv("CONDA_PREFIX", temp_dir.native().c_str(), 1)); +#endif // _WIN32 + + AdbcStatusCode result = + AdbcFindLoadDriver("sqlite", nullptr, ADBC_VERSION_1_1_0, ADBC_LOAD_FLAG_DEFAULT, + nullptr, &driver, &error); + + if constexpr (is_conda_build) { + ASSERT_THAT(result, IsOkStatus(&error)); + } else { + ASSERT_THAT(result, IsStatus(ADBC_STATUS_NOT_FOUND, &error)); + ASSERT_THAT(error.message, + ::testing::HasSubstr("not enabled at build time: Conda prefix")); + } +} + +TEST_F(DriverManifest, ImplicitUri) { + auto filepath = temp_dir / "postgresql.toml"; + std::ofstream test_manifest_file(filepath); + ASSERT_TRUE(test_manifest_file.is_open()); + test_manifest_file << R"([Driver] +shared 
= "adbc_driver_postgresql")"; + test_manifest_file.close(); + + // Should attempt to load the "postgresql" driver by inferring from the URI + std::string uri = "postgresql://a:b@localhost:9999/nonexistent"; + adbc_validation::Handle database; + ASSERT_THAT(AdbcDatabaseNew(&database.value, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseSetOption(&database.value, "driver", uri.c_str(), &error), + IsOkStatus(&error)); + std::string search_path = temp_dir.string(); + ASSERT_THAT(AdbcDriverManagerDatabaseSetAdditionalSearchPathList( + &database.value, search_path.data(), &error), + IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseInit(&database.value, &error), + IsStatus(ADBC_STATUS_IO, &error)); + ASSERT_THAT(error.message, ::testing::HasSubstr("Failed to connect")); + + ASSERT_TRUE(std::filesystem::remove(filepath)); +} + } // namespace adbc diff --git a/c/driver_manager/current_arch.h b/c/driver_manager/current_arch.h new file mode 100644 index 0000000000..fc7167c205 --- /dev/null +++ b/c/driver_manager/current_arch.h @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#pragma once
+
+#include <string>
+
+#if defined(_WIN32)
+#define ADBC_LITTLE_ENDIAN 1
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <machine/endian.h>
+#elif defined(sun) || defined(__sun)
+#include <sys/byteorder.h>
+#elif !defined(_AIX)
+#include <endian.h>
+#endif
+#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+#define ADBC_LITTLE_ENDIAN 1
+#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define ADBC_LITTLE_ENDIAN 1
+#else
+#define ADBC_LITTLE_ENDIAN 0
+#endif
+#endif
+#endif
+
+namespace adbc {
+
+const std::string& CurrentArch() {
+#if defined(_WIN32)
+  static const std::string platform = "windows";
+#elif defined(__APPLE__)
+  static const std::string platform = "macos";
+#elif defined(__FreeBSD__)
+  static const std::string platform = "freebsd";
+#elif defined(__OpenBSD__)
+  static const std::string platform = "openbsd";
+#elif defined(__linux__)
+  static const std::string platform = "linux";
+#else
+  static const std::string platform = "unknown";
+#endif
+
+#if defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) || defined(_M_AMD64)
+  static const std::string arch = "amd64";
+#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__ARM_ARCH_ISA_A64)
+#if ADBC_LITTLE_ENDIAN
+  static const std::string arch = "arm64";
+#else
+  static const std::string arch = "arm64be";
+#endif
+#elif defined(__i386__) || defined(_M_IX86) || defined(_M_X86)
+  static const std::string arch = "x86";
+#elif defined(__arm__) || defined(_M_ARM)
+#if ADBC_LITTLE_ENDIAN
+  static const std::string arch = "arm";
+#else
+  static const std::string arch = "armbe";
+#endif
+#elif defined(__riscv) || defined(_M_RISCV)
+#if defined(__riscv_xlen) && __riscv_xlen == 64
+  static const std::string arch = "riscv64";
+#else
+  static const std::string arch = "riscv";
+#endif
+#elif defined(__ppc64__) || defined(__powerpc64__)
+#if ADBC_LITTLE_ENDIAN
+  static const std::string arch = "powerpc64le";
+#else
+  static const std::string arch = "powerpc64";
+#endif
+#elif defined(__powerpc__)
|| defined(__ppc__) || defined(_M_PPC)
+  static const std::string arch = "powerpc";
+#elif defined(__s390x__) || defined(_M_S390)
+  static const std::string arch = "s390x";
+#elif defined(__sparc__) || defined(__sparc)
+#if defined(_LP64) || defined(__LP64__)
+  static const std::string arch = "sparc64";
+#else
+  static const std::string arch = "sparc";
+#endif
+#elif defined(__wasm32__)
+  static const std::string arch = "wasm32";
+#elif defined(__wasm64__)
+  static const std::string arch = "wasm64";
+#else
+  static const std::string arch = "unknown";
+#endif
+
+// musl doesn't actually define any preprocessor macro for itself
+// but apparently it doesn't define __USE_GNU inside of features.h
+// while gcc DOES define that.
+// see https://stackoverflow.com/questions/58177815/how-to-actually-detect-musl-libc
+#if defined(_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
+#else
+#if !defined(_GNU_SOURCE)
+#define _GNU_SOURCE
+#include <features.h>  // NOLINT [build/include]
+#ifndef __USE_GNU
+#define __MUSL__
+#endif
+#undef _GNU_SOURCE /* don't contaminate other includes unnecessarily */
+#else
+#include <features.h>  // NOLINT [build/include]
+#ifndef __USE_GNU
+#define __MUSL__
+#endif
+#endif
+#endif
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+  static const std::string target = "_mingw";
+#elif defined(__MUSL__)
+  static const std::string target = "_musl";
+#else
+  static const std::string target = "";
+#endif
+
+  static const std::string result = platform + "_" + arch + target;
+  return result;
+}
+
+}  // namespace adbc
diff --git a/c/driver_manager/meson.build b/c/driver_manager/meson.build
index 6be37f96f4..5225e2cb64 100644
--- a/c/driver_manager/meson.build
+++ b/c/driver_manager/meson.build
@@ -15,12 +15,20 @@
 # specific language governing permissions and limitations
 # under the License.
+if host_machine.system() != 'windows' + dl_dep = dependency('dl') + deps = [dl_dep] +else + dl_dep = declare_dependency() + deps = [dependency('uuid'), dependency('shell32'), dependency('advapi32')] +endif adbc_driver_manager_lib = library( 'adbc_driver_manager', 'adbc_driver_manager.cc', - include_directories: [include_dir], + include_directories: [include_dir, vendor_dir], install: true, + dependencies: deps, ) pkg.generate( @@ -30,3 +38,8 @@ pkg.generate( libraries: [adbc_driver_manager_lib], filebase: 'adbc-driver-manager', ) + +adbc_driver_manager_dep = declare_dependency( + include_directories: include_dir, + link_with: adbc_driver_manager_lib, +) diff --git a/c/include/arrow-adbc/adbc.h b/c/include/arrow-adbc/adbc.h index b965672e6f..be51cb6cf4 100644 --- a/c/include/arrow-adbc/adbc.h +++ b/c/include/arrow-adbc/adbc.h @@ -177,28 +177,34 @@ typedef uint8_t AdbcStatusCode; /// \brief No error. #define ADBC_STATUS_OK 0 + /// \brief An unknown error occurred. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_UNKNOWN 1 + /// \brief The operation is not implemented or supported. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_NOT_IMPLEMENTED 2 + /// \brief A requested resource was not found. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_NOT_FOUND 3 + /// \brief A requested resource already exists. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_ALREADY_EXISTS 4 + /// \brief The arguments are invalid, likely a programming error. /// /// For instance, they may be of the wrong format, or out of range. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_INVALID_ARGUMENT 5 + /// \brief The preconditions for the operation are not met, likely a /// programming error. /// @@ -207,6 +213,7 @@ typedef uint8_t AdbcStatusCode; /// /// May indicate a driver-side or database-side error. 
#define ADBC_STATUS_INVALID_STATE 6 + /// \brief Invalid data was processed (not a programming error). /// /// For instance, a division by zero may have occurred during query @@ -214,6 +221,7 @@ typedef uint8_t AdbcStatusCode; /// /// May indicate a database-side error only. #define ADBC_STATUS_INVALID_DATA 7 + /// \brief The database's integrity was affected. /// /// For instance, a foreign key check may have failed, or a uniqueness @@ -221,28 +229,34 @@ typedef uint8_t AdbcStatusCode; /// /// May indicate a database-side error only. #define ADBC_STATUS_INTEGRITY 8 + /// \brief An error internal to the driver or database occurred. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_INTERNAL 9 + /// \brief An I/O error occurred. /// /// For instance, a remote service may be unavailable. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_IO 10 + /// \brief The operation was cancelled, not due to a timeout. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_CANCELLED 11 + /// \brief The operation was cancelled due to a timeout. /// /// May indicate a driver-side or database-side error. #define ADBC_STATUS_TIMEOUT 12 + /// \brief Authentication failed. /// /// May indicate a database-side error only. #define ADBC_STATUS_UNAUTHENTICATED 13 + /// \brief The client is not authorized to perform the given operation. /// /// May indicate a database-side error only. @@ -288,7 +302,7 @@ struct ADBC_EXPORT AdbcError { /// /// This field may not be used unless vendor_code is /// ADBC_ERROR_VENDOR_CODE_PRIVATE_DATA. If present, this field is NULLPTR - /// iff the error is unintialized/freed. + /// iff the error is uninitialized/freed. 
/// /// \since ADBC API revision 1.1.0 void* private_data; @@ -331,6 +345,7 @@ struct ADBC_EXPORT AdbcError { /// /// \since ADBC API revision 1.1.0 #define ADBC_ERROR_1_0_0_SIZE (offsetof(struct AdbcError, private_data)) + /// \brief The size of the AdbcError structure in ADBC 1.1.0. /// /// Drivers written for ADBC 1.1.0 and later should never touch more than this @@ -408,10 +423,19 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// \since ADBC API revision 1.1.0 #define ADBC_VERSION_1_1_0 1001000 +/// \brief ADBC revision 1.2.0. +/// +/// When passed to an AdbcDriverInitFunc(), the driver parameter must +/// point to an AdbcDriver. +/// +/// \since ADBC API revision 1.2.0 +#define ADBC_VERSION_1_2_0 1002000 + /// \brief Canonical option value for enabling an option. /// /// For use as the value in SetOption calls. #define ADBC_OPTION_VALUE_ENABLED "true" + /// \brief Canonical option value for disabling an option. /// /// For use as the value in SetOption calls. @@ -426,6 +450,7 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// /// \since ADBC API revision 1.1.0 #define ADBC_OPTION_URI "uri" + /// \brief Canonical option name for usernames. /// /// Should be used as the expected option name to specify a username @@ -435,6 +460,7 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// /// \since ADBC API revision 1.1.0 #define ADBC_OPTION_USERNAME "username" + /// \brief Canonical option name for passwords. /// /// Should be used as the expected option name to specify a password @@ -450,28 +476,34 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// /// \see AdbcConnectionGetInfo #define ADBC_INFO_VENDOR_NAME 0 + /// \brief The database vendor/product version (type: utf8). /// /// \see AdbcConnectionGetInfo #define ADBC_INFO_VENDOR_VERSION 1 + /// \brief The database vendor/product Arrow library version (type: /// utf8). 
/// /// \see AdbcConnectionGetInfo #define ADBC_INFO_VENDOR_ARROW_VERSION 2 + /// \brief Indicates whether SQL queries are supported (type: bool). /// /// \see AdbcConnectionGetInfo #define ADBC_INFO_VENDOR_SQL 3 + /// \brief Indicates whether Substrait queries are supported (type: bool). /// /// \see AdbcConnectionGetInfo #define ADBC_INFO_VENDOR_SUBSTRAIT 4 + /// \brief The minimum supported Substrait version, or null if /// Substrait is not supported (type: utf8). /// /// \see AdbcConnectionGetInfo #define ADBC_INFO_VENDOR_SUBSTRAIT_MIN_VERSION 5 + /// \brief The maximum supported Substrait version, or null if /// Substrait is not supported (type: utf8). /// @@ -482,14 +514,17 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// /// \see AdbcConnectionGetInfo #define ADBC_INFO_DRIVER_NAME 100 + /// \brief The driver version (type: utf8). /// /// \see AdbcConnectionGetInfo #define ADBC_INFO_DRIVER_VERSION 101 + /// \brief The driver Arrow library version (type: utf8). /// /// \see AdbcConnectionGetInfo #define ADBC_INFO_DRIVER_ARROW_VERSION 102 + /// \brief The driver ADBC API version (type: int64). /// /// The value should be one of the ADBC_VERSION constants. @@ -498,24 +533,29 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// \see AdbcConnectionGetInfo /// \see ADBC_VERSION_1_0_0 /// \see ADBC_VERSION_1_1_0 +/// \see ADBC_VERSION_1_2_0 #define ADBC_INFO_DRIVER_ADBC_VERSION 103 /// \brief Return metadata on catalogs, schemas, tables, and columns. /// /// \see AdbcConnectionGetObjects #define ADBC_OBJECT_DEPTH_ALL 0 + /// \brief Return metadata on catalogs only. /// /// \see AdbcConnectionGetObjects #define ADBC_OBJECT_DEPTH_CATALOGS 1 + /// \brief Return metadata on catalogs and schemas. /// /// \see AdbcConnectionGetObjects #define ADBC_OBJECT_DEPTH_DB_SCHEMAS 2 + /// \brief Return metadata on catalogs, schemas, and tables. 
/// /// \see AdbcConnectionGetObjects #define ADBC_OBJECT_DEPTH_TABLES 3 + /// \brief Return metadata on catalogs, schemas, tables, and columns. /// /// \see AdbcConnectionGetObjects @@ -527,42 +567,55 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// \brief The dictionary-encoded name of the average byte width statistic. #define ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_KEY 0 + /// \brief The average byte width statistic. The average size in bytes of a /// row in the column. Value type is float64. /// /// For example, this is roughly the average length of a string for a string /// column. #define ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_NAME "adbc.statistic.byte_width" + /// \brief The dictionary-encoded name of the distinct value count statistic. #define ADBC_STATISTIC_DISTINCT_COUNT_KEY 1 + /// \brief The distinct value count (NDV) statistic. The number of distinct /// values in the column. Value type is int64 (when not approximate) or /// float64 (when approximate). #define ADBC_STATISTIC_DISTINCT_COUNT_NAME "adbc.statistic.distinct_count" + /// \brief The dictionary-encoded name of the max byte width statistic. #define ADBC_STATISTIC_MAX_BYTE_WIDTH_KEY 2 + /// \brief The max byte width statistic. The maximum size in bytes of a row /// in the column. Value type is int64 (when not approximate) or float64 /// (when approximate). /// /// For example, this is the maximum length of a string for a string column. #define ADBC_STATISTIC_MAX_BYTE_WIDTH_NAME "adbc.statistic.max_byte_width" + /// \brief The dictionary-encoded name of the max value statistic. #define ADBC_STATISTIC_MAX_VALUE_KEY 3 + /// \brief The max value statistic. Value type is column-dependent. #define ADBC_STATISTIC_MAX_VALUE_NAME "adbc.statistic.max_value" + /// \brief The dictionary-encoded name of the min value statistic. #define ADBC_STATISTIC_MIN_VALUE_KEY 4 + /// \brief The min value statistic. Value type is column-dependent. 
#define ADBC_STATISTIC_MIN_VALUE_NAME "adbc.statistic.min_value" + /// \brief The dictionary-encoded name of the null count statistic. #define ADBC_STATISTIC_NULL_COUNT_KEY 5 + /// \brief The null count statistic. The number of values that are null in /// the column. Value type is int64 (when not approximate) or float64 /// (when approximate). #define ADBC_STATISTIC_NULL_COUNT_NAME "adbc.statistic.null_count" + /// \brief The dictionary-encoded name of the row count statistic. #define ADBC_STATISTIC_ROW_COUNT_KEY 6 + /// \brief The row count statistic. The number of rows in the column or /// table. Value type is int64 (when not approximate) or float64 (when /// approximate). @@ -753,33 +806,41 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// /// The type is char*. #define ADBC_INGEST_OPTION_TARGET_TABLE "adbc.ingest.target_table" + /// \brief Whether to create (the default) or append. /// /// The type is char*. #define ADBC_INGEST_OPTION_MODE "adbc.ingest.mode" + /// \brief Create the table and insert data; error if the table exists. #define ADBC_INGEST_OPTION_MODE_CREATE "adbc.ingest.mode.create" + /// \brief Do not create the table, and insert data; error if the /// table does not exist (ADBC_STATUS_NOT_FOUND) or does not match /// the schema of the data to append (ADBC_STATUS_ALREADY_EXISTS). #define ADBC_INGEST_OPTION_MODE_APPEND "adbc.ingest.mode.append" + /// \brief Create the table and insert data; drop the original table /// if it already exists. /// \since ADBC API revision 1.1.0 #define ADBC_INGEST_OPTION_MODE_REPLACE "adbc.ingest.mode.replace" + /// \brief Insert data; create the table if it does not exist, or /// error if the table exists, but the schema does not match the /// schema of the data to append (ADBC_STATUS_ALREADY_EXISTS). /// \since ADBC API revision 1.1.0 #define ADBC_INGEST_OPTION_MODE_CREATE_APPEND "adbc.ingest.mode.create_append" + /// \brief The catalog of the table for bulk insert. 
/// /// The type is char*. #define ADBC_INGEST_OPTION_TARGET_CATALOG "adbc.ingest.target_catalog" + /// \brief The schema of the table for bulk insert. /// /// The type is char*. #define ADBC_INGEST_OPTION_TARGET_DB_SCHEMA "adbc.ingest.target_db_schema" + /// \brief Use a temporary table for ingestion. /// /// The value should be ADBC_OPTION_VALUE_ENABLED or @@ -807,7 +868,7 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream /// Must be kept alive as long as any connections exist. struct ADBC_EXPORT AdbcDatabase { /// \brief Opaque implementation-defined state. - /// This field is NULLPTR iff the connection is unintialized/freed. + /// This field is NULLPTR iff the connection is uninitialized/freed. void* private_data; /// \brief The associated driver (used by the driver manager to help /// track state). @@ -830,7 +891,7 @@ struct ADBC_EXPORT AdbcDatabase { /// serialize accesses to a connection. struct ADBC_EXPORT AdbcConnection { /// \brief Opaque implementation-defined state. - /// This field is NULLPTR iff the connection is unintialized/freed. + /// This field is NULLPTR iff the connection is uninitialized/freed. void* private_data; /// \brief The associated driver (used by the driver manager to help /// track state). @@ -868,7 +929,7 @@ struct ADBC_EXPORT AdbcConnection { /// serialize accesses to a statement. struct ADBC_EXPORT AdbcStatement { /// \brief Opaque implementation-defined state. - /// This field is NULLPTR iff the connection is unintialized/freed. + /// This field is NULLPTR iff the connection is uninitialized/freed. void* private_data; /// \brief The associated driver (used by the driver manager to help @@ -907,7 +968,7 @@ struct AdbcPartitions { const size_t* partition_lengths; /// \brief Opaque implementation-defined state. - /// This field is NULLPTR iff the connection is unintialized/freed. + /// This field is NULLPTR iff the connection is uninitialized/freed. 
void* private_data; /// \brief Release the contained partitions. @@ -935,11 +996,11 @@ struct AdbcPartitions { /// worrying about multiple definitions of the same symbol. struct ADBC_EXPORT AdbcDriver { /// \brief Opaque driver-defined state. - /// This field is NULL if the driver is unintialized/freed (but + /// This field is NULL if the driver is uninitialized/freed (but /// it need not have a value even if the driver is initialized). void* private_data; /// \brief Opaque driver manager-defined state. - /// This field is NULL if the driver is unintialized/freed (but + /// This field is NULL if the driver is uninitialized/freed (but /// it need not have a value even if the driver is initialized). void* private_manager; @@ -1007,17 +1068,18 @@ struct ADBC_EXPORT AdbcDriver { /// the AdbcDriverInitFunc is greater than or equal to /// ADBC_VERSION_1_1_0. /// - /// For a 1.0.0 driver being loaded by a 1.1.0 driver manager: the - /// 1.1.0 manager will allocate the new, expanded AdbcDriver struct - /// and attempt to have the driver initialize it with - /// ADBC_VERSION_1_1_0. This must return an error, after which the - /// driver will try again with ADBC_VERSION_1_0_0. The driver must - /// not access the new fields, which will carry undefined values. - /// - /// For a 1.1.0 driver being loaded by a 1.0.0 driver manager: the - /// 1.0.0 manager will allocate the old AdbcDriver struct and - /// attempt to have the driver initialize it with + /// When a driver implementing an older spec is loaded by a newer + /// driver manager, the newer manager will allocate the new, expanded + /// AdbcDriver struct and attempt to have the driver initialize it with + /// the newer version. This must return an error, after which the driver + /// will try again with successively older versions all the way back to /// ADBC_VERSION_1_0_0. The driver must not access the new fields, + /// which will carry undefined values. 
+ /// + /// When a driver implementing a newer spec is loaded by an older + /// driver manager, the older manager will allocate the old AdbcDriver + /// struct and attempt to have the driver initialize it with the + /// older version. The driver must not access the new fields, /// and should initialize the old fields. /// /// @{ @@ -1083,6 +1145,20 @@ struct ADBC_EXPORT AdbcDriver { struct AdbcError*); /// @} + /// \defgroup adbc-1.2.0 ADBC API Revision 1.2.0 + /// + /// Functions added in ADBC 1.2.0. For backwards compatibility, + /// these members must not be accessed unless the version passed to + /// the AdbcDriverInitFunc is greater than or equal to + /// ADBC_VERSION_1_2_0. + /// + /// @{ + + AdbcStatusCode (*StatementNextResult)(struct AdbcStatement*, struct ArrowSchema*, + struct ArrowArrayStream*, struct AdbcPartitions*, + int64_t*, struct AdbcError*); + + /// @} }; /// \brief The size of the AdbcDriver structure in ADBC 1.0.0. @@ -1094,12 +1170,20 @@ struct ADBC_EXPORT AdbcDriver { #define ADBC_DRIVER_1_0_0_SIZE (offsetof(struct AdbcDriver, ErrorGetDetailCount)) /// \brief The size of the AdbcDriver structure in ADBC 1.1.0. -/// Drivers written for ADBC 1.1.0 and later should never touch more +/// Drivers written for ADBC 1.2.0 and later should never touch more /// than this portion of an AdbcDriver struct when given /// ADBC_VERSION_1_1_0. /// /// \since ADBC API revision 1.1.0 -#define ADBC_DRIVER_1_1_0_SIZE (sizeof(struct AdbcDriver)) +#define ADBC_DRIVER_1_1_0_SIZE (offsetof(struct AdbcDriver, StatementNextResult)) + +/// \brief The size of the AdbcDriver structure in ADBC 1.2.0. +/// Drivers written for ADBC 1.3.0 and later should never touch more +/// than this portion of an AdbcDriver struct when given +/// ADBC_VERSION_1_2_0. 
+/// +/// \since ADBC API revision 1.2.0 +#define ADBC_DRIVER_1_2_0_SIZE (sizeof(struct AdbcDriver)) /// @} @@ -2236,6 +2320,55 @@ AdbcStatusCode AdbcStatementGetParameterSchema(struct AdbcStatement* statement, struct ArrowSchema* schema, struct AdbcError* error); +/// \brief Move to next result set, if any. +/// +/// For an execution which returns multiple results, this can be +/// called once the initial execution is complete to get the second +/// and subsequent result sets. A driver may support calling +/// AdbcStatementNextResult while the previous result is still being +/// consumed. One which does not must return ADBC_STATUS_INVALID_STATE +/// until that happens. A driver returns ADBC_STATUS_OK to indicate +/// successful execution of this function whether or not an additional +/// result set is available. +/// +/// If the original execution was via AdbcStatementExecuteSchema then +/// the out, partitions and rows_affected parameters may be passed as +/// NULL. If passed, their contents are unchanged whether or not an +/// additional result is available. +/// +/// Either partitions or out must be NULL to indicate which style of output +/// is desired by the caller. Supplying non-NULL values to both must result +/// in ADBC_STATUS_INVALID_ARGUMENT. If the original execution was via +/// AdbcStatementExecuteQuery and the call to AdbcStatementNextResult has a +/// non-NULL partitions, or the original was via AdbcStatementExecutePartitions +/// and this call has a non-NULL out, then the driver may choose to return the +/// data in a different style than the original result set. If it does not (or +/// cannot) then it should return ADBC_STATUS_INVALID_ARGUMENT. +/// +/// The driver indicates that no additional result is available by setting +/// release on schema and/or out to NULL. +/// +/// \since ADBC API revision 1.2.0 +/// +/// \param[in] statement The statement for which to fetch a subsequent result. 
+/// \param[out] schema An optional location to return the schema of the result. +/// Either schema or out must be set. +/// \param[out] out The result set, if desired as a single result stream. +/// \param[out] partitions The result set, if desired as partitioned data +/// \param[out] rows_affected The number of rows affected if known, else -1. +/// \param[out] error An optional location to return an error +/// message if necessary. +/// +/// \return ADBC_STATUS_INVALID_STATE if this function is called at +/// an inappropriate time. +ADBC_EXPORT +AdbcStatusCode AdbcStatementNextResult(struct AdbcStatement* statement, + struct ArrowSchema* schema, + struct ArrowArrayStream* out, + struct AdbcPartitions* partitions, + int64_t* rows_affected, + struct AdbcError* error); + /// \brief Set a string option on a statement. /// \param[in] statement The statement. /// \param[in] key The option to set. diff --git a/c/include/arrow-adbc/adbc_driver_manager.h b/c/include/arrow-adbc/adbc_driver_manager.h index c32368ab69..cf968ffdb4 100644 --- a/c/include/arrow-adbc/adbc_driver_manager.h +++ b/c/include/arrow-adbc/adbc_driver_manager.h @@ -23,6 +23,7 @@ #pragma once #include +#include #ifdef __cplusplus extern "C" { @@ -31,6 +32,17 @@ extern "C" { #ifndef ADBC_DRIVER_MANAGER_H #define ADBC_DRIVER_MANAGER_H +typedef uint32_t AdbcLoadFlags; + +#define ADBC_LOAD_FLAG_SEARCH_ENV 1 +#define ADBC_LOAD_FLAG_SEARCH_USER 2 +#define ADBC_LOAD_FLAG_SEARCH_SYSTEM 4 +#define ADBC_LOAD_FLAG_ALLOW_RELATIVE_PATHS 8 + +#define ADBC_LOAD_FLAG_DEFAULT \ + (ADBC_LOAD_FLAG_SEARCH_ENV | ADBC_LOAD_FLAG_SEARCH_USER | \ + ADBC_LOAD_FLAG_SEARCH_SYSTEM | ADBC_LOAD_FLAG_ALLOW_RELATIVE_PATHS) + /// \brief Common entry point for drivers via the driver manager. /// /// The driver manager can fill in default implementations of some @@ -40,8 +52,9 @@ extern "C" { /// /// \param[in] driver_name An identifier for the driver (e.g. a path to a /// shared library on Linux). 
-/// \param[in] entrypoint An identifier for the entrypoint (e.g. the -/// symbol to call for AdbcDriverInitFunc on Linux). +/// \param[in] entrypoint An identifier for the entrypoint (e.g. the symbol to +/// call for AdbcDriverInitFunc on Linux). If not provided, search for an +/// entrypoint based on the driver name. /// \param[in] version The ADBC revision to attempt to initialize. /// \param[out] driver The table of function pointers to initialize. /// \param[out] error An optional location to return an error message @@ -50,6 +63,54 @@ ADBC_EXPORT AdbcStatusCode AdbcLoadDriver(const char* driver_name, const char* entrypoint, int version, void* driver, struct AdbcError* error); +/// \brief Common entry point to search for and load a driver or manifest. +/// +/// The driver manager can fill in default implementations of some ADBC functions +/// for drivers. Drivers must implement a minimum level of functionality for this +/// to be possible, however, and some functions must be implemented by the driver. +/// +/// This function is different from AdbcLoadDriver in that it also accepts the name +/// of a driver manifest file, and allows specifying options to control what +/// directories it will search through. The behavior is as follows: +/// +/// If the passed in driver_name is an absolute path: +/// - If the path has a `.toml` extension, it will attempt to parse the manifest and load +/// the driver specified within it. Erroring if this fails. +/// - If the path has an extension other than `.toml`, it will attempt to load the path as +/// a shared library. Erroring if this fails. 
+///
+/// If the passed in driver_name does not have an extension and is not an absolute path:
+/// - The load_options parameter will control whether the driver manager will search the
+///   environment variable ADBC_DRIVER_PATH and (if built or installed with conda) the
+///   conda environment, the user-level configuration, and/or the system-level
+///   configuration for either a manifest file or a shared library.
+/// - For each path to be searched, it will first look for <path>/<driver_name>.toml. If
+///   that file exists, it will attempt to parse the manifest and load the driver
+///   specified within it, erroring if this fails.
+/// - If the manifest file does not exist, it will then look for
+///   <path>/<driver_name>.<extension>
+///   where <extension> is one of the following: `.so`, `.dll`, `.dylib`. If it can load
+///   that shared library, then success is returned. Otherwise it moves to the next
+///   directory until the search is either successful, or all directories have been
+///   searched.
+///
+/// \param[in] driver_name An identifier for the driver (e.g. a path to a
+///   shared library on Linux or the basename of a manifest file).
+/// \param[in] entrypoint An identifier for the entrypoint (e.g. the symbol to
+///   call for AdbcDriverInitFunc on Linux). If not provided, search for an
+///   entrypoint based on the driver name.
+/// \param[in] version The ADBC revision to attempt to initialize.
+/// \param[in] load_options bit mask of AdbcLoadFlags to control the directories searched
+/// \param[in] additional_search_path_list A list of additional paths to search for
+///   delimited by the OS specific path list separator.
+/// \param[out] driver The table of function pointers to initialize +/// \param[out] error An optional location to return an error message +ADBC_EXPORT +AdbcStatusCode AdbcFindLoadDriver(const char* driver_name, const char* entrypoint, + const int version, const AdbcLoadFlags load_options, + const char* additional_search_path_list, void* driver, + struct AdbcError* error); + /// \brief Common entry point for drivers via the driver manager. /// /// The driver manager can fill in default implementations of some @@ -78,6 +139,38 @@ AdbcStatusCode AdbcDriverManagerDatabaseSetInitFunc(struct AdbcDatabase* databas AdbcDriverInitFunc init_func, struct AdbcError* error); +/// \brief Set the load flags for the driver manager. +/// +/// This is an extension to the ADBC API. The driver manager shims +/// the AdbcDatabase* functions to allow you to specify the +/// driver/entrypoint dynamically. This function lets you set the +/// load flags explicitly, for applications that can dynamically +/// load drivers on their own. +/// +/// If this function isn't called, the default load flags are just to +/// allow relative paths, disallowing the lookups of manifests. +ADBC_EXPORT +AdbcStatusCode AdbcDriverManagerDatabaseSetLoadFlags(struct AdbcDatabase* database, + AdbcLoadFlags flags, + struct AdbcError* error); + +/// \brief Set an additional manifest search path list for the driver manager. +/// +/// This is an extension to the ADBC API. The driver manager shims +/// the AdbcDatabase* functions to allow you to specify the +/// driver/entrypoint dynamically. This function lets you explicitly +/// set a path list at runtime for additional paths to search when +/// looking for a driver manifest. While users can add additional +/// paths via the ADBC_DRIVER_PATH environment variable, this allows +/// an application to specify search paths at runtime which are not tied +/// to the load flags. 
+/// +/// Calling this function with NULL as the `path_list` will clear any +/// previously set additional search paths. +ADBC_EXPORT +AdbcStatusCode AdbcDriverManagerDatabaseSetAdditionalSearchPathList( + struct AdbcDatabase* database, const char* path_list, struct AdbcError* error); + /// \brief Get a human-friendly description of a status code. ADBC_EXPORT const char* AdbcStatusCodeMessage(AdbcStatusCode code); diff --git a/rust/core/src/ffi/mod.rs b/c/include/arrow-adbc/driver/bigquery.h similarity index 68% rename from rust/core/src/ffi/mod.rs rename to c/include/arrow-adbc/driver/bigquery.h index 4a633cab14..985a021400 100644 --- a/rust/core/src/ffi/mod.rs +++ b/c/include/arrow-adbc/driver/bigquery.h @@ -15,12 +15,22 @@ // specific language governing permissions and limitations // under the License. -//! C-compatible items as defined in [`adbc.h`](https://github.com/apache/arrow-adbc/blob/main/adbc.h) +/// \file arrow-adbc/driver/bigquery.h ADBC BigQuery Driver +/// +/// A driver for BigQuery. -pub mod constants; -pub(crate) mod methods; -pub(crate) mod types; -pub use types::{ - FFI_AdbcConnection, FFI_AdbcDatabase, FFI_AdbcDriver, FFI_AdbcDriverInitFunc, FFI_AdbcError, - FFI_AdbcErrorDetail, FFI_AdbcPartitions, FFI_AdbcStatement, FFI_AdbcStatusCode, -}; +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +ADBC_EXPORT +AdbcStatusCode AdbcDriverBigqueryInit(int version, void* raw_driver, + struct AdbcError* error); + +#ifdef __cplusplus +} +#endif diff --git a/c/include/arrow-adbc/driver/flightsql.h b/c/include/arrow-adbc/driver/flightsql.h new file mode 100644 index 0000000000..1d2ad8e2d1 --- /dev/null +++ b/c/include/arrow-adbc/driver/flightsql.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// \file arrow-adbc/driver/flightsql.h ADBC Flight SQL Driver +/// +/// A driver for Arrow Flight SQL. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +ADBC_EXPORT +AdbcStatusCode AdbcDriverFlightsqlInit(int version, void* raw_driver, + struct AdbcError* error); + +#ifdef __cplusplus +} +#endif diff --git a/c/include/arrow-adbc/driver/postgresql.h b/c/include/arrow-adbc/driver/postgresql.h new file mode 100644 index 0000000000..49ea63e16f --- /dev/null +++ b/c/include/arrow-adbc/driver/postgresql.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +/// \file arrow-adbc/driver/postgresql.h ADBC PostgreSQL Driver +/// +/// A driver for PostgreSQL. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +ADBC_EXPORT +AdbcStatusCode AdbcDriverPostgresqlInit(int version, void* raw_driver, + struct AdbcError* error); + +#ifdef __cplusplus +} +#endif diff --git a/c/include/arrow-adbc/driver/snowflake.h b/c/include/arrow-adbc/driver/snowflake.h new file mode 100644 index 0000000000..3dc734bd41 --- /dev/null +++ b/c/include/arrow-adbc/driver/snowflake.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// \file arrow-adbc/driver/snowflake.h ADBC Snowflake Driver +/// +/// A driver for Snowflake. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +ADBC_EXPORT +AdbcStatusCode AdbcDriverSnowflakeInit(int version, void* raw_driver, + struct AdbcError* error); + +#ifdef __cplusplus +} +#endif diff --git a/c/include/arrow-adbc/driver/sqlite.h b/c/include/arrow-adbc/driver/sqlite.h new file mode 100644 index 0000000000..e11c8418b9 --- /dev/null +++ b/c/include/arrow-adbc/driver/sqlite.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// \file arrow-adbc/driver/sqlite.h ADBC SQLite Driver +/// +/// A driver for SQLite. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +ADBC_EXPORT +AdbcStatusCode AdbcDriverSqliteInit(int version, void* raw_driver, + struct AdbcError* error); + +#ifdef __cplusplus +} +#endif diff --git a/c/integration/duckdb/CMakeLists.txt b/c/integration/duckdb/CMakeLists.txt index 9065450b0d..1db2575d6c 100644 --- a/c/integration/duckdb/CMakeLists.txt +++ b/c/integration/duckdb/CMakeLists.txt @@ -20,7 +20,7 @@ include(FetchContent) if(ADBC_BUILD_TESTS) fetchcontent_declare(duckdb GIT_REPOSITORY https://github.com/duckdb/duckdb.git - GIT_TAG 0d84ccf478578278b2d1168675b8b93c60f78a5e # v0.9.0 + GIT_TAG 8e52ec43959ab363643d63cb78ee214577111da4 # v1.2.1 GIT_PROGRESS TRUE USES_TERMINAL_DOWNLOAD TRUE) set(BUILD_JEMALLOC_EXTENSION @@ -63,13 +63,11 @@ if(ADBC_BUILD_TESTS) adbc_driver_common adbc_driver_manager_static adbc_validation - duckdb - nanoarrow) + duckdb) add_dependencies(adbc-integration-duckdb-test duckdb) target_compile_features(adbc-integration-duckdb-test PRIVATE cxx_std_17) target_include_directories(adbc-integration-duckdb-test SYSTEM PRIVATE ${REPOSITORY_ROOT}/c/ ${REPOSITORY_ROOT}/c/include/ - ${REPOSITORY_ROOT}/c/vendor ${REPOSITORY_ROOT}/c/driver) 
adbc_configure_target(adbc-integration-duckdb-test) endif() diff --git a/c/integration/duckdb/README.md b/c/integration/duckdb/README.md new file mode 100644 index 0000000000..7621e07372 --- /dev/null +++ b/c/integration/duckdb/README.md @@ -0,0 +1,29 @@ + + +# ADBC DuckDB Integration Test + +![Vendor: DuckDB](https://img.shields.io/badge/vendor-DuckDB-blue?style=flat-square) +![Implementation: C/C++](https://img.shields.io/badge/implementation-C%2FC%2B%2B-violet?style=flat-square) +![Status: Stable](https://img.shields.io/badge/status-stable-green?style=flat-square) + +[![conda-forge: python-duckdb](https://img.shields.io/conda/vn/conda-forge/python-duckdb?label=conda-forge%3A%20python-duckdb&style=flat-square)](https://anaconda.org/conda-forge/python-duckdb) +[![PyPI: duckdb](https://img.shields.io/pypi/v/duckdb?style=flat-square)](https://pypi.org/project/duckdb/) + +This package provides an integration test between [DuckDB](https://duckdb.org/) and ADBC. diff --git a/c/integration/duckdb/duckdb_test.cc b/c/integration/duckdb/duckdb_test.cc index 5a8ecaf7b4..2a9cd8faba 100644 --- a/c/integration/duckdb/duckdb_test.cc +++ b/c/integration/duckdb/duckdb_test.cc @@ -16,9 +16,13 @@ // under the License. #include +#include +#include +#include "gmock/gmock.h" #include #include +#define ADBC_EXPORTING // duckdb changed the include guard... 
#include #include "validation/adbc_validation.h" @@ -26,9 +30,7 @@ // Convert between our definitions and DuckDB's AdbcStatusCode DuckDbDriverInitFunc(int version, void* driver, struct AdbcError* error) { - return duckdb_adbc_init(static_cast(version), - reinterpret_cast(driver), - reinterpret_cast(error)); + return duckdb_adbc_init(static_cast(version), driver, error); } class DuckDbQuirks : public adbc_validation::DriverQuirks { @@ -108,10 +110,38 @@ class DuckDbStatementTest : public ::testing::Test, GTEST_SKIP() << "Cannot query rows affected in delete stream (not implemented)"; } + void TestSqlQueryTrailingSemicolons() { + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), + adbc_validation::IsOkStatus(&error)); + + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "INSTALL icu", &error), + adbc_validation::IsOkStatus(&error)); + + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + adbc_validation::IsOkStatus(&error)); + + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "LOAD icu", &error), + adbc_validation::IsOkStatus(&error)); + + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error), + adbc_validation::IsOkStatus(&error)); + + ASSERT_THAT(AdbcStatementRelease(&statement, &error), + adbc_validation::IsOkStatus(&error)); + + adbc_validation::StatementTest::TestSqlQueryTrailingSemicolons(); + } + void TestErrorCompatibility() { GTEST_SKIP() << "DuckDB does not set AdbcError.release"; } + void TestResultIndependence() { + // DuckDB detects this by throwing + ASSERT_THAT([this]() { adbc_validation::StatementTest::TestResultIndependence(); }, + ::testing::Throws()); + } + protected: DuckDbQuirks quirks_; }; diff --git a/c/integration/shared_test/CMakeLists.txt b/c/integration/shared_test/CMakeLists.txt new file mode 100644 index 0000000000..dabe629c71 --- /dev/null +++ b/c/integration/shared_test/CMakeLists.txt @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more 
contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +cmake_minimum_required(VERSION 3.18) + +project(adbc-static-test LANGUAGES C) +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) + +find_package(AdbcDriverManager REQUIRED) + +add_executable(shared_test main.c) +target_link_libraries(shared_test PRIVATE AdbcDriverManager::adbc_driver_manager_shared) diff --git a/c/integration/shared_test/main.c b/c/integration/shared_test/main.c new file mode 100644 index 0000000000..351d2a5d06 --- /dev/null +++ b/c/integration/shared_test/main.c @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include +#include + +int TryDriver(const char* driver_name) { + struct AdbcError error; + struct AdbcDriver driver; + + memset(&error, 0, sizeof(error)); + memset(&driver, 0, sizeof(driver)); + + AdbcStatusCode status = + AdbcLoadDriver(driver_name, NULL, ADBC_VERSION_1_1_0, &driver, &error); + if (status != ADBC_STATUS_OK) { + if (error.release) { + fprintf(stderr, "AdbcLoadDriver failed: %s\n", error.message); + error.release(&error); + } else { + fprintf(stderr, "AdbcLoadDriver failed\n"); + } + return EXIT_FAILURE; + } + + if (driver.release) { + status = driver.release(&driver, &error); + + if (status != ADBC_STATUS_OK) { + if (error.release) { + fprintf(stderr, "AdbcDriver.release failed: %s\n", error.message); + error.release(&error); + } else { + fprintf(stderr, "AdbcDriver.release failed\n"); + } + return EXIT_FAILURE; + } + } + + return EXIT_SUCCESS; +} + +int main(int argc, char** argv) { + int rc = 0; + + rc = TryDriver("adbc_driver_bigquery"); + if (rc != EXIT_SUCCESS) { + return rc; + } + printf("Loaded BigQuery driver\n"); + + rc = TryDriver("adbc_driver_flightsql"); + if (rc != EXIT_SUCCESS) { + return rc; + } + printf("Loaded FlightSQL driver\n"); + + rc = TryDriver("adbc_driver_postgresql"); + if (rc != EXIT_SUCCESS) { + return rc; + } + printf("Loaded PostgreSQL driver\n"); + + rc = TryDriver("adbc_driver_snowflake"); + if (rc != EXIT_SUCCESS) { + return rc; + } + printf("Loaded Snowflake driver\n"); + + rc = TryDriver("adbc_driver_sqlite"); + if (rc != EXIT_SUCCESS) { + return rc; + } + printf("Loaded SQLite driver\n"); + + return EXIT_SUCCESS; +} diff --git a/c/integration/static_test/CMakeLists.txt b/c/integration/static_test/CMakeLists.txt new file mode 100644 index 0000000000..e0ada2fe70 --- /dev/null +++ b/c/integration/static_test/CMakeLists.txt @@ -0,0 +1,49 @@ +# Licensed to the Apache Software 
Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +cmake_minimum_required(VERSION 3.18) + +project(adbc-static-test LANGUAGES C CXX) +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) + +find_package(AdbcDriverCommon REQUIRED) +find_package(AdbcDriverFramework REQUIRED) + +find_package(AdbcDriverBigQuery REQUIRED) +find_package(AdbcDriverFlightSQL REQUIRED) +find_package(AdbcDriverPostgreSQL REQUIRED) +find_package(AdbcDriverSQLite REQUIRED) +find_package(AdbcDriverSnowflake REQUIRED) + +find_package(fmt REQUIRED) +find_package(nanoarrow REQUIRED) + +add_executable(static_test main.c) +# We need to link the C++ standard library since some of the drivers are +# written in C++. 
+set_target_properties(static_test PROPERTIES LINKER_LANGUAGE CXX) +target_link_libraries(static_test + PRIVATE AdbcDriverFlightSQL::adbc_driver_flightsql_static + AdbcDriverPostgreSQL::adbc_driver_postgresql_static + AdbcDriverSQLite::adbc_driver_sqlite_static + PostgreSQL::PostgreSQL + SQLite::SQLite3 + fmt::fmt + nanoarrow::nanoarrow + AdbcDriverCommon::adbc_driver_common + AdbcDriverFramework::adbc_driver_framework) diff --git a/c/integration/static_test/README.md b/c/integration/static_test/README.md new file mode 100644 index 0000000000..1da0f562d2 --- /dev/null +++ b/c/integration/static_test/README.md @@ -0,0 +1,23 @@ + + +# Static Linking Test + +This is only used to test that static linking with multiple drivers behaves as +expected. See the docker-compose job `cpp-static-test`. diff --git a/c/integration/static_test/main.c b/c/integration/static_test/main.c new file mode 100644 index 0000000000..101e00f508 --- /dev/null +++ b/c/integration/static_test/main.c @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + struct AdbcError error; + + struct AdbcDriver flightsql; + struct AdbcDriver postgresql; + struct AdbcDriver sqlite; + + memset(&error, 0, sizeof(error)); + memset(&flightsql, 0, sizeof(flightsql)); + memset(&postgresql, 0, sizeof(postgresql)); + memset(&sqlite, 0, sizeof(sqlite)); + + AdbcStatusCode status; + + status = AdbcDriverFlightsqlInit(ADBC_VERSION_1_1_0, &flightsql, &error); + if (status != ADBC_STATUS_OK) { + if (error.release) { + fprintf(stderr, "AdbcDriverFlightsqlInit failed: %s\n", error.message); + error.release(&error); + } else { + fprintf(stderr, "AdbcDriverFlightsqlInit failed\n"); + } + return EXIT_FAILURE; + } + + status = AdbcDriverPostgresqlInit(ADBC_VERSION_1_1_0, &postgresql, &error); + if (status != ADBC_STATUS_OK) { + if (error.release) { + fprintf(stderr, "AdbcDriverPostgresqlInit failed: %s\n", error.message); + error.release(&error); + } else { + fprintf(stderr, "AdbcDriverPostgresqlInit failed\n"); + } + return EXIT_FAILURE; + } + + status = AdbcDriverSqliteInit(ADBC_VERSION_1_1_0, &sqlite, &error); + if (status != ADBC_STATUS_OK) { + if (error.release) { + fprintf(stderr, "AdbcDriverSqliteInit failed: %s\n", error.message); + error.release(&error); + } else { + fprintf(stderr, "AdbcDriverSqliteInit failed\n"); + } + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/c/meson.build b/c/meson.build index 98da3a4618..3b61b96185 100644 --- a/c/meson.build +++ b/c/meson.build @@ -17,8 +17,9 @@ project( 'arrow-adbc', - 'c', 'cpp', - version: '1.3.0-SNAPSHOT', + 'c', + 'cpp', + version: '1.9.0-SNAPSHOT', license: 'Apache-2.0', meson_version: '>=1.3.0', default_options: [ @@ -26,13 +27,18 @@ project( 'c_std=c99', 'warning_level=2', 'cpp_std=c++17', - ] + ], ) -add_project_arguments('-Wno-int-conversion', '-Wno-unused-parameter', language: 'c') +add_project_arguments( + '-Wno-int-conversion', + 
'-Wno-unused-parameter', + language: 'c', +) add_project_arguments('-Wno-unused-parameter', '-Wno-reorder', language: 'cpp') c_dir = include_directories('.') +vendor_dir = include_directories('vendor') include_dir = include_directories('include') install_headers('include/adbc.h') install_headers('include/arrow-adbc/adbc.h', subdir: 'arrow-adbc') @@ -40,7 +46,7 @@ driver_dir = include_directories('driver') nanoarrow_dep = dependency('nanoarrow') fmt_dep = dependency('fmt') -if get_option('tests') +if get_option('tests').enabled() gtest_main_dep = dependency('gtest_main') gmock_dep = dependency('gmock') else @@ -48,40 +54,40 @@ else gmock_dep = disabler() endif -needs_driver_manager = get_option('driver_manager') \ - or get_option('tests') +needs_driver_manager = get_option('driver_manager').enabled() \ + or get_option('tests').enabled() pkg = import('pkgconfig') if needs_driver_manager install_headers('include/adbc_driver_manager.h') - install_headers('include/arrow-adbc/adbc_driver_manager.h', subdir: 'arrow-adbc') + install_headers( + 'include/arrow-adbc/adbc_driver_manager.h', + subdir: 'arrow-adbc', + ) subdir('driver_manager') endif subdir('driver/common') subdir('driver/framework') +subdir('validation') -if get_option('tests') - subdir('validation') -endif - -if get_option('bigquery') - subdir('driver/bigquery') +if get_option('bigquery').enabled() + subdir('driver/bigquery') endif -if get_option('flightsql') - subdir('driver/flightsql') +if get_option('flightsql').enabled() + subdir('driver/flightsql') endif -if get_option('postgresql') - subdir('driver/postgresql') +if get_option('postgresql').enabled() + subdir('driver/postgresql') endif -if get_option('sqlite') - subdir('driver/sqlite') +if get_option('sqlite').enabled() + subdir('driver/sqlite') endif -if get_option('snowflake') - subdir('driver/snowflake') +if get_option('snowflake').enabled() + subdir('driver/snowflake') endif diff --git a/c/meson.options b/c/meson.options index 87d5534495..ffe5c4f5c7 
100644 --- a/c/meson.options +++ b/c/meson.options @@ -15,41 +15,11 @@ # specific language governing permissions and limitations # under the License. -option('tests', type: 'boolean', description: 'Build tests', value: false) -option('benchmarks', type: 'boolean', description: 'Build benchmarks', value: false) -option( - 'bigquery', - type: 'boolean', - description: 'Build ADBC BigQuery driver', - value: false -) -option( - 'flightsql', - type: 'boolean', - description: 'Build ADBC FlightSQL driver', - value: false -) -option( - 'driver_manager', - type: 'boolean', - description: 'Build ADBC Driver Manager', - value: false -) -option( - 'postgresql', - type: 'boolean', - description: 'Build ADBC PostgreSQL Driver', - value: false -) -option( - 'sqlite', - type: 'boolean', - description: 'Build ADBC SQLite Driver', - value: false -) -option( - 'snowflake', - type: 'boolean', - description: 'Build ADBC Snowflake Driver', - value: false -) +option('tests', type: 'feature', description: 'Build tests') +option('benchmarks', type: 'feature', description: 'Build benchmarks') +option('bigquery', type: 'feature', description: 'Build ADBC BigQuery driver') +option('flightsql', type: 'feature', description: 'Build ADBC FlightSQL driver') +option('driver_manager', type: 'feature', description: 'Build ADBC Driver Manager') +option('postgresql', type: 'feature', description: 'Build ADBC PostgreSQL Driver') +option('sqlite', type: 'feature', description: 'Build ADBC SQLite Driver') +option('snowflake', type: 'feature', description: 'Build ADBC Snowflake Driver') diff --git a/c/symbols.map b/c/symbols.map index c9464b2da4..b011e1995b 100644 --- a/c/symbols.map +++ b/c/symbols.map @@ -19,15 +19,15 @@ global: # Only expose symbols from the ADBC API Adbc*; - - # Expose driver-specific initialization routines - FlightSQLDriverInit; - PostgresqlDriverInit; - SnowflakeDriverInit; - SqliteDriverInit; + # Needed for tests, but namespaced so they are clearly not for public use + 
InternalAdbc*; extern "C++" { - Adbc*; + # adbc_driver_manager_test links against libadbc_driver_manager + # so we need to make sure the internal ones are externally visible + # so that the tests can link against it. + InternalAdbc*; + Adbc*; }; local: diff --git a/c/validation/CMakeLists.txt b/c/validation/CMakeLists.txt index 04bc0115aa..02362259f5 100644 --- a/c/validation/CMakeLists.txt +++ b/c/validation/CMakeLists.txt @@ -20,23 +20,16 @@ adbc_configure_target(adbc_validation_util) target_compile_features(adbc_validation_util PRIVATE cxx_std_17) target_include_directories(adbc_validation_util SYSTEM PRIVATE "${REPOSITORY_ROOT}/c/include/" - "${REPOSITORY_ROOT}/c/driver/" - "${REPOSITORY_ROOT}/c/vendor/") -target_link_libraries(adbc_validation_util PUBLIC adbc_driver_common nanoarrow - GTest::gtest GTest::gmock) + "${REPOSITORY_ROOT}/c/driver/") +target_link_libraries(adbc_validation_util PUBLIC adbc_driver_common GTest::gtest + GTest::gmock) add_library(adbc_validation OBJECT adbc_validation.cc adbc_validation_connection.cc adbc_validation_database.cc adbc_validation_statement.cc) adbc_configure_target(adbc_validation) target_compile_features(adbc_validation PRIVATE cxx_std_17) -target_include_directories(adbc_validation SYSTEM - PRIVATE "${REPOSITORY_ROOT}/c/include/" - "${REPOSITORY_ROOT}/c/driver/" - "${REPOSITORY_ROOT}/c/vendor/") -target_link_libraries(adbc_validation - PUBLIC adbc_driver_common - adbc_validation_util - nanoarrow - GTest::gtest - GTest::gmock) +target_include_directories(adbc_validation SYSTEM PRIVATE "${REPOSITORY_ROOT}/c/include/" + "${REPOSITORY_ROOT}/c/driver/") +target_link_libraries(adbc_validation PUBLIC adbc_driver_common adbc_validation_util + GTest::gtest GTest::gmock) diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h index 427e39b2e2..fad84137e6 100644 --- a/c/validation/adbc_validation.h +++ b/c/validation/adbc_validation.h @@ -463,6 +463,7 @@ class StatementTest { void TestConcurrentStatements(); 
void TestErrorCompatibility(); + void TestResultIndependence(); void TestResultInvalidation(); protected: @@ -579,6 +580,7 @@ void StatementTest::TestSqlIngestType(ArrowType type, TEST_F(FIXTURE, Transactions) { TestTransactions(); } \ TEST_F(FIXTURE, ConcurrentStatements) { TestConcurrentStatements(); } \ TEST_F(FIXTURE, ErrorCompatibility) { TestErrorCompatibility(); } \ + TEST_F(FIXTURE, ResultIndependence) { TestResultIndependence(); } \ TEST_F(FIXTURE, ResultInvalidation) { TestResultInvalidation(); } } // namespace adbc_validation diff --git a/c/validation/adbc_validation_connection.cc b/c/validation/adbc_validation_connection.cc index 032f1d328f..9c9bdeff21 100644 --- a/c/validation/adbc_validation_connection.cc +++ b/c/validation/adbc_validation_connection.cc @@ -17,7 +17,11 @@ #include "adbc_validation.h" +#include #include +#include +#include +#include #include #include @@ -977,13 +981,13 @@ void ConnectionTest::TestMetadataGetObjectsPrimaryKey() { << "could not initialize the AdbcGetObjectsData object"; // Test primary key - struct AdbcGetObjectsTable* table = - AdbcGetObjectsDataGetTableByName(*get_objects_data, quirks()->catalog().c_str(), - quirks()->db_schema().c_str(), "adbc_pkey_test"); + struct AdbcGetObjectsTable* table = InternalAdbcGetObjectsDataGetTableByName( + *get_objects_data, quirks()->catalog().c_str(), quirks()->db_schema().c_str(), + "adbc_pkey_test"); ASSERT_NE(table, nullptr) << "could not find adbc_pkey_test table"; ASSERT_EQ(table->n_table_columns, 1); - struct AdbcGetObjectsColumn* column = AdbcGetObjectsDataGetColumnByName( + struct AdbcGetObjectsColumn* column = InternalAdbcGetObjectsDataGetColumnByName( *get_objects_data, quirks()->catalog().c_str(), quirks()->db_schema().c_str(), "adbc_pkey_test", "id"); ASSERT_NE(column, nullptr) << "could not find id column on adbc_pkey_test table"; @@ -996,7 +1000,7 @@ void ConnectionTest::TestMetadataGetObjectsPrimaryKey() { ConstraintTest(constraint, "PRIMARY KEY", {"id"}); // Test 
composite primary key - struct AdbcGetObjectsTable* composite_table = AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsTable* composite_table = InternalAdbcGetObjectsDataGetTableByName( *get_objects_data, quirks()->catalog().c_str(), quirks()->db_schema().c_str(), "adbc_composite_pkey_test"); ASSERT_NE(composite_table, nullptr) << "could not find adbc_composite_pkey_test table"; @@ -1009,7 +1013,7 @@ void ConnectionTest::TestMetadataGetObjectsPrimaryKey() { const char* parent_2_column_names[2] = {"id_primary_col1", "id_primary_col2"}; struct AdbcGetObjectsColumn* parent_2_column; for (int column_name_index = 0; column_name_index < 2; column_name_index++) { - parent_2_column = AdbcGetObjectsDataGetColumnByName( + parent_2_column = InternalAdbcGetObjectsDataGetColumnByName( *get_objects_data, quirks()->catalog().c_str(), quirks()->db_schema().c_str(), "adbc_composite_pkey_test", parent_2_column_names[column_name_index]); ASSERT_NE(parent_2_column, nullptr) @@ -1100,7 +1104,7 @@ void ConnectionTest::TestMetadataGetObjectsForeignKey() { << "could not initialize the AdbcGetObjectsData object"; // Test child table - struct AdbcGetObjectsTable* child_table = AdbcGetObjectsDataGetTableByName( + struct AdbcGetObjectsTable* child_table = InternalAdbcGetObjectsDataGetTableByName( *get_objects_data, quirks()->catalog().c_str(), quirks()->db_schema().c_str(), "adbc_fkey_child_test"); ASSERT_NE(child_table, nullptr) << "could not find adbc_fkey_child_test table"; @@ -1111,7 +1115,7 @@ void ConnectionTest::TestMetadataGetObjectsForeignKey() { const char* child_column_names[3] = {"id_child_col1", "id_child_col2", "id_child_col3"}; struct AdbcGetObjectsColumn* child_column; for (int column_index = 0; column_index < 2; column_index++) { - child_column = AdbcGetObjectsDataGetColumnByName( + child_column = InternalAdbcGetObjectsDataGetColumnByName( *get_objects_data, quirks()->catalog().c_str(), quirks()->db_schema().c_str(), "adbc_fkey_child_test", 
child_column_names[column_index]); ASSERT_NE(child_column, nullptr) diff --git a/c/validation/adbc_validation_statement.cc b/c/validation/adbc_validation_statement.cc index cd388623ba..ae5ef518a3 100644 --- a/c/validation/adbc_validation_statement.cc +++ b/c/validation/adbc_validation_statement.cc @@ -18,6 +18,9 @@ #include "adbc_validation.h" #include +#include +#include +#include #include #include @@ -48,8 +51,12 @@ void StatementTest::TearDownTest() { if (statement.private_data) { EXPECT_THAT(AdbcStatementRelease(&statement, &error), IsOkStatus(&error)); } - EXPECT_THAT(AdbcConnectionRelease(&connection, &error), IsOkStatus(&error)); - EXPECT_THAT(AdbcDatabaseRelease(&database, &error), IsOkStatus(&error)); + if (connection.private_data) { + EXPECT_THAT(AdbcConnectionRelease(&connection, &error), IsOkStatus(&error)); + } + if (database.private_data) { + EXPECT_THAT(AdbcDatabaseRelease(&database, &error), IsOkStatus(&error)); + } if (error.release) { error.release(&error); } @@ -2670,6 +2677,9 @@ void StatementTest::TestTransactions() { })(), ::testing::Not(IsOkStatus(&error))); + // Rollback + ASSERT_THAT(AdbcConnectionRollback(&connection, &error), IsOkStatus(&error)); + // Commit ASSERT_THAT(quirks()->CreateSampleTable(&connection, "bulk_ingest", &error), IsOkStatus(&error)); @@ -2817,21 +2827,52 @@ struct ADBC_EXPORT AdbcError100 { // Test that an ADBC 1.0.0-sized error still works void StatementTest::TestErrorCompatibility() { static_assert(sizeof(AdbcError100) == ADBC_ERROR_1_0_0_SIZE, "Wrong size"); - // XXX: sketchy cast - auto* error = reinterpret_cast(malloc(ADBC_ERROR_1_0_0_SIZE)); - std::memset(error, 0, ADBC_ERROR_1_0_0_SIZE); + struct AdbcError error; + std::memset(&error, 0, ADBC_ERROR_1_1_0_SIZE); + struct AdbcDriver canary; + error.private_data = &canary; + error.private_driver = &canary; - ASSERT_THAT(AdbcStatementNew(&connection, &statement, error), IsOkStatus(error)); + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), 
IsOkStatus(&error)); ASSERT_THAT( - AdbcStatementSetSqlQuery(&statement, "SELECT * FROM thistabledoesnotexist", error), - IsOkStatus(error)); + AdbcStatementSetSqlQuery(&statement, "SELECT * FROM thistabledoesnotexist", &error), + IsOkStatus(&error)); adbc_validation::StreamReader reader; ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, - &reader.rows_affected, error), - ::testing::Not(IsOkStatus(error))); - auto* old_error = reinterpret_cast(error); - old_error->release(old_error); - free(error); + &reader.rows_affected, &error), + ::testing::Not(IsOkStatus(&error))); + ASSERT_EQ(&canary, error.private_data); + ASSERT_EQ(&canary, error.private_driver); + error.release(&error); +} + +void StatementTest::TestResultIndependence() { + // If we have a result reader, and we close the statement (and other + // resources), either the statement should error, or the reader should be + // closeable and should error on other operations + + ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "SELECT 42", &error), + IsOkStatus(&error)); + + StreamReader reader; + ASSERT_THAT(AdbcStatementExecuteQuery(&statement, &reader.stream.value, + &reader.rows_affected, &error), + IsOkStatus(&error)); + ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); + + auto status = AdbcStatementRelease(&statement, &error); + if (status != ADBC_STATUS_OK) { + // That's ok, this driver prevents closing the statement while readers are open + return; + } + ASSERT_THAT(AdbcConnectionRelease(&connection, &error), IsOkStatus(&error)); + ASSERT_THAT(AdbcDatabaseRelease(&database, &error), IsOkStatus(&error)); + + // Must not crash (but it's up to the driver whether it errors or succeeds) + std::ignore = reader.MaybeNext(); + // Implicitly StreamReader calls release() on destruction, that should not + // crash either } void StatementTest::TestResultInvalidation() { @@ -2855,4 +2896,5 @@ void 
StatementTest::TestResultInvalidation() { // First reader may fail, or may succeed but give no data reader1.MaybeNext(); } + } // namespace adbc_validation diff --git a/c/validation/adbc_validation_util.cc b/c/validation/adbc_validation_util.cc index 7d97ad7626..9fcf2c5980 100644 --- a/c/validation/adbc_validation_util.cc +++ b/c/validation/adbc_validation_util.cc @@ -16,6 +16,11 @@ // under the License. #include "adbc_validation_util.h" + +#include +#include +#include + #include #include "adbc_validation.h" @@ -315,7 +320,7 @@ std::string GetDriverVendorVersion(struct AdbcConnection* connection) { reader.GetSchema(); if (error.release) { error.release(&error); - throw std::runtime_error("error occured calling AdbcConnectionGetInfo!"); + throw std::runtime_error("error occurred calling AdbcConnectionGetInfo!"); } reader.Next(); diff --git a/c/validation/adbc_validation_util.h b/c/validation/adbc_validation_util.h index b4f5d6f81a..d3eab643d1 100644 --- a/c/validation/adbc_validation_util.h +++ b/c/validation/adbc_validation_util.h @@ -242,9 +242,9 @@ struct StreamReader { struct GetObjectsReader { explicit GetObjectsReader(struct ArrowArrayView* array_view) { // TODO: this swallows any construction errors - get_objects_data_ = AdbcGetObjectsDataInit(array_view); + get_objects_data_ = InternalAdbcGetObjectsDataInit(array_view); } - ~GetObjectsReader() { AdbcGetObjectsDataDelete(get_objects_data_); } + ~GetObjectsReader() { InternalAdbcGetObjectsDataDelete(get_objects_data_); } struct AdbcGetObjectsData* operator*() { return get_objects_data_; } struct AdbcGetObjectsData* operator->() { return get_objects_data_; } diff --git a/c/validation/meson.build b/c/validation/meson.build index 984f4a34fb..1ae46228b0 100644 --- a/c/validation/meson.build +++ b/c/validation/meson.build @@ -16,27 +16,35 @@ # under the License. 
-adbc_validation_util_lib = static_library( - 'adbc_validation_util', - 'adbc_validation_util.cc', - include_directories: [include_dir, driver_dir], - link_with: [adbc_common_lib, adbc_framework_lib, adbc_driver_manager_lib], - dependencies: [nanoarrow_dep, gtest_main_dep, gmock_dep], -) +if get_option('tests').enabled() + adbc_validation_util_lib = static_library( + 'adbc_validation_util', + 'adbc_validation_util.cc', + include_directories: [include_dir, driver_dir], + link_with: [ + adbc_common_lib, + adbc_framework_lib, + adbc_driver_manager_lib, + ], + dependencies: [nanoarrow_dep, gtest_main_dep, gmock_dep], + ) -adbc_validation_dep = declare_dependency( - sources: [ - 'adbc_validation.cc', - 'adbc_validation_connection.cc', - 'adbc_validation_database.cc', - 'adbc_validation_statement.cc', - ], - include_directories: [include_dir, driver_dir], - link_with: [ - adbc_validation_util_lib, - adbc_common_lib, - adbc_framework_lib, - adbc_driver_manager_lib, - ], - dependencies: [nanoarrow_dep, gtest_main_dep, gmock_dep], -) + adbc_validation_dep = declare_dependency( + sources: [ + 'adbc_validation.cc', + 'adbc_validation_connection.cc', + 'adbc_validation_database.cc', + 'adbc_validation_statement.cc', + ], + include_directories: [include_dir, driver_dir], + link_with: [ + adbc_validation_util_lib, + adbc_common_lib, + adbc_framework_lib, + adbc_driver_manager_lib, + ], + dependencies: [nanoarrow_dep, gtest_main_dep, gmock_dep], + ) +else + adbc_validation_dep = disabler() +endif diff --git a/c/vendor/nanoarrow/CMakeLists.txt b/c/vendor/nanoarrow/CMakeLists.txt index 233f999c7f..978dc14059 100644 --- a/c/vendor/nanoarrow/CMakeLists.txt +++ b/c/vendor/nanoarrow/CMakeLists.txt @@ -21,6 +21,8 @@ add_library( nanoarrow.c ) +target_include_directories(nanoarrow PUBLIC "${CMAKE_CURRENT_LIST_DIR}/..") set_target_properties( nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON ) +add_library(nanoarrow::nanoarrow ALIAS nanoarrow) diff --git a/c/vendor/nanoarrow/nanoarrow.c 
b/c/vendor/nanoarrow/nanoarrow.c index 8f2659881b..80b79eee5c 100644 --- a/c/vendor/nanoarrow/nanoarrow.c +++ b/c/vendor/nanoarrow/nanoarrow.c @@ -111,6 +111,7 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_INT32: case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_DECIMAL32: layout->element_size_bits[1] = 32; break; case NANOARROW_TYPE_INTERVAL_MONTHS: @@ -122,6 +123,7 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { case NANOARROW_TYPE_INT64: case NANOARROW_TYPE_DOUBLE: case NANOARROW_TYPE_INTERVAL_DAY_TIME: + case NANOARROW_TYPE_DECIMAL64: layout->element_size_bits[1] = 64; break; @@ -188,6 +190,24 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW; layout->element_size_bits[1] = 128; + break; + + case NANOARROW_TYPE_LIST_VIEW: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_VIEW_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_SIZE; + layout->buffer_data_type[2] = NANOARROW_TYPE_INT32; + layout->element_size_bits[2] = 32; + break; + case NANOARROW_TYPE_LARGE_LIST_VIEW: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_VIEW_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; + layout->element_size_bits[1] = 64; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_SIZE; + layout->buffer_data_type[2] = NANOARROW_TYPE_INT64; + layout->element_size_bits[2] = 64; + break; default: break; @@ -326,13 +346,14 @@ ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, // Use 32-bit words for portability uint32_t words32[8]; - int n_words32 = decimal->n_words * 2; + memset(words32, 0, sizeof(words32)); + int n_words32 = decimal->n_words > 0 ? 
decimal->n_words * 2 : 1; NANOARROW_DCHECK(n_words32 <= 8); memset(words32, 0, sizeof(words32)); ShiftAndAdd(value, words32, n_words32); - if (decimal->low_word_index == 0) { + if (_ArrowIsLittleEndian() || n_words32 == 1) { memcpy(decimal->words, words32, sizeof(uint32_t) * n_words32); } else { uint64_t lo; @@ -356,11 +377,31 @@ ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, // https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L365 ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer) { - NANOARROW_DCHECK(decimal->n_words == 2 || decimal->n_words == 4); + NANOARROW_DCHECK(decimal->n_words == 0 || decimal->n_words == 1 || + decimal->n_words == 2 || decimal->n_words == 4); + + // For the 32-bit case, just use snprintf() + if (decimal->n_words == 0) { + int32_t value; + memcpy(&value, decimal->words, sizeof(int32_t)); + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, 16)); + int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, + (buffer->capacity_bytes - buffer->size_bytes), "%d", value); + if (n_chars <= 0) { + return EINVAL; + } + + buffer->size_bytes += n_chars; + return NANOARROW_OK; + } + int is_negative = ArrowDecimalSign(decimal) < 0; uint64_t words_little_endian[4]; - if (decimal->low_word_index == 0) { + if (decimal->n_words == 0) { + words_little_endian[0] = 0; + memcpy(words_little_endian, decimal->words, sizeof(uint32_t)); + } else if (decimal->low_word_index == 0) { memcpy(words_little_endian, decimal->words, decimal->n_words * sizeof(uint64_t)); } else { for (int i = 0; i < decimal->n_words; i++) { @@ -370,21 +411,33 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decim // We've already made a copy, so negate that if needed if (is_negative) { - uint64_t carry = 1; - for (int i = 0; i < decimal->n_words; i++) { - uint64_t elem = words_little_endian[i]; - elem = ~elem + carry; 
- carry &= (elem == 0); - words_little_endian[i] = elem; + if (decimal->n_words == 0) { + uint32_t elem = (uint32_t)words_little_endian[0]; + elem = ~elem + 1; + words_little_endian[0] = (int32_t)elem; + } else { + uint64_t carry = 1; + for (int i = 0; i < decimal->n_words; i++) { + uint64_t elem = words_little_endian[i]; + elem = ~elem + carry; + carry &= (elem == 0); + words_little_endian[i] = elem; + } } } // Find the most significant word that is non-zero int most_significant_elem_idx = -1; - for (int i = decimal->n_words - 1; i >= 0; i--) { - if (words_little_endian[i] != 0) { - most_significant_elem_idx = i; - break; + if (decimal->n_words == 0) { + if (words_little_endian[0] != 0) { + most_significant_elem_idx = 0; + } + } else { + for (int i = decimal->n_words - 1; i >= 0; i--) { + if (words_little_endian[i] != 0) { + most_significant_elem_idx = i; + break; + } } } @@ -462,6 +515,50 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decim return NANOARROW_OK; } + +ArrowErrorCode ArrowDecimalAppendStringToBuffer(const struct ArrowDecimal* decimal, + struct ArrowBuffer* buffer) { + int64_t buffer_size = buffer->size_bytes; + NANOARROW_RETURN_NOT_OK(ArrowDecimalAppendDigitsToBuffer(decimal, buffer)); + int64_t digits_size = buffer->size_bytes - buffer_size; + + if (decimal->scale <= 0) { + // e.g., digits are -12345 and scale is -2 -> -1234500 + // Just add zeros to the end + for (int i = decimal->scale; i < 0; i++) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(buffer, '0')); + } + return NANOARROW_OK; + } + + int is_negative = buffer->data[0] == '-'; + int64_t num_digits = digits_size - is_negative; + if (num_digits <= decimal->scale) { + // e.g., digits are -12345 and scale is 6 -> -0.012345 + // Insert "0." 
between the (maybe) negative sign and the digits + int64_t num_zeros_after_decimal = decimal->scale - num_digits; + NANOARROW_RETURN_NOT_OK( + ArrowBufferResize(buffer, buffer->size_bytes + num_zeros_after_decimal + 2, 0)); + + uint8_t* digits_start = buffer->data + is_negative; + memmove(digits_start + num_zeros_after_decimal + 2, digits_start, num_digits); + *digits_start++ = '0'; + *digits_start++ = '.'; + for (int i = 0; i < num_zeros_after_decimal; i++) { + *digits_start++ = '0'; + } + + } else { + // e.g., digits are -12345 and scale is 4 -> -1.2345 + // Insert a decimal point before scale digits of output + NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes + 1, 0)); + uint8_t* decimal_point_to_be = buffer->data + buffer->size_bytes - 1 - decimal->scale; + memmove(decimal_point_to_be + 1, decimal_point_to_be, decimal->scale); + *decimal_point_to_be = '.'; + } + + return NANOARROW_OK; +} // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information @@ -589,6 +686,10 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType type) { return "+l"; case NANOARROW_TYPE_LARGE_LIST: return "+L"; + case NANOARROW_TYPE_LIST_VIEW: + return "+vl"; + case NANOARROW_TYPE_LARGE_LIST_VIEW: + return "+vL"; case NANOARROW_TYPE_STRUCT: return "+s"; case NANOARROW_TYPE_MAP: @@ -607,6 +708,8 @@ static int ArrowSchemaInitChildrenIfNeeded(struct ArrowSchema* schema, case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: + case NANOARROW_TYPE_LIST_VIEW: + case NANOARROW_TYPE_LARGE_LIST_VIEW: NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); ArrowSchemaInit(schema->children[0]); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "item")); @@ -735,11 +838,35 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowT char buffer[64]; int n_chars; switch (type) { + case NANOARROW_TYPE_DECIMAL32: + if (decimal_precision > 9) { + return EINVAL; + } + + n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,32", decimal_precision, + decimal_scale); + break; + case NANOARROW_TYPE_DECIMAL64: + if (decimal_precision > 18) { + return EINVAL; + } + + n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,64", decimal_precision, + decimal_scale); + break; case NANOARROW_TYPE_DECIMAL128: + if (decimal_precision > 38) { + return EINVAL; + } + n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale); break; case NANOARROW_TYPE_DECIMAL256: + if (decimal_precision > 76) { + return EINVAL; + } + n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", decimal_precision, decimal_scale); break; @@ -1185,6 +1312,12 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view, *format_end_out = parse_end; switch (schema_view->decimal_bitwidth) { + case 32: + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL32); + return 
NANOARROW_OK; + case 64: + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL64); + return NANOARROW_OK; case 128: ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128); return NANOARROW_OK; @@ -1321,6 +1454,24 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view, return EINVAL; } + // views + case 'v': + switch (format[2]) { + case 'l': + schema_view->storage_type = NANOARROW_TYPE_LIST_VIEW; + schema_view->type = NANOARROW_TYPE_LIST_VIEW; + *format_end_out = format + 3; + return NANOARROW_OK; + case 'L': + schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST_VIEW; + schema_view->type = NANOARROW_TYPE_LARGE_LIST_VIEW; + *format_end_out = format + 3; + return NANOARROW_OK; + default: + ArrowErrorSet( + error, "Expected view format string +vl or +vL but found '%s'", format); + return EINVAL; + } default: ArrowErrorSet(error, "Expected nested type format string but found '%s'", format); @@ -1621,6 +1772,8 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie case NANOARROW_TYPE_HALF_FLOAT: case NANOARROW_TYPE_FLOAT: case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL32: + case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_DECIMAL256: case NANOARROW_TYPE_STRING: @@ -1649,7 +1802,9 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie return ArrowSchemaViewValidateNChildren(schema_view, 0, error); case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LIST_VIEW: case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_LARGE_LIST_VIEW: case NANOARROW_TYPE_FIXED_SIZE_LIST: return ArrowSchemaViewValidateNChildren(schema_view, 1, error); @@ -1759,7 +1914,7 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, ArrowLayoutInit(&schema_view->layout, schema_view->storage_type); if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) { - schema_view->layout.element_size_bits[1] = schema_view->fixed_size * 
8; + schema_view->layout.element_size_bits[1] = (int64_t)schema_view->fixed_size * 8; } else if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_LIST) { schema_view->layout.child_size_elements = schema_view->fixed_size; } @@ -1780,6 +1935,8 @@ static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_vi char* out, int64_t n) { const char* type_string = ArrowTypeString(schema_view->type); switch (schema_view->type) { + case NANOARROW_TYPE_DECIMAL32: + case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_DECIMAL256: return snprintf(out, n, "%s(%" PRId32 ", %" PRId32 ")", type_string, @@ -2237,6 +2394,8 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, case NANOARROW_TYPE_HALF_FLOAT: case NANOARROW_TYPE_FLOAT: case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL32: + case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_DECIMAL256: case NANOARROW_TYPE_INTERVAL_MONTHS: @@ -2254,6 +2413,8 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, case NANOARROW_TYPE_LARGE_STRING: case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_LARGE_BINARY: + case NANOARROW_TYPE_LIST_VIEW: + case NANOARROW_TYPE_LARGE_LIST_VIEW: array->n_buffers = 3; break; @@ -2300,6 +2461,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, private_data->n_variadic_buffers = 0; private_data->variadic_buffers = NULL; private_data->variadic_buffer_sizes = NULL; + private_data->list_view_offset = 0; array->private_data = private_data; array->buffers = (const void**)(private_data->buffer_data); @@ -2831,6 +2993,8 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) continue; case NANOARROW_BUFFER_TYPE_TYPE_ID: case NANOARROW_BUFFER_TYPE_UNION_OFFSET: + case NANOARROW_BUFFER_TYPE_VIEW_OFFSET: + case NANOARROW_BUFFER_TYPE_SIZE: array_view->buffer_views[i].size_bytes = element_size_bytes * length; continue; case 
NANOARROW_BUFFER_TYPE_VARIADIC_DATA: @@ -2987,12 +3151,19 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view, min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length); break; + case NANOARROW_BUFFER_TYPE_SIZE: + min_buffer_size_bytes = element_size_bytes * offset_plus_length; + break; case NANOARROW_BUFFER_TYPE_DATA_OFFSET: // Probably don't want/need to rely on the producer to have allocated an // offsets buffer of length 1 for a zero-size array min_buffer_size_bytes = (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1); break; + case NANOARROW_BUFFER_TYPE_VIEW_OFFSET: + min_buffer_size_bytes = + (offset_plus_length != 0) * element_size_bytes * offset_plus_length; + break; case NANOARROW_BUFFER_TYPE_DATA: min_buffer_size_bytes = _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * @@ -3029,6 +3200,8 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view, case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: case NANOARROW_TYPE_MAP: + case NANOARROW_TYPE_LIST_VIEW: + case NANOARROW_TYPE_LARGE_LIST_VIEW: if (array_view->n_children != 1) { ArrowErrorSet(error, "Expected 1 child of %s array but found %" PRId64 " child arrays", @@ -3308,10 +3481,11 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, if (array_view->children[0]->length < last_offset) { ArrowErrorSet(error, - "Expected child of large list array to have length >= %" PRId64 + "Expected child of %s array to have length >= %" PRId64 " but found array " "with length %" PRId64, - last_offset, array_view->children[0]->length); + ArrowTypeString(array_view->storage_type), last_offset, + array_view->children[0]->length); return EINVAL; } } @@ -3554,12 +3728,53 @@ static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view, } } + if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW || + array_view->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) { + int64_t 
child_len = array_view->children[0]->length; + + struct ArrowBufferView offsets, sizes; + offsets.data.data = array_view->buffer_views[1].data.data; + sizes.data.data = array_view->buffer_views[2].data.data; + + for (int64_t i = array_view->offset; i < array_view->length + array_view->offset; + i++) { + int64_t offset, size; + if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW) { + offset = offsets.data.as_int32[i]; + size = sizes.data.as_int32[i]; + } else { + offset = offsets.data.as_int64[i]; + size = sizes.data.as_int64[i]; + } + + if (offset < 0) { + ArrowErrorSet(error, "Invalid negative offset %" PRId64 " at index %" PRId64, + offset, i); + return EINVAL; + } + + if (size < 0) { + ArrowErrorSet(error, "Invalid negative size %" PRId64 " at index %" PRId64, size, + i); + return EINVAL; + } + + if ((offset + size) > child_len) { + ArrowErrorSet(error, + "Offset: %" PRId64 " + size: %" PRId64 " at index: %" PRId64 + " exceeds length of child view: %" PRId64, + offset, size, i, child_len); + return EINVAL; + } + } + } + // Recurse for children for (int64_t i = 0; i < array_view->n_children; i++) { NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error)); } - // Dictionary valiation not implemented + // Dictionary validation not implemented if (array_view->dictionary != NULL) { NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->dictionary, error)); // TODO: validate the indices diff --git a/c/vendor/nanoarrow/nanoarrow.h b/c/vendor/nanoarrow/nanoarrow.h index 264aad5b6e..1fcac16ac4 100644 --- a/c/vendor/nanoarrow/nanoarrow.h +++ b/c/vendor/nanoarrow/nanoarrow.h @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef NANOARROW_BUILD_ID_H_INCLUDED -#define NANOARROW_BUILD_ID_H_INCLUDED +#ifndef NANOARROW_CONFIG_H_INCLUDED +#define NANOARROW_CONFIG_H_INCLUDED #define NANOARROW_VERSION_MAJOR 0 -#define NANOARROW_VERSION_MINOR 6 +#define NANOARROW_VERSION_MINOR 7 #define NANOARROW_VERSION_PATCH 0 -#define NANOARROW_VERSION "0.6.0" +#define NANOARROW_VERSION "0.7.0" #define NANOARROW_VERSION_INT \ (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ @@ -29,6 +29,13 @@ #define NANOARROW_NAMESPACE Private +#if !defined(NANOARROW_CXX_NAMESPACE) +#define NANOARROW_CXX_NAMESPACE nanoarrow +#endif + +#define NANOARROW_CXX_NAMESPACE_BEGIN namespace NANOARROW_CXX_NAMESPACE { +#define NANOARROW_CXX_NAMESPACE_END } + #endif // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file @@ -181,14 +188,14 @@ struct ArrowArrayStream { NANOARROW_RETURN_NOT_OK((x_ <= max_) ? NANOARROW_OK : EINVAL) #if defined(NANOARROW_DEBUG) -#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ - do { \ - const int NAME = (EXPR); \ - if (NAME) { \ - ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d(%s)\n* %s:%d", EXPR_STR, \ - NAME, strerror(NAME), __FILE__, __LINE__); \ - return NAME; \ - } \ +#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d", EXPR_STR, \ + NAME, __FILE__, __LINE__); \ + return NAME; \ + } \ } while (0) #else #define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \ @@ -485,7 +492,11 @@ enum ArrowType { NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO, NANOARROW_TYPE_RUN_END_ENCODED, NANOARROW_TYPE_BINARY_VIEW, - NANOARROW_TYPE_STRING_VIEW + NANOARROW_TYPE_STRING_VIEW, + NANOARROW_TYPE_DECIMAL32, + NANOARROW_TYPE_DECIMAL64, + NANOARROW_TYPE_LIST_VIEW, + NANOARROW_TYPE_LARGE_LIST_VIEW, }; 
/// \brief Get a string value of an enum ArrowType value @@ -542,6 +553,10 @@ static inline const char* ArrowTypeString(enum ArrowType type) { return "interval_months"; case NANOARROW_TYPE_INTERVAL_DAY_TIME: return "interval_day_time"; + case NANOARROW_TYPE_DECIMAL32: + return "decimal32"; + case NANOARROW_TYPE_DECIMAL64: + return "decimal64"; case NANOARROW_TYPE_DECIMAL128: return "decimal128"; case NANOARROW_TYPE_DECIMAL256: @@ -578,6 +593,10 @@ static inline const char* ArrowTypeString(enum ArrowType type) { return "binary_view"; case NANOARROW_TYPE_STRING_VIEW: return "string_view"; + case NANOARROW_TYPE_LIST_VIEW: + return "list_view"; + case NANOARROW_TYPE_LARGE_LIST_VIEW: + return "large_list_view"; default: return NULL; } @@ -656,7 +675,9 @@ enum ArrowBufferType { NANOARROW_BUFFER_TYPE_DATA_OFFSET, NANOARROW_BUFFER_TYPE_DATA, NANOARROW_BUFFER_TYPE_VARIADIC_DATA, - NANOARROW_BUFFER_TYPE_VARIADIC_SIZE + NANOARROW_BUFFER_TYPE_VARIADIC_SIZE, + NANOARROW_BUFFER_TYPE_VIEW_OFFSET, + NANOARROW_BUFFER_TYPE_SIZE, }; /// \brief The maximum number of fixed buffers in an ArrowArrayView or ArrowLayout @@ -890,6 +911,9 @@ struct ArrowArrayPrivateData { // Size of each variadic buffer in bytes int64_t* variadic_buffer_sizes; + + // The current offset used to build list views + int64_t list_view_offset; }; /// \brief A representation of an interval. @@ -922,7 +946,8 @@ static inline void ArrowIntervalInit(struct ArrowInterval* interval, /// values set using ArrowDecimalSetInt(), ArrowDecimalSetBytes128(), /// or ArrowDecimalSetBytes256(). struct ArrowDecimal { - /// \brief An array of 64-bit integers of n_words length defined in native-endian order + /// \brief An array of 64-bit integers of n_words length defined in native-endian order. + /// For a 32-bit decimal value, index 0 will be a 32-bit integer value. 
uint64_t words[4]; /// \brief The number of significant digits this decimal number can represent @@ -931,7 +956,8 @@ struct ArrowDecimal { /// \brief The number of digits after the decimal point. This can be negative. int32_t scale; - /// \brief The number of words in the words array + /// \brief The number of 64-bit words in the words array. For the special case of a + /// 32-bit decimal value, this will be 0. int n_words; /// \brief Cached value used by the implementation @@ -948,13 +974,14 @@ static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwid memset(decimal->words, 0, sizeof(decimal->words)); decimal->precision = precision; decimal->scale = scale; + // n_words will be 0 for bitwidth == 32 decimal->n_words = (int)(bitwidth / 8 / sizeof(uint64_t)); if (_ArrowIsLittleEndian()) { decimal->low_word_index = 0; - decimal->high_word_index = decimal->n_words - 1; + decimal->high_word_index = decimal->n_words > 0 ? decimal->n_words - 1 : 0; } else { - decimal->low_word_index = decimal->n_words - 1; + decimal->low_word_index = decimal->n_words > 0 ? decimal->n_words - 1 : 0; decimal->high_word_index = 0; } } @@ -965,6 +992,12 @@ static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwid /// within the signed 64-bit integer range (A precision less than or equal /// to 18 is sufficiently small). 
static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decimal) { + if (decimal->n_words == 0) { + int32_t value; + memcpy(&value, decimal->words, sizeof(int32_t)); + return value; + } + return (int64_t)decimal->words[decimal->low_word_index]; } @@ -972,18 +1005,32 @@ static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decima /// \ingroup nanoarrow-utils static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal, uint8_t* out) { - memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); + if (decimal->n_words == 0) { + memcpy(out, decimal->words, sizeof(int32_t)); + } else { + memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); + } } /// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise /// \ingroup nanoarrow-utils static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) { - return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); + if (decimal->n_words == 0) { + return ArrowDecimalGetIntUnsafe(decimal) >= 0 ? 
1 : -1; + } else { + return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); + } } /// \brief Sets the integer value of this decimal /// \ingroup nanoarrow-utils static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) { + if (decimal->n_words == 0) { + int32_t value32 = (int32_t)value; + memcpy(decimal->words, &value32, sizeof(int32_t)); + return; + } + if (value < 0) { memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t)); } else { @@ -996,6 +1043,14 @@ static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t valu /// \brief Negate the value of this decimal in place /// \ingroup nanoarrow-utils static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) { + if (decimal->n_words == 0) { + int32_t value; + memcpy(&value, decimal->words, sizeof(int32_t)); + value = -value; + memcpy(decimal->words, &value, sizeof(int32_t)); + return; + } + uint64_t carry = 1; if (decimal->low_word_index == 0) { @@ -1019,7 +1074,11 @@ static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) { /// \ingroup nanoarrow-utils static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, const uint8_t* value) { - memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); + if (decimal->n_words == 0) { + memcpy(decimal->words, value, sizeof(int32_t)); + } else { + memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); + } } #ifdef __cplusplus @@ -1079,6 +1138,8 @@ static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, #define ArrowDecimalSetDigits NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalSetDigits) #define ArrowDecimalAppendDigitsToBuffer \ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalAppendDigitsToBuffer) +#define ArrowDecimalAppendStringToBuffer \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalAppendStringToBuffer) #define ArrowSchemaInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInit) #define ArrowSchemaInitFromType \ 
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType) @@ -1168,6 +1229,20 @@ static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, #endif +#if (defined _WIN32 || defined __CYGWIN__) && defined(NANOARROW_BUILD_DLL) +#if defined(NANOARROW_EXPORT_DLL) +#define NANOARROW_DLL __declspec(dllexport) +#else +#define NANOARROW_DLL __declspec(dllimport) +#endif // defined(NANOARROW_EXPORT_DLL) +#elif !defined(NANOARROW_DLL) +#if defined(__GNUC__) && __GNUC__ >= 4 +#define NANOARROW_DLL __attribute__((visibility("default"))) +#else +#define NANOARROW_DLL +#endif // __GNUC__ >= 4 +#endif + #ifdef __cplusplus extern "C" { #endif @@ -1191,19 +1266,19 @@ extern "C" { /// @{ /// \brief Allocate like malloc() -void* ArrowMalloc(int64_t size); +NANOARROW_DLL void* ArrowMalloc(int64_t size); /// \brief Reallocate like realloc() -void* ArrowRealloc(void* ptr, int64_t size); +NANOARROW_DLL void* ArrowRealloc(void* ptr, int64_t size); /// \brief Free a pointer allocated using ArrowMalloc() or ArrowRealloc(). -void ArrowFree(void* ptr); +NANOARROW_DLL void ArrowFree(void* ptr); /// \brief Return the default allocator /// /// The default allocator uses ArrowMalloc(), ArrowRealloc(), and /// ArrowFree(). -struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void); +NANOARROW_DLL struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void); /// \brief Create a custom deallocator /// @@ -1211,8 +1286,8 @@ struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void); /// attach a custom deallocator to an ArrowBuffer. This may be used to /// avoid copying an existing buffer that was not allocated using the /// infrastructure provided here (e.g., by an R or Python object). 
-struct ArrowBufferAllocator ArrowBufferDeallocator(ArrowBufferDeallocatorCallback, - void* private_data); +NANOARROW_DLL struct ArrowBufferAllocator ArrowBufferDeallocator( + ArrowBufferDeallocatorCallback, void* private_data); /// @} @@ -1292,8 +1367,8 @@ static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream /// \brief Set the contents of an error using printf syntax. /// /// If error is NULL, this function does nothing and returns NANOARROW_OK. -NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError* error, - const char* fmt, ...); +NANOARROW_DLL NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError* error, + const char* fmt, ...); /// @} @@ -1302,24 +1377,29 @@ NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError* error, /// @{ /// \brief Return a version string in the form "major.minor.patch" -const char* ArrowNanoarrowVersion(void); +NANOARROW_DLL const char* ArrowNanoarrowVersion(void); /// \brief Return an integer that can be used to compare versions sequentially -int ArrowNanoarrowVersionInt(void); +NANOARROW_DLL int ArrowNanoarrowVersionInt(void); /// \brief Initialize a description of buffer arrangements from a storage type -void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type); +NANOARROW_DLL void ArrowLayoutInit(struct ArrowLayout* layout, + enum ArrowType storage_type); /// \brief Create a string view from a null-terminated string static inline struct ArrowStringView ArrowCharView(const char* value); /// \brief Sets the integer value of an ArrowDecimal from a string -ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, - struct ArrowStringView value); +NANOARROW_DLL ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, + struct ArrowStringView value); /// \brief Get the integer value of an ArrowDecimal as string -ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, - struct ArrowBuffer* buffer); 
+NANOARROW_DLL ArrowErrorCode ArrowDecimalAppendDigitsToBuffer( + const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer); + +/// \brief Get the decimal value of an ArrowDecimal as a string +NANOARROW_DLL ArrowErrorCode ArrowDecimalAppendStringToBuffer( + const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer); /// \brief Get the half float value of a float static inline uint16_t ArrowFloatToHalfFloat(float value); @@ -1348,7 +1428,7 @@ static inline int64_t ArrowResolveChunk64(int64_t index, const int64_t* offsets, /// Initializes the fields and release callback of schema_out. Caller /// is responsible for calling the schema->release callback if /// NANOARROW_OK is returned. -void ArrowSchemaInit(struct ArrowSchema* schema); +NANOARROW_DLL void ArrowSchemaInit(struct ArrowSchema* schema); /// \brief Initialize an ArrowSchema from an ArrowType /// @@ -1356,7 +1436,8 @@ void ArrowSchemaInit(struct ArrowSchema* schema); /// ArrowSchemaSetType() for the common case of constructing an /// unparameterized type. The caller is responsible for calling the schema->release /// callback if NANOARROW_OK is returned. -ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type); +NANOARROW_DLL ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, + enum ArrowType type); /// \brief Get a human-readable summary of a Schema /// @@ -1364,8 +1445,8 @@ ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowTyp /// and returns the number of characters required for the output if /// n were sufficiently large. If recursive is non-zero, the result will /// also include children. 
-int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t n, - char recursive); +NANOARROW_DLL int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, + int64_t n, char recursive); /// \brief Set the format field of a schema from an ArrowType /// @@ -1375,14 +1456,16 @@ int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out, int64_t /// allocated, initialized, and named; however, the caller must /// ArrowSchemaSetType() on the preinitialized children. Schema must have been initialized /// using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, + enum ArrowType type); /// \brief Set the format field and initialize children of a struct schema /// /// The specified number of children are initialized; however, the caller is responsible /// for calling ArrowSchemaSetType() and ArrowSchemaSetName() on each child. /// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, + int64_t n_children); /// \brief Set the format field of a fixed-size schema /// @@ -1392,17 +1475,20 @@ ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_ch /// allocated, initialized, and named; however, the caller must /// ArrowSchemaSetType() the first child. Schema must have been initialized using /// ArrowSchemaInit() or ArrowSchemaDeepCopy(). 
-ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, - enum ArrowType type, int32_t fixed_size); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, + enum ArrowType type, + int32_t fixed_size); /// \brief Set the format field of a decimal schema /// /// Returns EINVAL for scale <= 0 or for type that is not -/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been -/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, - int32_t decimal_precision, - int32_t decimal_scale); +/// NANOARROW_TYPE_DECIMAL32, NANOARROW_TYPE_DECIMAL64, NANOARROW_TYPE_DECIMAL128 or +/// NANOARROW_TYPE_DECIMAL256. Schema must have been initialized using +/// ArrowSchemaInit() or ArrowSchemaDeepCopy(). +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, + enum ArrowType type, + int32_t decimal_precision, + int32_t decimal_scale); /// \brief Set the format field of a run-end encoded schema /// @@ -1412,8 +1498,8 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowT /// The caller must call `ArrowSchemaSetTypeXXX(schema->children[1])` to /// set the value type. Note that when building arrays using the `ArrowArrayAppendXXX()` /// functions, the run-end encoded array's logical length must be updated manually. -ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema, - enum ArrowType run_end_type); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema, + enum ArrowType run_end_type); /// \brief Set the format field of a time, timestamp, or duration schema /// @@ -1422,55 +1508,60 @@ ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema, /// NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The /// timezone parameter must be NULL for a non-timestamp type. 
Schema must have been /// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, - enum ArrowTimeUnit time_unit, - const char* timezone); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, + enum ArrowType type, + enum ArrowTimeUnit time_unit, + const char* timezone); /// \brief Set the format field of a union schema /// /// Returns EINVAL for a type that is not NANOARROW_TYPE_DENSE_UNION /// or NANOARROW_TYPE_SPARSE_UNION. The specified number of children are /// allocated, and initialized. -ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, - int64_t n_children); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, + enum ArrowType type, + int64_t n_children); /// \brief Make a (recursive) copy of a schema /// /// Allocates and copies fields of schema into schema_out. -ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, - struct ArrowSchema* schema_out); +NANOARROW_DLL ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, + struct ArrowSchema* schema_out); /// \brief Copy format into schema->format /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, + const char* format); /// \brief Copy name into schema->name /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). 
-ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, + const char* name); /// \brief Copy metadata into schema->metadata /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy. -ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata); +NANOARROW_DLL ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, + const char* metadata); /// \brief Allocate the schema->children array /// /// Includes the memory for each child struct ArrowSchema. /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, - int64_t n_children); +NANOARROW_DLL ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, + int64_t n_children); /// \brief Allocate the schema->dictionary member /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). 
-ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema); +NANOARROW_DLL ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema); /// @} @@ -1494,49 +1585,51 @@ struct ArrowMetadataReader { }; /// \brief Initialize an ArrowMetadataReader -ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, - const char* metadata); +NANOARROW_DLL ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, + const char* metadata); /// \brief Read the next key/value pair from an ArrowMetadataReader -ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, - struct ArrowStringView* key_out, - struct ArrowStringView* value_out); +NANOARROW_DLL ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, + struct ArrowStringView* key_out, + struct ArrowStringView* value_out); /// \brief The number of bytes in in a key/value metadata string -int64_t ArrowMetadataSizeOf(const char* metadata); +NANOARROW_DLL int64_t ArrowMetadataSizeOf(const char* metadata); /// \brief Check for a key in schema metadata -char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key); +NANOARROW_DLL char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key); /// \brief Extract a value from schema metadata /// /// If key does not exist in metadata, value_out is unmodified -ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, - struct ArrowStringView* value_out); +NANOARROW_DLL ArrowErrorCode ArrowMetadataGetValue(const char* metadata, + struct ArrowStringView key, + struct ArrowStringView* value_out); /// \brief Initialize a builder for schema metadata from key/value pairs /// /// metadata can be an existing metadata string or NULL to initialize /// an empty metadata string. 
-ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, const char* metadata); +NANOARROW_DLL ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, + const char* metadata); /// \brief Append a key/value pair to a buffer containing serialized metadata -ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, - struct ArrowStringView key, - struct ArrowStringView value); +NANOARROW_DLL ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); /// \brief Set a key/value pair to a buffer containing serialized metadata /// /// Ensures that the only entry for key in the metadata is set to value. /// This function maintains the existing position of (the first instance of) /// key if present in the data. -ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, - struct ArrowStringView key, - struct ArrowStringView value); +NANOARROW_DLL ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); /// \brief Remove a key from a buffer containing serialized metadata -ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, - struct ArrowStringView key); +NANOARROW_DLL ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, + struct ArrowStringView key); /// @} @@ -1634,9 +1727,9 @@ struct ArrowSchemaView { }; /// \brief Initialize an ArrowSchemaView -ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, - const struct ArrowSchema* schema, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, + const struct ArrowSchema* schema, + struct ArrowError* error); /// @} @@ -1852,24 +1945,24 @@ static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap); /// Initializes the fields and release callback of array. 
Caller /// is responsible for calling the array->release callback if /// NANOARROW_OK is returned. -ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, - enum ArrowType storage_type); +NANOARROW_DLL ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, + enum ArrowType storage_type); /// \brief Initialize the contents of an ArrowArray from an ArrowSchema /// /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. -ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, - const struct ArrowSchema* schema, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, + const struct ArrowSchema* schema, + struct ArrowError* error); /// \brief Initialize the contents of an ArrowArray from an ArrowArrayView /// /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. -ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, - const struct ArrowArrayView* array_view, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode ArrowArrayInitFromArrayView( + struct ArrowArray* array, const struct ArrowArrayView* array_view, + struct ArrowError* error); /// \brief Allocate the array->children array /// @@ -1877,7 +1970,8 @@ ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, /// whose members are marked as released and may be subsequently initialized /// with ArrowArrayInitFromType() or moved from an existing ArrowArray. /// schema must have been allocated using ArrowArrayInitFromType(). 
-ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children); +NANOARROW_DLL ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, + int64_t n_children); /// \brief Allocate the array->dictionary member /// @@ -1885,18 +1979,19 @@ ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_ch /// is marked as released and may be subsequently initialized /// with ArrowArrayInitFromType() or moved from an existing ArrowArray. /// array must have been allocated using ArrowArrayInitFromType() -ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array); +NANOARROW_DLL ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array); /// \brief Set the validity bitmap of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap); +NANOARROW_DLL void ArrowArraySetValidityBitmap(struct ArrowArray* array, + struct ArrowBitmap* bitmap); /// \brief Set a buffer of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, - struct ArrowBuffer* buffer); +NANOARROW_DLL ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, + struct ArrowBuffer* buffer); /// \brief Get the validity bitmap of an ArrowArray /// @@ -1922,8 +2017,8 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array); /// child array sizes for non-fixed-size arrays), recursively reserve space for /// additional elements. This is useful for reducing the number of reallocations /// that occur using the item-wise appenders. 
-ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, - int64_t additional_size_elements); +NANOARROW_DLL ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, + int64_t additional_size_elements); /// \brief Append a null value to an array static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n); @@ -2021,8 +2116,8 @@ static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array); /// into array->buffers and checks the actual size of the buffers /// against the expected size based on the final length. /// array must have been allocated using ArrowArrayInitFromType() -ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, + struct ArrowError* error); /// \brief Finish building an ArrowArray with explicit validation /// @@ -2031,9 +2126,9 @@ ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, /// buffer data access is not possible or more validation (i.e., /// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or /// corruptible source. 
-ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, - enum ArrowValidationLevel validation_level, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode ArrowArrayFinishBuilding( + struct ArrowArray* array, enum ArrowValidationLevel validation_level, + struct ArrowError* error); /// @} @@ -2044,8 +2139,8 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, /// @{ /// \brief Initialize the contents of an ArrowArrayView -void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, - enum ArrowType storage_type); +NANOARROW_DLL void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, + enum ArrowType storage_type); /// \brief Move an ArrowArrayView /// @@ -2055,32 +2150,34 @@ static inline void ArrowArrayViewMove(struct ArrowArrayView* src, struct ArrowArrayView* dst); /// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema -ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, - const struct ArrowSchema* schema, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode +ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, + const struct ArrowSchema* schema, struct ArrowError* error); /// \brief Allocate the array_view->children array /// /// Includes the memory for each child struct ArrowArrayView -ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, - int64_t n_children); +NANOARROW_DLL ArrowErrorCode +ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, int64_t n_children); /// \brief Allocate array_view->dictionary -ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view); +NANOARROW_DLL ArrowErrorCode +ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view); /// \brief Set data-independent buffer sizes from length -void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length); +NANOARROW_DLL void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, + 
int64_t length); /// \brief Set buffer sizes and data pointers from an ArrowArray -ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, - const struct ArrowArray* array, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, + const struct ArrowArray* array, + struct ArrowError* error); /// \brief Set buffer sizes and data pointers from an ArrowArray except for those /// that require dereferencing buffer content. -ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, - const struct ArrowArray* array, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode +ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, + const struct ArrowArray* array, struct ArrowError* error); /// \brief Get the number of buffers /// @@ -2132,9 +2229,9 @@ static inline int64_t ArrowArrayViewGetBufferElementSizeBits( /// and sizes otherwise, you may wish to perform checks at a different level. See /// documentation for ArrowValidationLevel for the details of checks performed /// at each level. -ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, - enum ArrowValidationLevel validation_level, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode ArrowArrayViewValidate( + struct ArrowArrayView* array_view, enum ArrowValidationLevel validation_level, + struct ArrowError* error); /// \brief Compare two ArrowArrayView objects for equality /// @@ -2144,13 +2241,13 @@ ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, /// error if error is non-NULL. /// /// Returns NANOARROW_OK if the comparison completed successfully. 
-ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual, - const struct ArrowArrayView* expected, - enum ArrowCompareLevel level, int* out, - struct ArrowError* reason); +NANOARROW_DLL ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual, + const struct ArrowArrayView* expected, + enum ArrowCompareLevel level, int* out, + struct ArrowError* reason); /// \brief Reset the contents of an ArrowArrayView and frees resources -void ArrowArrayViewReset(struct ArrowArrayView* array_view); +NANOARROW_DLL void ArrowArrayViewReset(struct ArrowArrayView* array_view); /// \brief Check for a null element in an ArrowArrayView static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView* array_view, @@ -2229,8 +2326,8 @@ static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* a /// This function moves the ownership of schema to the array_stream. If /// this function returns NANOARROW_OK, the caller is responsible for /// releasing the ArrowArrayStream. -ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, - struct ArrowSchema* schema, int64_t n_arrays); +NANOARROW_DLL ArrowErrorCode ArrowBasicArrayStreamInit( + struct ArrowArrayStream* array_stream, struct ArrowSchema* schema, int64_t n_arrays); /// \brief Set the ith ArrowArray in this ArrowArrayStream. /// @@ -2239,16 +2336,16 @@ ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, /// be greater than zero and less than the value of n_arrays passed in /// ArrowBasicArrayStreamInit(). Callers are not required to fill all /// n_arrays members (i.e., n_arrays is a maximum bound). 
-void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i, - struct ArrowArray* array); +NANOARROW_DLL void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, + int64_t i, struct ArrowArray* array); /// \brief Validate the contents of this ArrowArrayStream /// /// array_stream must have been initialized with ArrowBasicArrayStreamInit(). /// This function uses ArrowArrayStreamInitFromSchema() and ArrowArrayStreamSetArray() /// to validate the contents of the arrays. -ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream* array_stream, - struct ArrowError* error); +NANOARROW_DLL ArrowErrorCode ArrowBasicArrayStreamValidate( + const struct ArrowArrayStream* array_stream, struct ArrowError* error); /// @} @@ -2893,6 +2990,9 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, return; } + NANOARROW_DCHECK(bitmap->buffer.data != NULL); + NANOARROW_DCHECK(values != NULL); + const int8_t* values_cursor = values; int64_t n_remaining = n_values; int64_t out_i_cursor = bitmap->size_bits; @@ -2940,6 +3040,9 @@ static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, return; } + NANOARROW_DCHECK(bitmap->buffer.data != NULL); + NANOARROW_DCHECK(values != NULL); + const int32_t* values_cursor = values; int64_t n_remaining = n_values; int64_t out_i_cursor = bitmap->size_bits; @@ -3283,6 +3386,9 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: case NANOARROW_BUFFER_TYPE_VALIDITY: continue; + case NANOARROW_BUFFER_TYPE_SIZE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + continue; case NANOARROW_BUFFER_TYPE_DATA_OFFSET: // Append the current value at the end of the offset buffer for each element NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); @@ -3303,7 +3409,10 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a 
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n)); } continue; - + case NANOARROW_BUFFER_TYPE_VIEW_OFFSET: + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + continue; case NANOARROW_BUFFER_TYPE_TYPE_ID: case NANOARROW_BUFFER_TYPE_UNION_OFFSET: // These cases return above @@ -3693,6 +3802,22 @@ static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); switch (private_data->storage_type) { + case NANOARROW_TYPE_DECIMAL32: + if (value->n_words != 0) { + return EINVAL; + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, sizeof(uint32_t))); + break; + } + case NANOARROW_TYPE_DECIMAL64: + if (value->n_words != 1) { + return EINVAL; + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, sizeof(uint64_t))); + break; + } case NANOARROW_TYPE_DECIMAL128: if (value->n_words != 2) { return EINVAL; @@ -3734,6 +3859,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { if (child_length > INT32_MAX) { return EOVERFLOW; } + NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length)); break; @@ -3749,6 +3875,31 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { return EINVAL; } break; + case NANOARROW_TYPE_LIST_VIEW: { + child_length = array->children[0]->length; + if (child_length > INT32_MAX) { + return EOVERFLOW; + } + + const int32_t last_valid_offset = (int32_t)private_data->list_view_offset; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), last_valid_offset)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 2), (int32_t)child_length - last_valid_offset)); + private_data->list_view_offset = child_length; + break; + } + case NANOARROW_TYPE_LARGE_LIST_VIEW: { + 
child_length = array->children[0]->length; + const int64_t last_valid_offset = private_data->list_view_offset; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), last_valid_offset)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, 2), + child_length - last_valid_offset)); + private_data->list_view_offset = child_length; + break; + } + case NANOARROW_TYPE_STRUCT: for (int64_t i = 0; i < array->n_children; i++) { child_length = array->children[i]->length; @@ -4023,8 +4174,10 @@ static inline int64_t ArrowArrayViewListChildOffset( const struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LIST_VIEW: return array_view->buffer_views[1].data.as_int32[i]; case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_LARGE_LIST_VIEW: return array_view->buffer_views[1].data.as_int64[i]; default: return -1; @@ -4161,7 +4314,7 @@ static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( case NANOARROW_TYPE_BINARY: view.data = data_view + offsets_view->data.as_int32[i]; view.size_bytes = - offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + (int64_t)offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; break; case NANOARROW_TYPE_LARGE_STRING: case NANOARROW_TYPE_LARGE_BINARY: @@ -4201,7 +4354,7 @@ static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( case NANOARROW_TYPE_STRING: case NANOARROW_TYPE_BINARY: view.size_bytes = - offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + (int64_t)offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; view.data.as_uint8 = data_view + offsets_view->data.as_int32[i]; break; case NANOARROW_TYPE_LARGE_STRING: @@ -4231,23 +4384,25 @@ static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( static inline void ArrowArrayViewGetIntervalUnsafe( const struct ArrowArrayView* array_view, int64_t i, struct ArrowInterval* out) { 
const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; + const int64_t offset = array_view->offset; + const int64_t index = offset + i; switch (array_view->storage_type) { case NANOARROW_TYPE_INTERVAL_MONTHS: { const size_t size = sizeof(int32_t); - memcpy(&out->months, data_view + i * size, sizeof(int32_t)); + memcpy(&out->months, data_view + index * size, sizeof(int32_t)); break; } case NANOARROW_TYPE_INTERVAL_DAY_TIME: { const size_t size = sizeof(int32_t) + sizeof(int32_t); - memcpy(&out->days, data_view + i * size, sizeof(int32_t)); - memcpy(&out->ms, data_view + i * size + 4, sizeof(int32_t)); + memcpy(&out->days, data_view + index * size, sizeof(int32_t)); + memcpy(&out->ms, data_view + index * size + 4, sizeof(int32_t)); break; } case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: { const size_t size = sizeof(int32_t) + sizeof(int32_t) + sizeof(int64_t); - memcpy(&out->months, data_view + i * size, sizeof(int32_t)); - memcpy(&out->days, data_view + i * size + 4, sizeof(int32_t)); - memcpy(&out->ns, data_view + i * size + 8, sizeof(int64_t)); + memcpy(&out->months, data_view + index * size, sizeof(int32_t)); + memcpy(&out->days, data_view + index * size + 4, sizeof(int32_t)); + memcpy(&out->ns, data_view + index * size + 8, sizeof(int64_t)); break; } default: @@ -4260,6 +4415,12 @@ static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* a i += array_view->offset; const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; switch (array_view->storage_type) { + case NANOARROW_TYPE_DECIMAL32: + ArrowDecimalSetBytes(out, data_view + (i * 4)); + break; + case NANOARROW_TYPE_DECIMAL64: + ArrowDecimalSetBytes(out, data_view + (i * 8)); + break; case NANOARROW_TYPE_DECIMAL128: ArrowDecimalSetBytes(out, data_view + (i * 16)); break; diff --git a/c/vendor/nanoarrow/nanoarrow.hpp b/c/vendor/nanoarrow/nanoarrow.hpp index 16c2e55b9f..6f224f66f5 100644 --- a/c/vendor/nanoarrow/nanoarrow.hpp +++ b/c/vendor/nanoarrow/nanoarrow.hpp @@ 
-15,16 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include -#include -#include -#include - -#include "nanoarrow.h" - -#ifndef NANOARROW_HPP_INCLUDED -#define NANOARROW_HPP_INCLUDED - /// \defgroup nanoarrow_hpp Nanoarrow C++ Helpers /// /// The utilities provided in this file are intended to support C++ users @@ -32,7 +22,38 @@ /// and error handling can be used with nanoarrow data structures. /// These utilities are not intended to mirror the nanoarrow C API. -namespace nanoarrow { + + + + + + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_EXCEPTION_HPP_INCLUDED +#define NANOARROW_HPP_EXCEPTION_HPP_INCLUDED + +#include +#include + +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN /// \defgroup nanoarrow_hpp-errors Error handling helpers /// @@ -83,6 +104,37 @@ class Exception : public std::exception { /// @} +NANOARROW_CXX_NAMESPACE_END + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_OPERATORS_HPP_INCLUDED +#define NANOARROW_HPP_OPERATORS_HPP_INCLUDED + +#include +#include +#include + +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + namespace literals { /// \defgroup nanoarrow_hpp-string_view_helpers ArrowStringView helpers @@ -92,14 +144,57 @@ namespace literals { /// @{ /// \brief User literal operator allowing ArrowStringView construction like "str"_asv -inline ArrowStringView operator"" _asv(const char* data, std::size_t size_bytes) { +#if !defined(__clang__) && (defined(__GNUC__) && __GNUC__ < 6) +inline ArrowStringView operator"" _asv(const char* data, size_t size_bytes) { + return {data, static_cast(size_bytes)}; +} +#else +inline ArrowStringView operator""_asv(const char* data, size_t size_bytes) { return {data, static_cast(size_bytes)}; } +#endif +// N.B. older GCC requires the space above, newer Clang forbids the space // @} } // namespace literals +NANOARROW_CXX_NAMESPACE_END + +/// \brief Equality comparison operator between ArrowStringView +/// \ingroup nanoarrow_hpp-string_view_helpers +inline bool operator==(ArrowStringView l, ArrowStringView r) { + if (l.size_bytes != r.size_bytes) return false; + return memcmp(l.data, r.data, l.size_bytes) == 0; +} + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_UNIQUE_HPP_INCLUDED +#define NANOARROW_HPP_UNIQUE_HPP_INCLUDED + +#include + +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + namespace internal { /// \defgroup nanoarrow_hpp-unique_base Base classes for Unique wrappers @@ -218,13 +313,13 @@ class Unique { public: /// \brief Construct an invalid instance of T holding no resources Unique() { - std::memset(&data_, 0, sizeof(data_)); + memset(&data_, 0, sizeof(data_)); init_pointer(&data_); } /// \brief Move and take ownership of data Unique(T* data) { - std::memset(&data_, 0, sizeof(data_)); + memset(&data_, 0, sizeof(data_)); move_pointer(data, &data_); } @@ -265,15 +360,6 @@ class Unique { T data_; }; -template -static inline void DeallocateWrappedBuffer(struct ArrowBufferAllocator* allocator, - uint8_t* ptr, int64_t size) { - NANOARROW_UNUSED(ptr); - NANOARROW_UNUSED(size); - auto obj = reinterpret_cast(allocator->private_data); - delete obj; -} - /// @} } // namespace internal @@ -306,50 +392,34 @@ using UniqueArrayView = internal::Unique; /// @} -/// \defgroup nanoarrow_hpp-buffer Buffer helpers -/// -/// Helpers to wrap buffer-like C++ objects as ArrowBuffer objects that can -/// be used to build ArrowArray objects. 
-/// -/// @{ +NANOARROW_CXX_NAMESPACE_END -/// \brief Initialize a buffer wrapping an arbitrary C++ object -/// -/// Initializes a buffer with a release callback that deletes the moved obj -/// when ArrowBufferReset is called. This version is useful for wrapping -/// an object whose .data() member is missing or unrelated to the buffer -/// value that is destined for a the buffer of an ArrowArray. T must be movable. -template -static inline void BufferInitWrapped(struct ArrowBuffer* buffer, T obj, - const uint8_t* data, int64_t size_bytes) { - T* obj_moved = new T(std::move(obj)); - buffer->data = const_cast(data); - buffer->size_bytes = size_bytes; - buffer->capacity_bytes = 0; - buffer->allocator = - ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer, obj_moved); -} +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. -/// \brief Initialize a buffer wrapping a C++ sequence -/// -/// Specifically, this uses obj.data() to set the buffer address and -/// obj.size() * sizeof(T::value_type) to set the buffer size. This works -/// for STL containers like std::vector, std::array, and std::string. -/// This function moves obj and ensures it is deleted when ArrowBufferReset -/// is called. 
-template -void BufferInitSequence(struct ArrowBuffer* buffer, T obj) { - // Move before calling .data() (matters sometimes). - T* obj_moved = new T(std::move(obj)); - buffer->data = - const_cast(reinterpret_cast(obj_moved->data())); - buffer->size_bytes = obj_moved->size() * sizeof(typename T::value_type); - buffer->capacity_bytes = 0; - buffer->allocator = - ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer, obj_moved); -} +#ifndef NANOARROW_HPP_ARRAY_STREAM_HPP_INCLUDED +#define NANOARROW_HPP_ARRAY_STREAM_HPP_INCLUDED -/// @} +#include + + + +NANOARROW_CXX_NAMESPACE_BEGIN /// \defgroup nanoarrow_hpp-array-stream ArrayStream helpers /// @@ -444,11 +514,6 @@ class ArrayStreamFactory { /// /// This class can be constructed from an struct ArrowSchema and implements a default /// get_next() method that always marks the output ArrowArray as released. -/// -/// DEPRECATED (0.4.0): Early versions of nanoarrow allowed subclasses to override -/// get_schema(), get_next(), and get_last_error(). This functionality will be removed -/// in a future release: use the pattern documented in ArrayStreamFactory to create -/// custom ArrowArrayStream implementations. class EmptyArrayStream { public: /// \brief Create an EmptyArrayStream from an ArrowSchema @@ -464,43 +529,22 @@ class EmptyArrayStream { ArrayStreamFactory::InitArrayStream(impl, out); } - /// \brief Create an empty UniqueArrayStream from a struct ArrowSchema - /// - /// DEPRECATED (0.4.0): Use the constructor + ToArrayStream() to export an - /// EmptyArrayStream to an ArrowArrayStream consumer. 
- static UniqueArrayStream MakeUnique(struct ArrowSchema* schema) { - UniqueArrayStream stream; - EmptyArrayStream(schema).ToArrayStream(stream.get()); - return stream; - } - - virtual ~EmptyArrayStream() {} - - protected: + private: UniqueSchema schema_; struct ArrowError error_; - void MakeStream(struct ArrowArrayStream* stream) { ToArrayStream(stream); } + friend class ArrayStreamFactory; - virtual int get_schema(struct ArrowSchema* schema) { + int GetSchema(struct ArrowSchema* schema) { return ArrowSchemaDeepCopy(schema_.get(), schema); } - virtual int get_next(struct ArrowArray* array) { + int GetNext(struct ArrowArray* array) { array->release = nullptr; return NANOARROW_OK; } - virtual const char* get_last_error() { return error_.message; } - - private: - friend class ArrayStreamFactory; - - int GetSchema(struct ArrowSchema* schema) { return get_schema(schema); } - - int GetNext(struct ArrowArray* array) { return get_next(array); } - - const char* GetLastError() { return get_last_error(); } + const char* GetLastError() { return error_.message; } }; /// \brief Implementation of an ArrowArrayStream backed by a vector of UniqueArray objects @@ -526,28 +570,6 @@ class VectorArrayStream { ArrayStreamFactory::InitArrayStream(impl, out); } - /// \brief Create a UniqueArrowArrayStream from an existing array - /// - /// DEPRECATED (0.4.0): Use the constructors + ToArrayStream() to export a - /// VectorArrayStream to an ArrowArrayStream consumer. - static UniqueArrayStream MakeUnique(struct ArrowSchema* schema, - struct ArrowArray* array) { - UniqueArrayStream stream; - VectorArrayStream(schema, array).ToArrayStream(stream.get()); - return stream; - } - - /// \brief Create a UniqueArrowArrayStream from existing arrays - /// - /// DEPRECATED (0.4.0): Use the constructor + ToArrayStream() to export a - /// VectorArrayStream to an ArrowArrayStream consumer. 
- static UniqueArrayStream MakeUnique(struct ArrowSchema* schema, - std::vector arrays) { - UniqueArrayStream stream; - VectorArrayStream(schema, std::move(arrays)).ToArrayStream(stream.get()); - return stream; - } - private: int64_t offset_; UniqueSchema schema_; @@ -574,25 +596,146 @@ class VectorArrayStream { /// @} +NANOARROW_CXX_NAMESPACE_END + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_BUFFER_HPP_INCLUDED +#define NANOARROW_HPP_BUFFER_HPP_INCLUDED + +#include +#include +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + +namespace internal { +template +static inline void DeallocateWrappedBuffer(struct ArrowBufferAllocator* allocator, + uint8_t* ptr, int64_t size) { + NANOARROW_UNUSED(ptr); + NANOARROW_UNUSED(size); + auto obj = reinterpret_cast(allocator->private_data); + delete obj; +} +} // namespace internal + +/// \defgroup nanoarrow_hpp-buffer Buffer helpers +/// +/// Helpers to wrap buffer-like C++ objects as ArrowBuffer objects that can +/// be used to build ArrowArray objects. 
+/// +/// @{ + +/// \brief Initialize a buffer wrapping an arbitrary C++ object +/// +/// Initializes a buffer with a release callback that deletes the moved obj +/// when ArrowBufferReset is called. This version is useful for wrapping +/// an object whose .data() member is missing or unrelated to the buffer +/// value that is destined for a the buffer of an ArrowArray. T must be movable. +template +static inline void BufferInitWrapped(struct ArrowBuffer* buffer, T obj, + const uint8_t* data, int64_t size_bytes) { + T* obj_moved = new T(std::move(obj)); + buffer->data = const_cast(data); + buffer->size_bytes = size_bytes; + buffer->capacity_bytes = 0; + buffer->allocator = + ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer, obj_moved); +} + +/// \brief Initialize a buffer wrapping a C++ sequence +/// +/// Specifically, this uses obj.data() to set the buffer address and +/// obj.size() * sizeof(T::value_type) to set the buffer size. This works +/// for STL containers like std::vector, std::array, and std::string. +/// This function moves obj and ensures it is deleted when ArrowBufferReset +/// is called. +template +void BufferInitSequence(struct ArrowBuffer* buffer, T obj) { + // Move before calling .data() (matters sometimes). + T* obj_moved = new T(std::move(obj)); + buffer->data = + const_cast(reinterpret_cast(obj_moved->data())); + buffer->size_bytes = obj_moved->size() * sizeof(typename T::value_type); + buffer->capacity_bytes = 0; + buffer->allocator = + ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer, obj_moved); +} + +/// @} + +NANOARROW_CXX_NAMESPACE_END + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_VIEW_HPP_INCLUDED +#define NANOARROW_HPP_VIEW_HPP_INCLUDED + +#include +#include + + +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + namespace internal { struct Nothing {}; template class Maybe { public: - Maybe() : nothing_(Nothing()), is_something_(false) {} + Maybe() : is_something_(false) {} Maybe(Nothing) : Maybe() {} Maybe(T something) // NOLINT(google-explicit-constructor) - : something_(something), is_something_(true) {} + : is_something_(true), something_(something) {} explicit constexpr operator bool() const { return is_something_; } const T& operator*() const { return something_; } friend inline bool operator==(Maybe l, Maybe r) { - if (l.is_something_ != r.is_something_) return false; - return l.is_something_ ? 
l.something_ == r.something_ : true; + if (l.is_something_) { + return r.is_something_ && l.something_ == r.something_; + } else if (r.is_something_) { + return l.is_something_ && l.something_ == r.something_; + } else { + return l.is_something_ == r.is_something_; + } } friend inline bool operator!=(Maybe l, Maybe r) { return !(l == r); } @@ -603,16 +746,14 @@ class Maybe { // is_trivially_copyable::value static_assert(std::is_trivially_destructible::value, ""); - union { - Nothing nothing_; - T something_; - }; - bool is_something_; + bool is_something_{}; + T something_{}; }; template struct RandomAccessRange { Get get; + int64_t offset; int64_t size; using value_type = decltype(std::declval()(0)); @@ -626,8 +767,8 @@ struct RandomAccessRange { value_type operator*() const { return range->get(i); } }; - const_iterator begin() const { return {0, this}; } - const_iterator end() const { return {size, this}; } + const_iterator begin() const { return {offset, this}; } + const_iterator end() const { return {offset + size, this}; } }; template @@ -680,10 +821,8 @@ class ViewArrayAs { struct Get { const uint8_t* validity; const void* values; - int64_t offset; internal::Maybe operator()(int64_t i) const { - i += offset; if (validity == nullptr || ArrowBitGet(validity, i)) { if (std::is_same::value) { return ArrowBitGet(static_cast(values), i); @@ -703,8 +842,8 @@ class ViewArrayAs { Get{ array_view->buffer_views[0].data.as_uint8, array_view->buffer_views[1].data.data, - array_view->offset, }, + array_view->offset, array_view->length, } {} @@ -713,8 +852,8 @@ class ViewArrayAs { Get{ static_cast(array->buffers[0]), array->buffers[1], - /*offset=*/0, }, + array->offset, array->length, } {} @@ -741,10 +880,8 @@ class ViewArrayAsBytes { const uint8_t* validity; const void* offsets; const char* data; - int64_t offset; internal::Maybe operator()(int64_t i) const { - i += offset; auto* offsets = static_cast(this->offsets); if (validity == nullptr || ArrowBitGet(validity, i)) { 
return ArrowStringView{data + offsets[i], offsets[i + 1] - offsets[i]}; @@ -762,8 +899,8 @@ class ViewArrayAsBytes { array_view->buffer_views[0].data.as_uint8, array_view->buffer_views[1].data.data, array_view->buffer_views[2].data.as_char, - array_view->offset, }, + array_view->offset, array_view->length, } {} @@ -773,8 +910,62 @@ class ViewArrayAsBytes { static_cast(array->buffers[0]), array->buffers[1], static_cast(array->buffers[2]), - /*offset=*/0, }, + array->offset, + array->length, + } {} + + using value_type = typename internal::RandomAccessRange::value_type; + using const_iterator = typename internal::RandomAccessRange::const_iterator; + const_iterator begin() const { return range_.begin(); } + const_iterator end() const { return range_.end(); } + value_type operator[](int64_t i) const { return range_.get(i); } +}; + +class ViewBinaryViewArrayAsBytes { + private: + struct Get { + const uint8_t* validity; + const union ArrowBinaryView* inline_data; + const void** variadic_buffers; + + internal::Maybe operator()(int64_t i) const { + if (validity == nullptr || ArrowBitGet(validity, i)) { + const union ArrowBinaryView* bv = &inline_data[i]; + if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) { + return ArrowStringView{reinterpret_cast(bv->inlined.data), + bv->inlined.size}; + } + + return ArrowStringView{ + reinterpret_cast(variadic_buffers[bv->ref.buffer_index]) + + bv->ref.offset, + bv->ref.size}; + } + return NA; + } + }; + + internal::RandomAccessRange range_; + + public: + ViewBinaryViewArrayAsBytes(const ArrowArrayView* array_view) + : range_{ + Get{ + array_view->buffer_views[0].data.as_uint8, + array_view->buffer_views[1].data.as_binary_view, + array_view->variadic_buffers, + }, + array_view->offset, + array_view->length, + } {} + + ViewBinaryViewArrayAsBytes(const ArrowArray* array) + : range_{ + Get{static_cast(array->buffers[0]), + static_cast(array->buffers[1]), + array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS}, + array->offset, 
array->length, } {} @@ -794,11 +985,9 @@ class ViewArrayAsFixedSizeBytes { struct Get { const uint8_t* validity; const char* data; - int64_t offset; int fixed_size; internal::Maybe operator()(int64_t i) const { - i += offset; if (validity == nullptr || ArrowBitGet(validity, i)) { return ArrowStringView{data + i * fixed_size, fixed_size}; } @@ -814,9 +1003,9 @@ class ViewArrayAsFixedSizeBytes { Get{ array_view->buffer_views[0].data.as_uint8, array_view->buffer_views[1].data.as_char, - array_view->offset, fixed_size, }, + array_view->offset, array_view->length, } {} @@ -825,9 +1014,9 @@ class ViewArrayAsFixedSizeBytes { Get{ static_cast(array->buffers[0]), static_cast(array->buffers[1]), - /*offset=*/0, fixed_size, }, + array->offset, array->length, } {} @@ -920,13 +1109,6 @@ class ViewArrayStream { /// @} -} // namespace nanoarrow - -/// \brief Equality comparison operator between ArrowStringView -/// \ingroup nanoarrow_hpp-string_view_helpers -inline bool operator==(ArrowStringView l, ArrowStringView r) { - if (l.size_bytes != r.size_bytes) return false; - return memcmp(l.data, r.data, l.size_bytes) == 0; -} +NANOARROW_CXX_NAMESPACE_END #endif diff --git a/c/vendor/toml++/toml.hpp b/c/vendor/toml++/toml.hpp new file mode 100644 index 0000000000..cc188fb72c --- /dev/null +++ b/c/vendor/toml++/toml.hpp @@ -0,0 +1,17748 @@ +//---------------------------------------------------------------------------------------------------------------------- +// +// toml++ v3.4.0 +// https://github.com/marzer/tomlplusplus +// SPDX-License-Identifier: MIT +// +//---------------------------------------------------------------------------------------------------------------------- +// +// - THIS FILE WAS ASSEMBLED FROM MULTIPLE HEADER FILES BY A SCRIPT - PLEASE DON'T EDIT IT DIRECTLY - +// +// If you wish to submit a contribution to toml++, hooray and thanks! 
Before you crack on, please be aware that this +// file was assembled from a number of smaller files by a python script, and code contributions should not be made +// against it directly. You should instead make your changes in the relevant source file(s). The file names of the files +// that contributed to this header can be found at the beginnings and ends of the corresponding sections of this file. +// +//---------------------------------------------------------------------------------------------------------------------- +// +// TOML Language Specifications: +// latest: https://github.com/toml-lang/toml/blob/master/README.md +// v1.0.0: https://toml.io/en/v1.0.0 +// v0.5.0: https://toml.io/en/v0.5.0 +// changelog: https://github.com/toml-lang/toml/blob/master/CHANGELOG.md +// +//---------------------------------------------------------------------------------------------------------------------- +// +// MIT License +// +// Copyright (c) Mark Gillard +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +// Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// +//---------------------------------------------------------------------------------------------------------------------- +#ifndef TOMLPLUSPLUS_HPP +#define TOMLPLUSPLUS_HPP + +#define INCLUDE_TOMLPLUSPLUS_H // old guard name used pre-v3 +#define TOMLPLUSPLUS_H // guard name used in the legacy toml.h + +//******** impl/preprocessor.hpp ************************************************************************************* + +#ifndef __cplusplus +#error toml++ is a C++ library. +#endif + +#ifndef TOML_CPP +#ifdef _MSVC_LANG +#if _MSVC_LANG > __cplusplus +#define TOML_CPP _MSVC_LANG +#endif +#endif +#ifndef TOML_CPP +#define TOML_CPP __cplusplus +#endif +#if TOML_CPP >= 202900L +#undef TOML_CPP +#define TOML_CPP 29 +#elif TOML_CPP >= 202600L +#undef TOML_CPP +#define TOML_CPP 26 +#elif TOML_CPP >= 202302L +#undef TOML_CPP +#define TOML_CPP 23 +#elif TOML_CPP >= 202002L +#undef TOML_CPP +#define TOML_CPP 20 +#elif TOML_CPP >= 201703L +#undef TOML_CPP +#define TOML_CPP 17 +#elif TOML_CPP >= 201402L +#undef TOML_CPP +#define TOML_CPP 14 +#elif TOML_CPP >= 201103L +#undef TOML_CPP +#define TOML_CPP 11 +#else +#undef TOML_CPP +#define TOML_CPP 0 +#endif +#endif + +#if !TOML_CPP +#error toml++ requires C++17 or higher. For a pre-C++11 TOML library see https://github.com/ToruNiina/Boost.toml +#elif TOML_CPP < 17 +#error toml++ requires C++17 or higher. 
For a C++11 TOML library see https://github.com/ToruNiina/toml11 +#endif + +#ifndef TOML_MAKE_VERSION +#define TOML_MAKE_VERSION(major, minor, patch) (((major)*10000) + ((minor)*100) + ((patch))) +#endif + +#ifndef TOML_INTELLISENSE +#ifdef __INTELLISENSE__ +#define TOML_INTELLISENSE 1 +#else +#define TOML_INTELLISENSE 0 +#endif +#endif + +#ifndef TOML_DOXYGEN +#if defined(DOXYGEN) || defined(__DOXYGEN) || defined(__DOXYGEN__) || defined(__doxygen__) || defined(__POXY__) \ + || defined(__poxy__) +#define TOML_DOXYGEN 1 +#else +#define TOML_DOXYGEN 0 +#endif +#endif + +#ifndef TOML_CLANG +#ifdef __clang__ +#define TOML_CLANG __clang_major__ +#else +#define TOML_CLANG 0 +#endif + +// special handling for apple clang; see: +// - https://github.com/marzer/tomlplusplus/issues/189 +// - https://en.wikipedia.org/wiki/Xcode +// - +// https://stackoverflow.com/questions/19387043/how-can-i-reliably-detect-the-version-of-clang-at-preprocessing-time +#if TOML_CLANG && defined(__apple_build_version__) +#undef TOML_CLANG +#define TOML_CLANG_VERSION TOML_MAKE_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#if TOML_CLANG_VERSION >= TOML_MAKE_VERSION(15, 0, 0) +#define TOML_CLANG 16 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(14, 3, 0) +#define TOML_CLANG 15 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(14, 0, 0) +#define TOML_CLANG 14 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(13, 1, 6) +#define TOML_CLANG 13 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(13, 0, 0) +#define TOML_CLANG 12 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(12, 0, 5) +#define TOML_CLANG 11 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(12, 0, 0) +#define TOML_CLANG 10 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(11, 0, 3) +#define TOML_CLANG 9 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(11, 0, 0) +#define TOML_CLANG 8 +#elif TOML_CLANG_VERSION >= TOML_MAKE_VERSION(10, 0, 1) +#define TOML_CLANG 7 +#else +#define TOML_CLANG 6 // not strictly correct but doesn't matter below 
this +#endif +#undef TOML_CLANG_VERSION +#endif +#endif + +#ifndef TOML_ICC +#ifdef __INTEL_COMPILER +#define TOML_ICC __INTEL_COMPILER +#ifdef __ICL +#define TOML_ICC_CL TOML_ICC +#else +#define TOML_ICC_CL 0 +#endif +#else +#define TOML_ICC 0 +#define TOML_ICC_CL 0 +#endif +#endif + +#ifndef TOML_MSVC_LIKE +#ifdef _MSC_VER +#define TOML_MSVC_LIKE _MSC_VER +#else +#define TOML_MSVC_LIKE 0 +#endif +#endif + +#ifndef TOML_MSVC +#if TOML_MSVC_LIKE && !TOML_CLANG && !TOML_ICC +#define TOML_MSVC TOML_MSVC_LIKE +#else +#define TOML_MSVC 0 +#endif +#endif + +#ifndef TOML_GCC_LIKE +#ifdef __GNUC__ +#define TOML_GCC_LIKE __GNUC__ +#else +#define TOML_GCC_LIKE 0 +#endif +#endif + +#ifndef TOML_GCC +#if TOML_GCC_LIKE && !TOML_CLANG && !TOML_ICC +#define TOML_GCC TOML_GCC_LIKE +#else +#define TOML_GCC 0 +#endif +#endif + +#ifndef TOML_CUDA +#if defined(__CUDACC__) || defined(__CUDA_ARCH__) || defined(__CUDA_LIBDEVICE__) +#define TOML_CUDA 1 +#else +#define TOML_CUDA 0 +#endif +#endif + +#ifndef TOML_ARCH_ITANIUM +#if defined(__ia64__) || defined(__ia64) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) +#define TOML_ARCH_ITANIUM 1 +#define TOML_ARCH_BITNESS 64 +#else +#define TOML_ARCH_ITANIUM 0 +#endif +#endif + +#ifndef TOML_ARCH_AMD64 +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +#define TOML_ARCH_AMD64 1 +#define TOML_ARCH_BITNESS 64 +#else +#define TOML_ARCH_AMD64 0 +#endif +#endif + +#ifndef TOML_ARCH_X86 +#if defined(__i386__) || defined(_M_IX86) +#define TOML_ARCH_X86 1 +#define TOML_ARCH_BITNESS 32 +#else +#define TOML_ARCH_X86 0 +#endif +#endif + +#ifndef TOML_ARCH_ARM +#if defined(__aarch64__) || defined(__ARM_ARCH_ISA_A64) || defined(_M_ARM64) || defined(__ARM_64BIT_STATE) \ + || defined(_M_ARM64EC) +#define TOML_ARCH_ARM32 0 +#define TOML_ARCH_ARM64 1 +#define TOML_ARCH_ARM 1 +#define TOML_ARCH_BITNESS 64 +#elif defined(__arm__) || defined(_M_ARM) || defined(__ARM_32BIT_STATE) +#define 
TOML_ARCH_ARM32 1 +#define TOML_ARCH_ARM64 0 +#define TOML_ARCH_ARM 1 +#define TOML_ARCH_BITNESS 32 +#else +#define TOML_ARCH_ARM32 0 +#define TOML_ARCH_ARM64 0 +#define TOML_ARCH_ARM 0 +#endif +#endif + +#ifndef TOML_ARCH_BITNESS +#define TOML_ARCH_BITNESS 0 +#endif + +#ifndef TOML_ARCH_X64 +#if TOML_ARCH_BITNESS == 64 +#define TOML_ARCH_X64 1 +#else +#define TOML_ARCH_X64 0 +#endif +#endif + +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__) || defined(__CYGWIN__) +#define TOML_WINDOWS 1 +#else +#define TOML_WINDOWS 0 +#endif + +#ifdef __unix__ +#define TOML_UNIX 1 +#else +#define TOML_UNIX 0 +#endif + +#ifdef __linux__ +#define TOML_LINUX 1 +#else +#define TOML_LINUX 0 +#endif + +// TOML_HAS_INCLUDE +#ifndef TOML_HAS_INCLUDE +#ifdef __has_include +#define TOML_HAS_INCLUDE(header) __has_include(header) +#else +#define TOML_HAS_INCLUDE(header) 0 +#endif +#endif + +// TOML_HAS_BUILTIN +#ifndef TOML_HAS_BUILTIN +#ifdef __has_builtin +#define TOML_HAS_BUILTIN(name) __has_builtin(name) +#else +#define TOML_HAS_BUILTIN(name) 0 +#endif +#endif + +// TOML_HAS_FEATURE +#ifndef TOML_HAS_FEATURE +#ifdef __has_feature +#define TOML_HAS_FEATURE(name) __has_feature(name) +#else +#define TOML_HAS_FEATURE(name) 0 +#endif +#endif + +// TOML_HAS_ATTR +#ifndef TOML_HAS_ATTR +#ifdef __has_attribute +#define TOML_HAS_ATTR(attr) __has_attribute(attr) +#else +#define TOML_HAS_ATTR(attr) 0 +#endif +#endif + +// TOML_HAS_CPP_ATTR +#ifndef TOML_HAS_CPP_ATTR +#ifdef __has_cpp_attribute +#define TOML_HAS_CPP_ATTR(attr) __has_cpp_attribute(attr) +#else +#define TOML_HAS_CPP_ATTR(attr) 0 +#endif +#endif + +// TOML_ATTR (gnu attributes) +#ifndef TOML_ATTR +#if TOML_CLANG || TOML_GCC_LIKE +#define TOML_ATTR(...) __attribute__((__VA_ARGS__)) +#else +#define TOML_ATTR(...) +#endif +#endif + +// TOML_DECLSPEC (msvc attributes) +#ifndef TOML_DECLSPEC +#if TOML_MSVC_LIKE +#define TOML_DECLSPEC(...) __declspec(__VA_ARGS__) +#else +#define TOML_DECLSPEC(...) 
+#endif +#endif + +// TOML_COMPILER_HAS_EXCEPTIONS +#ifndef TOML_COMPILER_HAS_EXCEPTIONS +#if defined(__EXCEPTIONS) || defined(_CPPUNWIND) || defined(__cpp_exceptions) +#define TOML_COMPILER_HAS_EXCEPTIONS 1 +#else +#define TOML_COMPILER_HAS_EXCEPTIONS 0 +#endif +#endif + +// TOML_COMPILER_HAS_RTTI +#ifndef TOML_COMPILER_HAS_RTTI +#if defined(_CPPRTTI) || defined(__GXX_RTTI) || TOML_HAS_FEATURE(cxx_rtti) +#define TOML_COMPILER_HAS_RTTI 1 +#else +#define TOML_COMPILER_HAS_RTTI 0 +#endif +#endif + +// TOML_CONCAT +#define TOML_CONCAT_1(x, y) x##y +#define TOML_CONCAT(x, y) TOML_CONCAT_1(x, y) + +// TOML_MAKE_STRING +#define TOML_MAKE_STRING_1(s) #s +#define TOML_MAKE_STRING(s) TOML_MAKE_STRING_1(s) + +// TOML_PRAGMA_XXXX (compiler-specific pragmas) +#if TOML_CLANG +#define TOML_PRAGMA_CLANG(decl) _Pragma(TOML_MAKE_STRING(clang decl)) +#else +#define TOML_PRAGMA_CLANG(decl) +#endif +#if TOML_CLANG >= 8 +#define TOML_PRAGMA_CLANG_GE_8(decl) TOML_PRAGMA_CLANG(decl) +#else +#define TOML_PRAGMA_CLANG_GE_8(decl) +#endif +#if TOML_CLANG >= 9 +#define TOML_PRAGMA_CLANG_GE_9(decl) TOML_PRAGMA_CLANG(decl) +#else +#define TOML_PRAGMA_CLANG_GE_9(decl) +#endif +#if TOML_CLANG >= 10 +#define TOML_PRAGMA_CLANG_GE_10(decl) TOML_PRAGMA_CLANG(decl) +#else +#define TOML_PRAGMA_CLANG_GE_10(decl) +#endif +#if TOML_CLANG >= 11 +#define TOML_PRAGMA_CLANG_GE_11(decl) TOML_PRAGMA_CLANG(decl) +#else +#define TOML_PRAGMA_CLANG_GE_11(decl) +#endif +#if TOML_GCC +#define TOML_PRAGMA_GCC(decl) _Pragma(TOML_MAKE_STRING(GCC decl)) +#else +#define TOML_PRAGMA_GCC(decl) +#endif +#if TOML_MSVC +#define TOML_PRAGMA_MSVC(...) __pragma(__VA_ARGS__) +#else +#define TOML_PRAGMA_MSVC(...) +#endif +#if TOML_ICC +#define TOML_PRAGMA_ICC(...) __pragma(__VA_ARGS__) +#else +#define TOML_PRAGMA_ICC(...) 
+#endif + +// TOML_ALWAYS_INLINE +#ifdef _MSC_VER +#define TOML_ALWAYS_INLINE __forceinline +#elif TOML_GCC || TOML_CLANG || TOML_HAS_ATTR(__always_inline__) +#define TOML_ALWAYS_INLINE \ + TOML_ATTR(__always_inline__) \ + inline +#else +#define TOML_ALWAYS_INLINE inline +#endif + +// TOML_NEVER_INLINE +#ifdef _MSC_VER +#define TOML_NEVER_INLINE TOML_DECLSPEC(noinline) +#elif TOML_CUDA // https://gitlab.gnome.org/GNOME/glib/-/issues/2555 +#define TOML_NEVER_INLINE TOML_ATTR(noinline) +#else +#if TOML_GCC || TOML_CLANG || TOML_HAS_ATTR(__noinline__) +#define TOML_NEVER_INLINE TOML_ATTR(__noinline__) +#endif +#endif +#ifndef TOML_NEVER_INLINE +#define TOML_NEVER_INLINE +#endif + +// MSVC attributes +#define TOML_ABSTRACT_INTERFACE TOML_DECLSPEC(novtable) +#define TOML_EMPTY_BASES TOML_DECLSPEC(empty_bases) + +// TOML_TRIVIAL_ABI +#if TOML_CLANG || TOML_HAS_ATTR(__trivial_abi__) +#define TOML_TRIVIAL_ABI TOML_ATTR(__trivial_abi__) +#else +#define TOML_TRIVIAL_ABI +#endif + +// TOML_NODISCARD +#if TOML_CPP >= 17 && TOML_HAS_CPP_ATTR(nodiscard) >= 201603 +#define TOML_NODISCARD [[nodiscard]] +#elif TOML_CLANG || TOML_GCC || TOML_HAS_ATTR(__warn_unused_result__) +#define TOML_NODISCARD TOML_ATTR(__warn_unused_result__) +#else +#define TOML_NODISCARD +#endif + +// TOML_NODISCARD_CTOR +#if TOML_CPP >= 17 && TOML_HAS_CPP_ATTR(nodiscard) >= 201907 +#define TOML_NODISCARD_CTOR [[nodiscard]] +#else +#define TOML_NODISCARD_CTOR +#endif + +// pure + const +#ifndef TOML_PURE +#ifdef NDEBUG +#define TOML_PURE \ + TOML_DECLSPEC(noalias) \ + TOML_ATTR(pure) +#else +#define TOML_PURE +#endif +#endif +#ifndef TOML_CONST +#ifdef NDEBUG +#define TOML_CONST \ + TOML_DECLSPEC(noalias) \ + TOML_ATTR(const) +#else +#define TOML_CONST +#endif +#endif +#ifndef TOML_INLINE_GETTER +#define TOML_INLINE_GETTER \ + TOML_NODISCARD \ + TOML_ALWAYS_INLINE +#endif +#ifndef TOML_PURE_GETTER +#define TOML_PURE_GETTER \ + TOML_NODISCARD \ + TOML_PURE +#endif +#ifndef TOML_PURE_INLINE_GETTER +#define 
TOML_PURE_INLINE_GETTER \ + TOML_NODISCARD \ + TOML_ALWAYS_INLINE \ + TOML_PURE +#endif +#ifndef TOML_CONST_GETTER +#define TOML_CONST_GETTER \ + TOML_NODISCARD \ + TOML_CONST +#endif +#ifndef TOML_CONST_INLINE_GETTER +#define TOML_CONST_INLINE_GETTER \ + TOML_NODISCARD \ + TOML_ALWAYS_INLINE \ + TOML_CONST +#endif + +// TOML_ASSUME +#ifdef _MSC_VER +#define TOML_ASSUME(expr) __assume(expr) +#elif TOML_ICC || TOML_CLANG || TOML_HAS_BUILTIN(__builtin_assume) +#define TOML_ASSUME(expr) __builtin_assume(expr) +#elif TOML_HAS_CPP_ATTR(assume) >= 202207 +#define TOML_ASSUME(expr) [[assume(expr)]] +#elif TOML_HAS_ATTR(__assume__) +#define TOML_ASSUME(expr) __attribute__((__assume__(expr))) +#else +#define TOML_ASSUME(expr) static_cast(0) +#endif + +// TOML_UNREACHABLE +#ifdef _MSC_VER +#define TOML_UNREACHABLE __assume(0) +#elif TOML_ICC || TOML_CLANG || TOML_GCC || TOML_HAS_BUILTIN(__builtin_unreachable) +#define TOML_UNREACHABLE __builtin_unreachable() +#else +#define TOML_UNREACHABLE static_cast(0) +#endif + +// TOML_LIKELY +#if TOML_CPP >= 20 && TOML_HAS_CPP_ATTR(likely) >= 201803 +#define TOML_LIKELY(...) (__VA_ARGS__) [[likely]] +#define TOML_LIKELY_CASE [[likely]] +#elif TOML_GCC || TOML_CLANG || TOML_HAS_BUILTIN(__builtin_expect) +#define TOML_LIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 1)) +#else +#define TOML_LIKELY(...) (__VA_ARGS__) +#endif +#ifndef TOML_LIKELY_CASE +#define TOML_LIKELY_CASE +#endif + +// TOML_UNLIKELY +#if TOML_CPP >= 20 && TOML_HAS_CPP_ATTR(unlikely) >= 201803 +#define TOML_UNLIKELY(...) (__VA_ARGS__) [[unlikely]] +#define TOML_UNLIKELY_CASE [[unlikely]] +#elif TOML_GCC || TOML_CLANG || TOML_HAS_BUILTIN(__builtin_expect) +#define TOML_UNLIKELY(...) (__builtin_expect(!!(__VA_ARGS__), 0)) +#else +#define TOML_UNLIKELY(...) 
(__VA_ARGS__) +#endif +#ifndef TOML_UNLIKELY_CASE +#define TOML_UNLIKELY_CASE +#endif + +// TOML_FLAGS_ENUM +#if TOML_CLANG || TOML_HAS_ATTR(flag_enum) +#define TOML_FLAGS_ENUM __attribute__((flag_enum)) +#else +#define TOML_FLAGS_ENUM +#endif + +// TOML_OPEN_ENUM + TOML_CLOSED_ENUM +#if TOML_CLANG || TOML_HAS_ATTR(enum_extensibility) +#define TOML_OPEN_ENUM __attribute__((enum_extensibility(open))) +#define TOML_CLOSED_ENUM __attribute__((enum_extensibility(closed))) +#else +#define TOML_OPEN_ENUM +#define TOML_CLOSED_ENUM +#endif + +// TOML_OPEN_FLAGS_ENUM + TOML_CLOSED_FLAGS_ENUM +#define TOML_OPEN_FLAGS_ENUM TOML_OPEN_ENUM TOML_FLAGS_ENUM +#define TOML_CLOSED_FLAGS_ENUM TOML_CLOSED_ENUM TOML_FLAGS_ENUM + +// TOML_MAKE_FLAGS +#define TOML_MAKE_FLAGS_2(T, op, linkage) \ + TOML_CONST_INLINE_GETTER \ + linkage constexpr T operator op(T lhs, T rhs) noexcept \ + { \ + using under = std::underlying_type_t; \ + return static_cast(static_cast(lhs) op static_cast(rhs)); \ + } \ + \ + linkage constexpr T& operator TOML_CONCAT(op, =)(T & lhs, T rhs) noexcept \ + { \ + return lhs = (lhs op rhs); \ + } \ + \ + static_assert(true) +#define TOML_MAKE_FLAGS_1(T, linkage) \ + static_assert(std::is_enum_v); \ + \ + TOML_MAKE_FLAGS_2(T, &, linkage); \ + TOML_MAKE_FLAGS_2(T, |, linkage); \ + TOML_MAKE_FLAGS_2(T, ^, linkage); \ + \ + TOML_CONST_INLINE_GETTER \ + linkage constexpr T operator~(T val) noexcept \ + { \ + using under = std::underlying_type_t; \ + return static_cast(~static_cast(val)); \ + } \ + \ + TOML_CONST_INLINE_GETTER \ + linkage constexpr bool operator!(T val) noexcept \ + { \ + using under = std::underlying_type_t; \ + return !static_cast(val); \ + } \ + \ + static_assert(true) +#define TOML_MAKE_FLAGS(T) TOML_MAKE_FLAGS_1(T, ) + +#define TOML_UNUSED(...) 
static_cast(__VA_ARGS__) + +#define TOML_DELETE_DEFAULTS(T) \ + T(const T&) = delete; \ + T(T&&) = delete; \ + T& operator=(const T&) = delete; \ + T& operator=(T&&) = delete + +#define TOML_ASYMMETRICAL_EQUALITY_OPS(LHS, RHS, ...) \ + __VA_ARGS__ TOML_NODISCARD \ + friend bool operator==(RHS rhs, LHS lhs) noexcept \ + { \ + return lhs == rhs; \ + } \ + __VA_ARGS__ TOML_NODISCARD \ + friend bool operator!=(LHS lhs, RHS rhs) noexcept \ + { \ + return !(lhs == rhs); \ + } \ + __VA_ARGS__ TOML_NODISCARD \ + friend bool operator!=(RHS rhs, LHS lhs) noexcept \ + { \ + return !(lhs == rhs); \ + } \ + static_assert(true) + +#define TOML_EVAL_BOOL_1(T, F) T +#define TOML_EVAL_BOOL_0(T, F) F + +#if !defined(__POXY__) && !defined(POXY_IMPLEMENTATION_DETAIL) +#define POXY_IMPLEMENTATION_DETAIL(...) __VA_ARGS__ +#endif + +// COMPILER-SPECIFIC WARNING MANAGEMENT + +#if TOML_CLANG + +#define TOML_PUSH_WARNINGS \ + TOML_PRAGMA_CLANG(diagnostic push) \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wunknown-warning-option") \ + static_assert(true) + +#define TOML_DISABLE_SWITCH_WARNINGS \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wswitch") \ + static_assert(true) + +#define TOML_DISABLE_ARITHMETIC_WARNINGS \ + TOML_PRAGMA_CLANG_GE_10(diagnostic ignored "-Wimplicit-int-float-conversion") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wfloat-equal") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wdouble-promotion") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wchar-subscripts") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wshift-sign-overflow") \ + static_assert(true) + +#define TOML_DISABLE_SPAM_WARNINGS \ + TOML_PRAGMA_CLANG_GE_8(diagnostic ignored "-Wdefaulted-function-deleted") \ + TOML_PRAGMA_CLANG_GE_9(diagnostic ignored "-Wctad-maybe-unsupported") \ + TOML_PRAGMA_CLANG_GE_10(diagnostic ignored "-Wzero-as-null-pointer-constant") \ + TOML_PRAGMA_CLANG_GE_11(diagnostic ignored "-Wsuggest-destructor-override") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wweak-vtables") \ + 
TOML_PRAGMA_CLANG(diagnostic ignored "-Wweak-template-vtables") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wdouble-promotion") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wchar-subscripts") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wmissing-field-initializers") \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Wpadded") \ + static_assert(true) + +#define TOML_POP_WARNINGS \ + TOML_PRAGMA_CLANG(diagnostic pop) \ + static_assert(true) + +#define TOML_DISABLE_WARNINGS \ + TOML_PRAGMA_CLANG(diagnostic push) \ + TOML_PRAGMA_CLANG(diagnostic ignored "-Weverything") \ + static_assert(true, "") + +#define TOML_ENABLE_WARNINGS \ + TOML_PRAGMA_CLANG(diagnostic pop) \ + static_assert(true) + +#define TOML_SIMPLE_STATIC_ASSERT_MESSAGES 1 + +#elif TOML_MSVC + +#define TOML_PUSH_WARNINGS \ + __pragma(warning(push)) \ + static_assert(true) + +#if TOML_HAS_INCLUDE() +#pragma warning(push, 0) +#include +#pragma warning(pop) +#define TOML_DISABLE_CODE_ANALYSIS_WARNINGS \ + __pragma(warning(disable : ALL_CODE_ANALYSIS_WARNINGS)) \ + static_assert(true) +#else +#define TOML_DISABLE_CODE_ANALYSIS_WARNINGS static_assert(true) +#endif + +#define TOML_DISABLE_SWITCH_WARNINGS \ + __pragma(warning(disable : 4061)) \ + __pragma(warning(disable : 4062)) \ + __pragma(warning(disable : 4063)) \ + __pragma(warning(disable : 5262)) /* switch-case implicit fallthrough (false-positive) */ \ + __pragma(warning(disable : 26819)) /* cg: unannotated fallthrough */ \ + static_assert(true) + +#define TOML_DISABLE_SPAM_WARNINGS \ + __pragma(warning(disable : 4127)) /* conditional expr is constant */ \ + __pragma(warning(disable : 4324)) /* structure was padded due to alignment specifier */ \ + __pragma(warning(disable : 4348)) \ + __pragma(warning(disable : 4464)) /* relative include path contains '..' 
*/ \ + __pragma(warning(disable : 4505)) /* unreferenced local function removed */ \ + __pragma(warning(disable : 4514)) /* unreferenced inline function has been removed */ \ + __pragma(warning(disable : 4582)) /* constructor is not implicitly called */ \ + __pragma(warning(disable : 4619)) /* there is no warning number 'XXXX' */ \ + __pragma(warning(disable : 4623)) /* default constructor was implicitly defined as deleted */ \ + __pragma(warning(disable : 4625)) /* copy constructor was implicitly defined as deleted */ \ + __pragma(warning(disable : 4626)) /* assignment operator was implicitly defined as deleted */ \ + __pragma(warning(disable : 4710)) /* function not inlined */ \ + __pragma(warning(disable : 4711)) /* function selected for automatic expansion */ \ + __pragma(warning(disable : 4820)) /* N bytes padding added */ \ + __pragma(warning(disable : 4946)) /* reinterpret_cast used between related classes */ \ + __pragma(warning(disable : 5026)) /* move constructor was implicitly defined as deleted */ \ + __pragma(warning(disable : 5027)) /* move assignment operator was implicitly defined as deleted */ \ + __pragma(warning(disable : 5039)) /* potentially throwing function passed to 'extern "C"' function */ \ + __pragma(warning(disable : 5045)) /* Compiler will insert Spectre mitigation */ \ + __pragma(warning(disable : 5264)) /* const variable is not used (false-positive) */ \ + __pragma(warning(disable : 26451)) \ + __pragma(warning(disable : 26490)) \ + __pragma(warning(disable : 26495)) \ + __pragma(warning(disable : 26812)) \ + __pragma(warning(disable : 26819)) \ + static_assert(true) + +#define TOML_DISABLE_ARITHMETIC_WARNINGS \ + __pragma(warning(disable : 4365)) /* argument signed/unsigned mismatch */ \ + __pragma(warning(disable : 4738)) /* storing 32-bit float result in memory */ \ + __pragma(warning(disable : 5219)) /* implicit conversion from integral to float */ \ + static_assert(true) + +#define TOML_POP_WARNINGS \ + __pragma(warning(pop)) \ + 
static_assert(true) + +#define TOML_DISABLE_WARNINGS \ + __pragma(warning(push, 0)) \ + __pragma(warning(disable : 4348)) \ + __pragma(warning(disable : 4668)) \ + __pragma(warning(disable : 5105)) \ + __pragma(warning(disable : 5264)) \ + TOML_DISABLE_CODE_ANALYSIS_WARNINGS; \ + TOML_DISABLE_SWITCH_WARNINGS; \ + TOML_DISABLE_SPAM_WARNINGS; \ + TOML_DISABLE_ARITHMETIC_WARNINGS; \ + static_assert(true) + +#define TOML_ENABLE_WARNINGS TOML_POP_WARNINGS + +#elif TOML_ICC + +#define TOML_PUSH_WARNINGS \ + __pragma(warning(push)) \ + static_assert(true) + +#define TOML_DISABLE_SPAM_WARNINGS \ + __pragma(warning(disable : 82)) /* storage class is not first */ \ + __pragma(warning(disable : 111)) /* statement unreachable (false-positive) */ \ + __pragma(warning(disable : 869)) /* unreferenced parameter */ \ + __pragma(warning(disable : 1011)) /* missing return (false-positive) */ \ + __pragma(warning(disable : 2261)) /* assume expr side-effects discarded */ \ + static_assert(true) + +#define TOML_POP_WARNINGS \ + __pragma(warning(pop)) \ + static_assert(true) + +#define TOML_DISABLE_WARNINGS \ + __pragma(warning(push, 0)) \ + TOML_DISABLE_SPAM_WARNINGS + +#define TOML_ENABLE_WARNINGS \ + __pragma(warning(pop)) \ + static_assert(true) + +#elif TOML_GCC + +#define TOML_PUSH_WARNINGS \ + TOML_PRAGMA_GCC(diagnostic push) \ + static_assert(true) + +#define TOML_DISABLE_SWITCH_WARNINGS \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wswitch") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wswitch-enum") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wswitch-default") \ + static_assert(true) + +#define TOML_DISABLE_ARITHMETIC_WARNINGS \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wfloat-equal") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wsign-conversion") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wchar-subscripts") \ + static_assert(true) + +#define TOML_DISABLE_SUGGEST_ATTR_WARNINGS \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wsuggest-attribute=const") \ + TOML_PRAGMA_GCC(diagnostic ignored 
"-Wsuggest-attribute=pure") \ + static_assert(true) + +#define TOML_DISABLE_SPAM_WARNINGS \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wpadded") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wcast-align") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wcomment") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wtype-limits") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wuseless-cast") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wchar-subscripts") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wsubobject-linkage") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wmissing-field-initializers") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wmaybe-uninitialized") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wnoexcept") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wnull-dereference") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wduplicated-branches") \ + static_assert(true) + +#define TOML_POP_WARNINGS \ + TOML_PRAGMA_GCC(diagnostic pop) \ + static_assert(true) + +#define TOML_DISABLE_WARNINGS \ + TOML_PRAGMA_GCC(diagnostic push) \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wall") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wextra") \ + TOML_PRAGMA_GCC(diagnostic ignored "-Wpedantic") \ + TOML_DISABLE_SWITCH_WARNINGS; \ + TOML_DISABLE_ARITHMETIC_WARNINGS; \ + TOML_DISABLE_SUGGEST_ATTR_WARNINGS; \ + TOML_DISABLE_SPAM_WARNINGS; \ + static_assert(true) + +#define TOML_ENABLE_WARNINGS \ + TOML_PRAGMA_GCC(diagnostic pop) \ + static_assert(true) + +#endif + +#ifndef TOML_PUSH_WARNINGS +#define TOML_PUSH_WARNINGS static_assert(true) +#endif +#ifndef TOML_DISABLE_CODE_ANALYSIS_WARNINGS +#define TOML_DISABLE_CODE_ANALYSIS_WARNINGS static_assert(true) +#endif +#ifndef TOML_DISABLE_SWITCH_WARNINGS +#define TOML_DISABLE_SWITCH_WARNINGS static_assert(true) +#endif +#ifndef TOML_DISABLE_SUGGEST_ATTR_WARNINGS +#define TOML_DISABLE_SUGGEST_ATTR_WARNINGS static_assert(true) +#endif +#ifndef TOML_DISABLE_SPAM_WARNINGS +#define TOML_DISABLE_SPAM_WARNINGS static_assert(true) +#endif +#ifndef TOML_DISABLE_ARITHMETIC_WARNINGS +#define 
TOML_DISABLE_ARITHMETIC_WARNINGS static_assert(true) +#endif +#ifndef TOML_POP_WARNINGS +#define TOML_POP_WARNINGS static_assert(true) +#endif +#ifndef TOML_DISABLE_WARNINGS +#define TOML_DISABLE_WARNINGS static_assert(true) +#endif +#ifndef TOML_ENABLE_WARNINGS +#define TOML_ENABLE_WARNINGS static_assert(true) +#endif +#ifndef TOML_SIMPLE_STATIC_ASSERT_MESSAGES +#define TOML_SIMPLE_STATIC_ASSERT_MESSAGES 0 +#endif + +#ifdef TOML_CONFIG_HEADER +#include TOML_CONFIG_HEADER +#endif + +// is the library being built as a shared lib/dll using meson and friends? +#ifndef TOML_SHARED_LIB +#define TOML_SHARED_LIB 0 +#endif + +// header-only mode +#if !defined(TOML_HEADER_ONLY) && defined(TOML_ALL_INLINE) // was TOML_ALL_INLINE pre-2.0 +#define TOML_HEADER_ONLY TOML_ALL_INLINE +#endif +#if !defined(TOML_HEADER_ONLY) || (defined(TOML_HEADER_ONLY) && TOML_HEADER_ONLY) || TOML_INTELLISENSE +#undef TOML_HEADER_ONLY +#define TOML_HEADER_ONLY 1 +#endif +#if TOML_DOXYGEN || TOML_SHARED_LIB +#undef TOML_HEADER_ONLY +#define TOML_HEADER_ONLY 0 +#endif + +// internal implementation switch +#if defined(TOML_IMPLEMENTATION) || TOML_HEADER_ONLY +#undef TOML_IMPLEMENTATION +#define TOML_IMPLEMENTATION 1 +#else +#define TOML_IMPLEMENTATION 0 +#endif + +// dll/shared lib function exports (legacy - TOML_API was the old name for this setting) +#if !defined(TOML_EXPORTED_MEMBER_FUNCTION) && !defined(TOML_EXPORTED_STATIC_FUNCTION) \ + && !defined(TOML_EXPORTED_FREE_FUNCTION) && !defined(TOML_EXPORTED_CLASS) && defined(TOML_API) +#define TOML_EXPORTED_MEMBER_FUNCTION TOML_API +#define TOML_EXPORTED_STATIC_FUNCTION TOML_API +#define TOML_EXPORTED_FREE_FUNCTION TOML_API +#endif + +// dll/shared lib exports +#if TOML_SHARED_LIB +#undef TOML_API +#undef TOML_EXPORTED_CLASS +#undef TOML_EXPORTED_MEMBER_FUNCTION +#undef TOML_EXPORTED_STATIC_FUNCTION +#undef TOML_EXPORTED_FREE_FUNCTION +#if TOML_WINDOWS +#if TOML_IMPLEMENTATION +#define TOML_EXPORTED_CLASS __declspec(dllexport) +#define 
TOML_EXPORTED_FREE_FUNCTION __declspec(dllexport) +#else +#define TOML_EXPORTED_CLASS __declspec(dllimport) +#define TOML_EXPORTED_FREE_FUNCTION __declspec(dllimport) +#endif +#ifndef TOML_CALLCONV +#define TOML_CALLCONV __cdecl +#endif +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define TOML_EXPORTED_CLASS __attribute__((visibility("default"))) +#define TOML_EXPORTED_MEMBER_FUNCTION __attribute__((visibility("default"))) +#define TOML_EXPORTED_STATIC_FUNCTION __attribute__((visibility("default"))) +#define TOML_EXPORTED_FREE_FUNCTION __attribute__((visibility("default"))) +#endif +#endif +#ifndef TOML_EXPORTED_CLASS +#define TOML_EXPORTED_CLASS +#endif +#ifndef TOML_EXPORTED_MEMBER_FUNCTION +#define TOML_EXPORTED_MEMBER_FUNCTION +#endif +#ifndef TOML_EXPORTED_STATIC_FUNCTION +#define TOML_EXPORTED_STATIC_FUNCTION +#endif +#ifndef TOML_EXPORTED_FREE_FUNCTION +#define TOML_EXPORTED_FREE_FUNCTION +#endif + +// experimental language features +#if !defined(TOML_ENABLE_UNRELEASED_FEATURES) && defined(TOML_UNRELEASED_FEATURES) // was TOML_UNRELEASED_FEATURES + // pre-3.0 +#define TOML_ENABLE_UNRELEASED_FEATURES TOML_UNRELEASED_FEATURES +#endif +#if (defined(TOML_ENABLE_UNRELEASED_FEATURES) && TOML_ENABLE_UNRELEASED_FEATURES) || TOML_INTELLISENSE +#undef TOML_ENABLE_UNRELEASED_FEATURES +#define TOML_ENABLE_UNRELEASED_FEATURES 1 +#endif +#ifndef TOML_ENABLE_UNRELEASED_FEATURES +#define TOML_ENABLE_UNRELEASED_FEATURES 0 +#endif + +// parser +#if !defined(TOML_ENABLE_PARSER) && defined(TOML_PARSER) // was TOML_PARSER pre-3.0 +#define TOML_ENABLE_PARSER TOML_PARSER +#endif +#if !defined(TOML_ENABLE_PARSER) || (defined(TOML_ENABLE_PARSER) && TOML_ENABLE_PARSER) || TOML_INTELLISENSE +#undef TOML_ENABLE_PARSER +#define TOML_ENABLE_PARSER 1 +#endif + +// formatters +#if !defined(TOML_ENABLE_FORMATTERS) || (defined(TOML_ENABLE_FORMATTERS) && TOML_ENABLE_FORMATTERS) || TOML_INTELLISENSE +#undef TOML_ENABLE_FORMATTERS +#define TOML_ENABLE_FORMATTERS 1 +#endif + +// SIMD +#if 
!defined(TOML_ENABLE_SIMD) || (defined(TOML_ENABLE_SIMD) && TOML_ENABLE_SIMD) || TOML_INTELLISENSE +#undef TOML_ENABLE_SIMD +#define TOML_ENABLE_SIMD 1 +#endif + +// windows compat +#if !defined(TOML_ENABLE_WINDOWS_COMPAT) && defined(TOML_WINDOWS_COMPAT) // was TOML_WINDOWS_COMPAT pre-3.0 +#define TOML_ENABLE_WINDOWS_COMPAT TOML_WINDOWS_COMPAT +#endif +#if !defined(TOML_ENABLE_WINDOWS_COMPAT) || (defined(TOML_ENABLE_WINDOWS_COMPAT) && TOML_ENABLE_WINDOWS_COMPAT) \ + || TOML_INTELLISENSE +#undef TOML_ENABLE_WINDOWS_COMPAT +#define TOML_ENABLE_WINDOWS_COMPAT 1 +#endif + +#if !TOML_WINDOWS +#undef TOML_ENABLE_WINDOWS_COMPAT +#define TOML_ENABLE_WINDOWS_COMPAT 0 +#endif + +#ifndef TOML_INCLUDE_WINDOWS_H +#define TOML_INCLUDE_WINDOWS_H 0 +#endif + +// custom optional +#ifdef TOML_OPTIONAL_TYPE +#define TOML_HAS_CUSTOM_OPTIONAL_TYPE 1 +#else +#define TOML_HAS_CUSTOM_OPTIONAL_TYPE 0 +#endif + +// exceptions (library use) +#if TOML_COMPILER_HAS_EXCEPTIONS +#if !defined(TOML_EXCEPTIONS) || (defined(TOML_EXCEPTIONS) && TOML_EXCEPTIONS) +#undef TOML_EXCEPTIONS +#define TOML_EXCEPTIONS 1 +#endif +#else +#if defined(TOML_EXCEPTIONS) && TOML_EXCEPTIONS +#error TOML_EXCEPTIONS was explicitly enabled but exceptions are disabled/unsupported by the compiler. +#endif +#undef TOML_EXCEPTIONS +#define TOML_EXCEPTIONS 0 +#endif + +// calling convention for static/free/friend functions +#ifndef TOML_CALLCONV +#define TOML_CALLCONV +#endif + +#ifndef TOML_UNDEF_MACROS +#define TOML_UNDEF_MACROS 1 +#endif + +#ifndef TOML_MAX_NESTED_VALUES +#define TOML_MAX_NESTED_VALUES 256 +// this refers to the depth of nested values, e.g. inline tables and arrays. +// 256 is crazy high! if you're hitting this limit with real input, TOML is probably the wrong tool for the job... +#endif + +#ifdef TOML_CHAR_8_STRINGS +#if TOML_CHAR_8_STRINGS +#error TOML_CHAR_8_STRINGS was removed in toml++ 2.0.0; all value setters and getters now work with char8_t strings implicitly. 
+#endif +#endif + +#ifdef TOML_LARGE_FILES +#if !TOML_LARGE_FILES +#error Support for !TOML_LARGE_FILES (i.e. 'small files') was removed in toml++ 3.0.0. +#endif +#endif + +#ifndef TOML_LIFETIME_HOOKS +#define TOML_LIFETIME_HOOKS 0 +#endif + +#ifdef NDEBUG +#undef TOML_ASSERT +#define TOML_ASSERT(expr) static_assert(true) +#endif +#ifndef TOML_ASSERT +#ifndef assert +TOML_DISABLE_WARNINGS; +#include +TOML_ENABLE_WARNINGS; +#endif +#define TOML_ASSERT(expr) assert(expr) +#endif +#ifdef NDEBUG +#define TOML_ASSERT_ASSUME(expr) TOML_ASSUME(expr) +#else +#define TOML_ASSERT_ASSUME(expr) TOML_ASSERT(expr) +#endif + +#ifndef TOML_ENABLE_FLOAT16 +#define TOML_ENABLE_FLOAT16 0 +#endif + +#if !defined(TOML_FLOAT_CHARCONV) && (TOML_GCC || TOML_CLANG || (TOML_ICC && !TOML_ICC_CL)) +// not supported by any version of GCC or Clang as of 26/11/2020 +// not supported by any version of ICC on Linux as of 11/01/2021 +#define TOML_FLOAT_CHARCONV 0 +#endif +#if !defined(TOML_INT_CHARCONV) && (defined(__EMSCRIPTEN__) || defined(__APPLE__)) +// causes link errors on emscripten +// causes Mac OS SDK version errors on some versions of Apple Clang +#define TOML_INT_CHARCONV 0 +#endif +#ifndef TOML_INT_CHARCONV +#define TOML_INT_CHARCONV 1 +#endif +#ifndef TOML_FLOAT_CHARCONV +#define TOML_FLOAT_CHARCONV 1 +#endif +#if (TOML_INT_CHARCONV || TOML_FLOAT_CHARCONV) && !TOML_HAS_INCLUDE() +#undef TOML_INT_CHARCONV +#undef TOML_FLOAT_CHARCONV +#define TOML_INT_CHARCONV 0 +#define TOML_FLOAT_CHARCONV 0 +#endif + +#if defined(__cpp_concepts) && __cpp_concepts >= 201907 +#define TOML_REQUIRES(...) requires(__VA_ARGS__) +#else +#define TOML_REQUIRES(...) +#endif +#define TOML_ENABLE_IF(...) , typename std::enable_if<(__VA_ARGS__), int>::type = 0 +#define TOML_CONSTRAINED_TEMPLATE(condition, ...) \ + template <__VA_ARGS__ TOML_ENABLE_IF(condition)> \ + TOML_REQUIRES(condition) +#define TOML_HIDDEN_CONSTRAINT(condition, ...) 
TOML_CONSTRAINED_TEMPLATE(condition, __VA_ARGS__) + +#if defined(__SIZEOF_FLOAT128__) && defined(__FLT128_MANT_DIG__) && defined(__LDBL_MANT_DIG__) \ + && __FLT128_MANT_DIG__ > __LDBL_MANT_DIG__ +#define TOML_FLOAT128 __float128 +#endif + +#ifdef __SIZEOF_INT128__ +#define TOML_INT128 __int128_t +#define TOML_UINT128 __uint128_t +#endif + +// clang-format off + +//******** impl/version.hpp ****************************************************************************************** + +#define TOML_LIB_MAJOR 3 +#define TOML_LIB_MINOR 4 +#define TOML_LIB_PATCH 0 + +#define TOML_LANG_MAJOR 1 +#define TOML_LANG_MINOR 0 +#define TOML_LANG_PATCH 0 + +//******** impl/preprocessor.hpp ************************************************************************************* + +#define TOML_LIB_SINGLE_HEADER 1 + +#if TOML_ENABLE_UNRELEASED_FEATURES + #define TOML_LANG_EFFECTIVE_VERSION \ + TOML_MAKE_VERSION(TOML_LANG_MAJOR, TOML_LANG_MINOR, TOML_LANG_PATCH+1) +#else + #define TOML_LANG_EFFECTIVE_VERSION \ + TOML_MAKE_VERSION(TOML_LANG_MAJOR, TOML_LANG_MINOR, TOML_LANG_PATCH) +#endif + +#define TOML_LANG_HIGHER_THAN(major, minor, patch) \ + (TOML_LANG_EFFECTIVE_VERSION > TOML_MAKE_VERSION(major, minor, patch)) + +#define TOML_LANG_AT_LEAST(major, minor, patch) \ + (TOML_LANG_EFFECTIVE_VERSION >= TOML_MAKE_VERSION(major, minor, patch)) + +#define TOML_LANG_UNRELEASED \ + TOML_LANG_HIGHER_THAN(TOML_LANG_MAJOR, TOML_LANG_MINOR, TOML_LANG_PATCH) + +#ifndef TOML_ABI_NAMESPACES + #if TOML_DOXYGEN + #define TOML_ABI_NAMESPACES 0 + #else + #define TOML_ABI_NAMESPACES 1 + #endif +#endif +#if TOML_ABI_NAMESPACES + #define TOML_NAMESPACE_START namespace toml { inline namespace TOML_CONCAT(v, TOML_LIB_MAJOR) + #define TOML_NAMESPACE_END } static_assert(true) + #define TOML_NAMESPACE ::toml::TOML_CONCAT(v, TOML_LIB_MAJOR) + #define TOML_ABI_NAMESPACE_START(name) inline namespace name { static_assert(true) + #define TOML_ABI_NAMESPACE_BOOL(cond, T, F) 
TOML_ABI_NAMESPACE_START(TOML_CONCAT(TOML_EVAL_BOOL_, cond)(T, F)) + #define TOML_ABI_NAMESPACE_END } static_assert(true) +#else + #define TOML_NAMESPACE_START namespace toml + #define TOML_NAMESPACE_END static_assert(true) + #define TOML_NAMESPACE toml + #define TOML_ABI_NAMESPACE_START(...) static_assert(true) + #define TOML_ABI_NAMESPACE_BOOL(...) static_assert(true) + #define TOML_ABI_NAMESPACE_END static_assert(true) +#endif +#define TOML_IMPL_NAMESPACE_START TOML_NAMESPACE_START { namespace impl +#define TOML_IMPL_NAMESPACE_END } TOML_NAMESPACE_END +#if TOML_HEADER_ONLY + #define TOML_ANON_NAMESPACE_START static_assert(TOML_IMPLEMENTATION); TOML_IMPL_NAMESPACE_START + #define TOML_ANON_NAMESPACE_END TOML_IMPL_NAMESPACE_END + #define TOML_ANON_NAMESPACE TOML_NAMESPACE::impl + #define TOML_EXTERNAL_LINKAGE inline + #define TOML_INTERNAL_LINKAGE inline +#else + #define TOML_ANON_NAMESPACE_START static_assert(TOML_IMPLEMENTATION); \ + using namespace toml; \ + namespace + #define TOML_ANON_NAMESPACE_END static_assert(true) + #define TOML_ANON_NAMESPACE + #define TOML_EXTERNAL_LINKAGE + #define TOML_INTERNAL_LINKAGE static +#endif + +// clang-format on + +// clang-format off + +#if TOML_SIMPLE_STATIC_ASSERT_MESSAGES + + #define TOML_SA_NEWLINE " " + #define TOML_SA_LIST_SEP ", " + #define TOML_SA_LIST_BEG " (" + #define TOML_SA_LIST_END ")" + #define TOML_SA_LIST_NEW " " + #define TOML_SA_LIST_NXT ", " + +#else + + #define TOML_SA_NEWLINE "\n| " + #define TOML_SA_LIST_SEP TOML_SA_NEWLINE " - " + #define TOML_SA_LIST_BEG TOML_SA_LIST_SEP + #define TOML_SA_LIST_END + #define TOML_SA_LIST_NEW TOML_SA_NEWLINE TOML_SA_NEWLINE + #define TOML_SA_LIST_NXT TOML_SA_LIST_NEW + +#endif + +#define TOML_SA_NATIVE_VALUE_TYPE_LIST \ + TOML_SA_LIST_BEG "std::string" \ + TOML_SA_LIST_SEP "int64_t" \ + TOML_SA_LIST_SEP "double" \ + TOML_SA_LIST_SEP "bool" \ + TOML_SA_LIST_SEP "toml::date" \ + TOML_SA_LIST_SEP "toml::time" \ + TOML_SA_LIST_SEP "toml::date_time" \ + TOML_SA_LIST_END + 
+#define TOML_SA_NODE_TYPE_LIST \ + TOML_SA_LIST_BEG "toml::table" \ + TOML_SA_LIST_SEP "toml::array" \ + TOML_SA_LIST_SEP "toml::value" \ + TOML_SA_LIST_SEP "toml::value" \ + TOML_SA_LIST_SEP "toml::value" \ + TOML_SA_LIST_SEP "toml::value" \ + TOML_SA_LIST_SEP "toml::value" \ + TOML_SA_LIST_SEP "toml::value" \ + TOML_SA_LIST_SEP "toml::value" \ + TOML_SA_LIST_END + +#define TOML_SA_UNWRAPPED_NODE_TYPE_LIST \ + TOML_SA_LIST_NEW "A native TOML value type" \ + TOML_SA_NATIVE_VALUE_TYPE_LIST \ + \ + TOML_SA_LIST_NXT "A TOML node type" \ + TOML_SA_NODE_TYPE_LIST + +// clang-format on + +TOML_PUSH_WARNINGS; +TOML_DISABLE_SPAM_WARNINGS; +TOML_DISABLE_SWITCH_WARNINGS; +TOML_DISABLE_SUGGEST_ATTR_WARNINGS; + +// misc warning false-positives +#if TOML_MSVC +#pragma warning(disable : 5031) // #pragma warning(pop): likely mismatch +#if TOML_SHARED_LIB +#pragma warning(disable : 4251) // dll exports for std lib types +#endif +#elif TOML_CLANG +TOML_PRAGMA_CLANG(diagnostic ignored "-Wheader-hygiene") +#if TOML_CLANG >= 12 +TOML_PRAGMA_CLANG(diagnostic ignored "-Wc++20-extensions") +#endif +#if TOML_CLANG == 13 +TOML_PRAGMA_CLANG(diagnostic ignored "-Wreserved-identifier") +#endif +#endif + +//******** impl/std_new.hpp ****************************************************************************************** + +TOML_DISABLE_WARNINGS; +#include +TOML_ENABLE_WARNINGS; + +#if (!defined(__apple_build_version__) && TOML_CLANG >= 8) || TOML_GCC >= 7 || TOML_ICC >= 1910 || TOML_MSVC >= 1914 +#define TOML_LAUNDER(x) __builtin_launder(x) +#elif defined(__cpp_lib_launder) && __cpp_lib_launder >= 201606 +#define TOML_LAUNDER(x) std::launder(x) +#else +#define TOML_LAUNDER(x) x +#endif + +//******** impl/std_string.hpp *************************************************************************************** + +TOML_DISABLE_WARNINGS; +#include +#include +TOML_ENABLE_WARNINGS; + +#if TOML_DOXYGEN \ + || (defined(__cpp_char8_t) && __cpp_char8_t >= 201811 && defined(__cpp_lib_char8_t) \ + && 
__cpp_lib_char8_t >= 201907) +#define TOML_HAS_CHAR8 1 +#else +#define TOML_HAS_CHAR8 0 +#endif + +namespace toml // non-abi namespace; this is not an error +{ + using namespace std::string_literals; + using namespace std::string_view_literals; +} + +#if TOML_ENABLE_WINDOWS_COMPAT + +TOML_IMPL_NAMESPACE_START +{ + TOML_NODISCARD + TOML_EXPORTED_FREE_FUNCTION + std::string narrow(std::wstring_view); + + TOML_NODISCARD + TOML_EXPORTED_FREE_FUNCTION + std::wstring widen(std::string_view); + +#if TOML_HAS_CHAR8 + + TOML_NODISCARD + TOML_EXPORTED_FREE_FUNCTION + std::wstring widen(std::u8string_view); + +#endif +} +TOML_IMPL_NAMESPACE_END; + +#endif // TOML_ENABLE_WINDOWS_COMPAT + +//******** impl/std_optional.hpp ************************************************************************************* + +TOML_DISABLE_WARNINGS; +#if !TOML_HAS_CUSTOM_OPTIONAL_TYPE +#include +#endif +TOML_ENABLE_WARNINGS; + +TOML_NAMESPACE_START +{ +#if TOML_HAS_CUSTOM_OPTIONAL_TYPE + + template + using optional = TOML_OPTIONAL_TYPE; + +#else + + template + using optional = std::optional; + +#endif +} +TOML_NAMESPACE_END; + +//******** impl/forward_declarations.hpp ***************************************************************************** + +TOML_DISABLE_WARNINGS; +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +TOML_ENABLE_WARNINGS; +TOML_PUSH_WARNINGS; +#ifdef _MSC_VER +#ifndef __clang__ +#pragma inline_recursion(on) +#endif +#pragma push_macro("min") +#pragma push_macro("max") +#undef min +#undef max +#endif + +#ifndef TOML_DISABLE_ENVIRONMENT_CHECKS +#define TOML_ENV_MESSAGE \ + "If you're seeing this error it's because you're building toml++ for an environment that doesn't conform to " \ + "one of the 'ground truths' assumed by the library. Essentially this just means that I don't have the " \ + "resources to test on more platforms, but I wish I did! 
You can try disabling the checks by defining " \ + "TOML_DISABLE_ENVIRONMENT_CHECKS, but your mileage may vary. Please consider filing an issue at " \ + "https://github.com/marzer/tomlplusplus/issues to help me improve support for your target environment. " \ + "Thanks!" + +static_assert(CHAR_BIT == 8, TOML_ENV_MESSAGE); +#ifdef FLT_RADIX +static_assert(FLT_RADIX == 2, TOML_ENV_MESSAGE); +#endif +static_assert('A' == 65, TOML_ENV_MESSAGE); +static_assert(sizeof(double) == 8, TOML_ENV_MESSAGE); +static_assert(std::numeric_limits::is_iec559, TOML_ENV_MESSAGE); +static_assert(std::numeric_limits::digits == 53, TOML_ENV_MESSAGE); +static_assert(std::numeric_limits::digits10 == 15, TOML_ENV_MESSAGE); +static_assert(std::numeric_limits::radix == 2, TOML_ENV_MESSAGE); + +#undef TOML_ENV_MESSAGE +#endif // !TOML_DISABLE_ENVIRONMENT_CHECKS + +// undocumented forward declarations are hidden from doxygen because they fuck it up =/ + +namespace toml // non-abi namespace; this is not an error +{ + using ::std::size_t; + using ::std::intptr_t; + using ::std::uintptr_t; + using ::std::ptrdiff_t; + using ::std::nullptr_t; + using ::std::int8_t; + using ::std::int16_t; + using ::std::int32_t; + using ::std::int64_t; + using ::std::uint8_t; + using ::std::uint16_t; + using ::std::uint32_t; + using ::std::uint64_t; + using ::std::uint_least32_t; + using ::std::uint_least64_t; +} + +TOML_NAMESPACE_START +{ + struct date; + struct time; + struct time_offset; + + TOML_ABI_NAMESPACE_BOOL(TOML_HAS_CUSTOM_OPTIONAL_TYPE, custopt, stdopt); + struct date_time; + TOML_ABI_NAMESPACE_END; + + struct source_position; + struct source_region; + + class node; + template + class node_view; + + class key; + class array; + class table; + template + class value; + + class path; + + class toml_formatter; + class json_formatter; + class yaml_formatter; + + TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, ex, noex); +#if TOML_EXCEPTIONS + using parse_result = table; +#else + class parse_result; +#endif + 
TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS +} +TOML_NAMESPACE_END; + +TOML_IMPL_NAMESPACE_START +{ + using node_ptr = std::unique_ptr; + + TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, impl_ex, impl_noex); + class parser; + TOML_ABI_NAMESPACE_END; // TOML_EXCEPTIONS + + // clang-format off + + inline constexpr std::string_view control_char_escapes[] = + { + "\\u0000"sv, + "\\u0001"sv, + "\\u0002"sv, + "\\u0003"sv, + "\\u0004"sv, + "\\u0005"sv, + "\\u0006"sv, + "\\u0007"sv, + "\\b"sv, + "\\t"sv, + "\\n"sv, + "\\u000B"sv, + "\\f"sv, + "\\r"sv, + "\\u000E"sv, + "\\u000F"sv, + "\\u0010"sv, + "\\u0011"sv, + "\\u0012"sv, + "\\u0013"sv, + "\\u0014"sv, + "\\u0015"sv, + "\\u0016"sv, + "\\u0017"sv, + "\\u0018"sv, + "\\u0019"sv, + "\\u001A"sv, + "\\u001B"sv, + "\\u001C"sv, + "\\u001D"sv, + "\\u001E"sv, + "\\u001F"sv, + }; + + inline constexpr std::string_view node_type_friendly_names[] = + { + "none"sv, + "table"sv, + "array"sv, + "string"sv, + "integer"sv, + "floating-point"sv, + "boolean"sv, + "date"sv, + "time"sv, + "date-time"sv + }; + + // clang-format on +} +TOML_IMPL_NAMESPACE_END; + +#if TOML_ABI_NAMESPACES +#if TOML_EXCEPTIONS +#define TOML_PARSER_TYPENAME TOML_NAMESPACE::impl::impl_ex::parser +#else +#define TOML_PARSER_TYPENAME TOML_NAMESPACE::impl::impl_noex::parser +#endif +#else +#define TOML_PARSER_TYPENAME TOML_NAMESPACE::impl::parser +#endif + +namespace toml +{ +} + +TOML_NAMESPACE_START // abi namespace +{ + inline namespace literals + { + } + + enum class TOML_CLOSED_ENUM node_type : uint8_t + { + none, + table, + array, + string, + integer, + floating_point, + boolean, + date, + time, + date_time + }; + + template + inline std::basic_ostream& operator<<(std::basic_ostream& lhs, node_type rhs) + { + const auto str = impl::node_type_friendly_names[static_cast>(rhs)]; + using str_char_t = decltype(str)::value_type; + if constexpr (std::is_same_v) + return lhs << str; + else + { + if constexpr (sizeof(Char) == sizeof(str_char_t)) + return lhs << 
std::basic_string_view{ reinterpret_cast(str.data()), str.length() }; + else + return lhs << str.data(); + } + } + + enum class TOML_OPEN_FLAGS_ENUM value_flags : uint16_t // being an "OPEN" flags enum is not an error + { + none, + format_as_binary = 1, + format_as_octal = 2, + format_as_hexadecimal = 3, + }; + TOML_MAKE_FLAGS(value_flags); + + inline constexpr value_flags preserve_source_value_flags = + POXY_IMPLEMENTATION_DETAIL(value_flags{ static_cast>(-1) }); + + enum class TOML_CLOSED_FLAGS_ENUM format_flags : uint64_t + { + none, + quote_dates_and_times = (1ull << 0), + quote_infinities_and_nans = (1ull << 1), + allow_literal_strings = (1ull << 2), + allow_multi_line_strings = (1ull << 3), + allow_real_tabs_in_strings = (1ull << 4), + allow_unicode_strings = (1ull << 5), + allow_binary_integers = (1ull << 6), + allow_octal_integers = (1ull << 7), + allow_hexadecimal_integers = (1ull << 8), + indent_sub_tables = (1ull << 9), + indent_array_elements = (1ull << 10), + indentation = indent_sub_tables | indent_array_elements, + relaxed_float_precision = (1ull << 11), + terse_key_value_pairs = (1ull << 12), + }; + TOML_MAKE_FLAGS(format_flags); + + template + struct TOML_TRIVIAL_ABI inserter + { + static_assert(std::is_reference_v); + + T value; + }; + template + inserter(T&&) -> inserter; + template + inserter(T&) -> inserter; + + using default_formatter = toml_formatter; +} +TOML_NAMESPACE_END; + +TOML_IMPL_NAMESPACE_START +{ + template + using remove_cvref = std::remove_cv_t>; + + template + using common_signed_type = std::common_type_t...>; + + template + inline constexpr bool is_one_of = (false || ... 
|| std::is_same_v); + + template + inline constexpr bool all_integral = (std::is_integral_v && ...); + + template + inline constexpr bool is_cvref = std::is_reference_v || std::is_const_v || std::is_volatile_v; + + template + inline constexpr bool is_wide_string = + is_one_of, const wchar_t*, wchar_t*, std::wstring_view, std::wstring>; + + template + inline constexpr bool value_retrieval_is_nothrow = !std::is_same_v, std::string> +#if TOML_HAS_CHAR8 + && !std::is_same_v, std::u8string> +#endif + + && !is_wide_string; + + template + struct copy_ref_; + template + using copy_ref = typename copy_ref_::type; + + template + struct copy_ref_ + { + using type = Dest; + }; + + template + struct copy_ref_ + { + using type = std::add_lvalue_reference_t; + }; + + template + struct copy_ref_ + { + using type = std::add_rvalue_reference_t; + }; + + template + struct copy_cv_; + template + using copy_cv = typename copy_cv_::type; + + template + struct copy_cv_ + { + using type = Dest; + }; + + template + struct copy_cv_ + { + using type = std::add_const_t; + }; + + template + struct copy_cv_ + { + using type = std::add_volatile_t; + }; + + template + struct copy_cv_ + { + using type = std::add_cv_t; + }; + + template + using copy_cvref = + copy_ref, std::remove_reference_t>, Dest>, Src>; + + template + inline constexpr bool always_false = false; + + template + inline constexpr bool first_is_same = false; + template + inline constexpr bool first_is_same = true; + + template > + struct underlying_type_ + { + using type = std::underlying_type_t; + }; + template + struct underlying_type_ + { + using type = T; + }; + template + using underlying_type = typename underlying_type_::type; + + // general value traits + // (as they relate to their equivalent native TOML type) + struct default_value_traits + { + using native_type = void; + static constexpr bool is_native = false; + static constexpr bool is_losslessly_convertible_to_native = false; + static constexpr bool can_represent_native 
= false; + static constexpr bool can_partially_represent_native = false; + static constexpr auto type = node_type::none; + }; + + template + struct value_traits; + + template > + struct value_traits_base_selector + { + static_assert(!is_cvref); + + using type = default_value_traits; + }; + template + struct value_traits_base_selector + { + static_assert(!is_cvref); + + using type = value_traits>; + }; + + template + struct value_traits : value_traits_base_selector::type + {}; + template + struct value_traits : value_traits + {}; + template + struct value_traits : value_traits + {}; + template + struct value_traits : value_traits + {}; + template + struct value_traits : value_traits + {}; + template + struct value_traits : value_traits + {}; + + // integer value_traits specializations - standard types + template + struct integer_limits + { + static constexpr T min = T{ (std::numeric_limits>::min)() }; + static constexpr T max = T{ (std::numeric_limits>::max)() }; + }; + template + struct integer_traits_base : integer_limits + { + using native_type = int64_t; + static constexpr bool is_native = std::is_same_v, native_type>; + static constexpr bool is_signed = static_cast>(-1) < underlying_type{}; + static constexpr auto type = node_type::integer; + static constexpr bool can_partially_represent_native = true; + }; + template + struct unsigned_integer_traits : integer_traits_base + { + static constexpr bool is_losslessly_convertible_to_native = + integer_limits>::max <= 9223372036854775807ULL; + static constexpr bool can_represent_native = false; + }; + template + struct signed_integer_traits : integer_traits_base + { + using native_type = int64_t; + static constexpr bool is_losslessly_convertible_to_native = + integer_limits>::min >= (-9223372036854775807LL - 1LL) + && integer_limits>::max <= 9223372036854775807LL; + static constexpr bool can_represent_native = + integer_limits>::min <= (-9223372036854775807LL - 1LL) + && integer_limits>::max >= 9223372036854775807LL; 
+ }; + template ::is_signed> + struct integer_traits : signed_integer_traits + {}; + template + struct integer_traits : unsigned_integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; + static_assert(value_traits::is_native); + static_assert(value_traits::is_signed); + static_assert(value_traits::is_losslessly_convertible_to_native); + static_assert(value_traits::can_represent_native); + static_assert(value_traits::can_partially_represent_native); + + // integer value_traits specializations - non-standard types +#ifdef TOML_INT128 + template <> + struct integer_limits + { + static constexpr TOML_INT128 max = + static_cast((TOML_UINT128{ 1u } << ((__SIZEOF_INT128__ * CHAR_BIT) - 1)) - 1); + static constexpr TOML_INT128 min = -max - TOML_INT128{ 1 }; + }; + template <> + struct integer_limits + { + static constexpr TOML_UINT128 min = TOML_UINT128{}; + static constexpr TOML_UINT128 max = (2u * static_cast(integer_limits::max)) + 1u; + }; + template <> + struct value_traits : integer_traits + {}; + template <> + struct value_traits : integer_traits + {}; +#endif +#ifdef TOML_SMALL_INT_TYPE + template <> + struct value_traits : signed_integer_traits + {}; +#endif + + // floating-point traits base + template + struct float_traits_base + { + static constexpr auto type = node_type::floating_point; + using native_type = double; + static constexpr bool is_native = std::is_same_v; + static constexpr bool is_signed = true; + + 
static constexpr int bits = static_cast(sizeof(T) * CHAR_BIT); + static constexpr int digits = MantissaDigits; + static constexpr int digits10 = DecimalDigits; + + static constexpr bool is_losslessly_convertible_to_native = bits <= 64 // + && digits <= 53 // DBL_MANT_DIG + && digits10 <= 15; // DBL_DIG + + static constexpr bool can_represent_native = digits >= 53 // DBL_MANT_DIG + && digits10 >= 15; // DBL_DIG + + static constexpr bool can_partially_represent_native = digits > 0 && digits10 > 0; + }; + template + struct float_traits : float_traits_base::digits, std::numeric_limits::digits10> + {}; +#if TOML_ENABLE_FLOAT16 + template <> + struct float_traits<_Float16> : float_traits_base<_Float16, __FLT16_MANT_DIG__, __FLT16_DIG__> + {}; +#endif +#ifdef TOML_FLOAT128 + template <> + struct float_traits : float_traits_base + {}; +#endif + + // floating-point traits + template <> + struct value_traits : float_traits + {}; + template <> + struct value_traits : float_traits + {}; + template <> + struct value_traits : float_traits + {}; +#if TOML_ENABLE_FLOAT16 + template <> + struct value_traits<_Float16> : float_traits<_Float16> + {}; +#endif +#ifdef TOML_FLOAT128 + template <> + struct value_traits : float_traits + {}; +#endif +#ifdef TOML_SMALL_FLOAT_TYPE + template <> + struct value_traits : float_traits + {}; +#endif + static_assert(value_traits::is_native); + static_assert(value_traits::is_losslessly_convertible_to_native); + static_assert(value_traits::can_represent_native); + static_assert(value_traits::can_partially_represent_native); + + // string value_traits specializations - char-based strings + template + struct string_traits + { + using native_type = std::string; + static constexpr bool is_native = std::is_same_v; + static constexpr bool is_losslessly_convertible_to_native = true; + static constexpr bool can_represent_native = + !std::is_array_v && (!std::is_pointer_v || std::is_const_v>); + static constexpr bool can_partially_represent_native = 
can_represent_native; + static constexpr auto type = node_type::string; + }; + template <> + struct value_traits : string_traits + {}; + template <> + struct value_traits : string_traits + {}; + template <> + struct value_traits : string_traits + {}; + template + struct value_traits : string_traits + {}; + template <> + struct value_traits : string_traits + {}; + template + struct value_traits : string_traits + {}; + + // string value_traits specializations - char8_t-based strings +#if TOML_HAS_CHAR8 + template <> + struct value_traits : string_traits + {}; + template <> + struct value_traits : string_traits + {}; + template <> + struct value_traits : string_traits + {}; + template + struct value_traits : string_traits + {}; + template <> + struct value_traits : string_traits + {}; + template + struct value_traits : string_traits + {}; +#endif + + // string value_traits specializations - wchar_t-based strings on Windows +#if TOML_ENABLE_WINDOWS_COMPAT + template + struct wstring_traits + { + using native_type = std::string; + static constexpr bool is_native = false; + static constexpr bool is_losslessly_convertible_to_native = true; // narrow + static constexpr bool can_represent_native = std::is_same_v; // widen + static constexpr bool can_partially_represent_native = can_represent_native; + static constexpr auto type = node_type::string; + }; + template <> + struct value_traits : wstring_traits + {}; + template <> + struct value_traits : wstring_traits + {}; + template <> + struct value_traits : wstring_traits + {}; + template + struct value_traits : wstring_traits + {}; + template <> + struct value_traits : wstring_traits + {}; + template + struct value_traits : wstring_traits + {}; +#endif + + // other 'native' value_traits specializations + template + struct native_value_traits + { + using native_type = T; + static constexpr bool is_native = true; + static constexpr bool is_losslessly_convertible_to_native = true; + static constexpr bool can_represent_native = 
true; + static constexpr bool can_partially_represent_native = true; + static constexpr auto type = NodeType; + }; + template <> + struct value_traits : native_value_traits + {}; + template <> + struct value_traits : native_value_traits + {}; + template <> + struct value_traits