diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..3926d51 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,20 @@ +version: 2 +updates: + # Weekly dependency upgrades for the bundler ecosystem (version updates). + - package-ecosystem: bundler + directory: "/" + schedule: + interval: weekly + open-pull-requests-limit: 10 + groups: + # Bundle every RuboCop gem (rubocop + rubocop-* plugins) into one PR. + rubocop: + patterns: + - "rubocop" + - "rubocop-*" + # Bundle the Rails component gems into one PR so their versions move together. + rails: + patterns: + - "activerecord" + - "activemodel" + - "activesupport" diff --git a/.github/workflows/bundle-audit.yml b/.github/workflows/bundle-audit.yml new file mode 100644 index 0000000..12c4871 --- /dev/null +++ b/.github/workflows/bundle-audit.yml @@ -0,0 +1,24 @@ +name: bundle-audit + +on: + schedule: + # Daily security scan so newly published CVEs against existing dependencies + # surface even on days without a push. + - cron: '17 6 * * *' + workflow_dispatch: + +permissions: + contents: read + +jobs: + audit: + name: bundle-audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.4' + bundler-cache: true + - name: Audit dependencies for known CVEs + run: bundle exec bundle-audit check --update diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..1c97d1f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,98 @@ +name: CI + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + test: + name: Test (Ruby ${{ matrix.ruby }}, Rails ${{ matrix.rails }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + # Diagonal matrix: every supported Ruby and every supported Rails is + # exercised at least once without running the full Ruby x Rails product. 
+ matrix: + include: + - { ruby: '3.2', rails: '7.1', gemfile: rails_7_1 } + - { ruby: '3.3', rails: '7.2', gemfile: rails_7_2 } + - { ruby: '3.4', rails: '8.0', gemfile: rails_8_0 } + - { ruby: '3.4', rails: '8.1', gemfile: rails_8_1 } + services: + postgres: + image: postgres:17 + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: pg_sql_caller_test + ports: + - 5432:5432 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + env: + CI: 'true' + BUNDLE_GEMFILE: ${{ github.workspace }}/gemfiles/${{ matrix.gemfile }}.gemfile + steps: + - uses: actions/checkout@v4 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + - name: Set up database config + run: cp -v spec/config/database.github.yml spec/config/database.yml + - name: Run specs + run: bundle exec rspec + + rubocop: + name: RuboCop + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.4' + bundler-cache: true + - name: Run RuboCop + run: bundle exec rubocop --parallel + + bundle-audit: + name: bundle-audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.4' + bundler-cache: true + - name: Audit dependencies for known CVEs + run: bundle exec bundle-audit check --update + + semgrep: + name: Semgrep (SQL injection) + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install Semgrep + run: pip install semgrep + - name: Scan with registry + custom ActiveRecord SQL rules + # Non-blocking: findings are reported to the Security tab rather than failing CI. + # Add `--error` (and drop continue-on-error) to make findings block the build. 
+ continue-on-error: true + run: semgrep scan --config p/default --config .semgrep/ --sarif --output semgrep.sarif + - name: Upload findings to GitHub code scanning + if: always() + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: semgrep.sarif + category: semgrep diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..16ce3d8 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,31 @@ +name: CodeQL + +on: + push: + branches: [master] + pull_request: + branches: [master] + schedule: + # Weekly re-scan so newly published queries catch regressions even without a push. + - cron: '21 3 * * 1' + +jobs: + analyze: + name: Analyze (Ruby) + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + steps: + - uses: actions/checkout@v4 + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ruby + # security-extended includes the dataflow query rb/sql-injection. 
+ queries: security-extended + - name: Perform CodeQL analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:ruby" diff --git a/.gitignore b/.gitignore index c788e20..1ce1fb7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ .rspec_status /Gemfile.lock +/gemfiles/*.gemfile.lock /spec/config/database.yml diff --git a/.rubocop.yml b/.rubocop.yml index 0ae2ad3..4ef7b74 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,71 +1,228 @@ +plugins: + - rubocop-performance + - rubocop-rspec + - rubocop-rake + AllCops: - DisplayCopNames: true - TargetRubyVersion: 2.5 + TargetRubyVersion: 3.2 + DisplayStyleGuide: true + ExtraDetails: true + NewCops: enable + Exclude: + - 'vendor/**/*' + - 'tmp/**/*' + +##################### Bundler ############################### + +Bundler/OrderedGems: Exclude: - - vendor/**/* - - tmp/**/* + - Gemfile + +##################### Gemspec ############################### + +Gemspec/RequireMFA: + Enabled: false + +Gemspec/RequiredRubyVersion: + Enabled: false + +##################### Lint ############################### + +Lint/UnderscorePrefixedVariableName: + Enabled: false + +Lint/AmbiguousOperatorPrecedence: + Enabled: false + +Lint/MissingSuper: + Enabled: false + +Lint/AmbiguousBlockAssociation: + Exclude: + - 'spec/**/*.rb' + +Lint/UselessAccessModifier: + Enabled: true + MethodCreatingMethods: + - delegate + - attribute + - service_entity + - parameter + - parameters + ContextCreatingMethods: + - concerning + - included + +##################### Layout ############################# Layout/LineLength: - Max: 180 + Max: 311 + +##################### Performance ######################### + +Performance/CollectionLiteralInLoop: + Exclude: + - 'spec/**/*.rb' + +##################### Style ############################### + +Style/StringLiterals: + EnforcedStyle: single_quotes + +Style/StringLiteralsInInterpolation: + EnforcedStyle: single_quotes + +Style/KeywordParametersOrder: + Enabled: false + 
+Style/IfUnlessModifier: + Enabled: false + +Style/WhenThen: + Enabled: false + +Style/SafeNavigation: + Enabled: false + +Style/Documentation: + Enabled: false Style/SymbolArray: - EnforcedStyle: brackets + Enabled: false + +Style/HashAsLastArrayItem: + Enabled: false + +Style/ClassAndModuleChildren: + Enabled: false + +Style/GuardClause: + Enabled: false Style/Lambda: + Enabled: false EnforcedStyle: literal -Naming/MethodParameterName: - AllowedNames: - - id +Style/WordArray: + Enabled: false -Style/HashEachMethods: +Style/BlockDelimiters: Enabled: true + EnforcedStyle: braces_for_chaining -Style/HashTransformKeys: +Style/InvertibleUnlessCondition: Enabled: true + InverseMethods: + :!=: :== + :>: :<= + :<=: :> + :<: :>= + :>=: :< + :!~: :=~ + :zero?: :nonzero? + :nonzero?: :zero? + :any?: :none? + :none?: :any? + :even?: :odd? + :odd?: :even? + :present?: :blank? + :blank?: :present? + +##################### Metrics ############################# -Style/HashTransformValues: - Enabled: true +Metrics/PerceivedComplexity: + Max: 45 -Style/Documentation: - Enabled: false +Metrics/ClassLength: + Max: 1060 -Metrics/PerceivedComplexity: +Metrics/BlockNesting: + Max: 5 + +Metrics/BlockLength: Enabled: false +Metrics/CyclomaticComplexity: + Max: 45 + Metrics/MethodLength: Enabled: false +Metrics/ModuleLength: + Enabled: false + Metrics/AbcSize: + Max: 241 + +Metrics/ParameterLists: + Max: 9 + +##################### Naming ############################## + +Naming/MethodParameterName: + AllowedNames: + - "iv" + - "by" + - "to" + - "id" + - "io" + - "on" + +Naming/VariableNumber: Enabled: false -Metrics/ModuleLength: +Naming/PredicatePrefix: Enabled: false -Metrics/BlockLength: +Naming/PredicateMethod: Enabled: false -Metrics/ClassLength: +##################### RSpec ############################### + +RSpec/NamedSubject: Enabled: false -Metrics/CyclomaticComplexity: +RSpec/MultipleMemoizedHelpers: + Enabled: false + +RSpec/ExampleLength: + Max: 141 + 
+RSpec/MultipleExpectations: + Max: 79 + +RSpec/DescribeClass: + Enabled: false + +RSpec/MessageSpies: + Enabled: false + +RSpec/SharedExamples: Enabled: false -Layout/MultilineOperationIndentation: +RSpec/NestedGroups: + Max: 18 + +RSpec/ContextWording: Enabled: false -Layout/FirstHashElementIndentation: +RSpec/ExampleWording: Enabled: false -Layout/FirstArrayElementIndentation: +RSpec/ExpectChange: Enabled: false -Layout/FirstArgumentIndentation: +RSpec/AnyInstance: Enabled: false -Layout/ClosingParenthesisIndentation: +RSpec/SpecFilePathFormat: Enabled: false -Layout/ArgumentAlignment: +RSpec/IndexedLet: Enabled: false +RSpec/ExpectInLet: + Enabled: true + +RSpec/IncludeExamples: + Enabled: false diff --git a/.semgrep/sql-injection.yml b/.semgrep/sql-injection.yml new file mode 100644 index 0000000..bcde40c --- /dev/null +++ b/.semgrep/sql-injection.yml @@ -0,0 +1,24 @@ +rules: + - id: pg-sql-caller-interpolated-raw-sql + languages: [ruby] + severity: WARNING + message: >- + Interpolated string passed to a raw-SQL execution method. Interpolating + values (or identifiers) directly into a SQL string risks SQL injection if + any interpolated part is caller-controlled. Prefer bound `?` placeholders + with `*bindings` (sanitized through `sanitize_sql_array`), or for dynamic + identifiers use `connection.quote_column_name` / `quoted_table_name`. + metadata: + category: security + cwe: "CWE-89: Improper Neutralization of Special Elements used in an SQL Command ('SQL Injection')" + owasp: "A03:2021 - Injection" + confidence: MEDIUM + references: + - https://guides.rubyonrails.org/security.html#sql-injection + patterns: + - pattern-either: + - pattern: $FN("...#{$X}...", ...) + - pattern: $RECV.$FN("...#{$X}...", ...) 
+ - metavariable-regex: + metavariable: $FN + regex: ^(execute|select_value|select_values|select_all|select_rows|select_row|select_all_serialized|select_value_serialized|select_values_serialized|explain_analyze)$ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d3f734a..0000000 --- a/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -sudo: false -language: ruby -cache: bundler -services: - - postgresql -rvm: - - 2.5.7 -before_install: gem install bundler -v 2.1.4 -before_script: - - psql -c 'CREATE DATABASE pg_sql_caller_test;' -U postgres - - cp -v spec/config/database.travis.yml spec/config/database.yml diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..798eeca --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,92 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- `PgSqlCaller::Model` — a standalone, instantiable class holding the SQL API. + Build one directly with `PgSqlCaller::Model.new(ApplicationRecord)`; `PgSqlCaller::Base` + is now a thin `Singleton` facade subclassing it. +- `PgSqlCaller::BulkUpdate` — partial update of many existing rows in a single + `UPDATE ... FROM unnest(...)` statement and round-trip, with single- or composite-column + `unique_by` matching and column validation. +- `quote_value`, `quote_column_name`, and `quote_table_name` quoting helpers. +- `with_min_messages(level)` — temporarily set the connection's `client_min_messages` + around a block. +- `with_notice_processor(callback)` — capture PostgreSQL `NOTICE` output emitted during a block. +- `define_sql_method` (single-name) helper; the variadic `define_sql_methods` is retained for + backward compatibility. +- CI test matrix against Rails 7.1, 7.2, 8.0, and 8.1 (bundled `gemfiles/`). 
+ +### Changed + +- **BREAKING**: Minimum Ruby raised to `>= 3.2.0` (was `>= 2.3.0`). +- **BREAKING**: `activerecord` and `activesupport` now require `>= 7.1` (previously unconstrained). +- `PgSqlCaller::Base` now forwards class-level calls to its singleton instance via + `delegate_missing_to`, so every public `Model` instance method is available as a class + method automatically. +- **BREAKING**: `current_database_name` was renamed to `current_database`. + +### Removed + +- **BREAKING**: The custom `Forwardable`-based `delegate` macro on `Base`, superseded by + ActiveSupport delegation and `delegate_missing_to`. + +## [0.2.3] - 2025-02-07 + +### Fixed + +- `select_value_serialized` no longer raises when the query returns no rows; it now + returns `nil`. + +## [0.2.2] - 2023-02-08 + +### Fixed + +- The class-level `PgSqlCaller::Base.connection` call now resolves correctly (delegated + to the singleton instance). + +## [0.2.1] - 2023-02-08 + +### Added + +- `connection` exposed as a public method on the caller. + +## [0.2.0] - 2020-12-23 + +### Added + +- `select_value_serialized` and `select_values_serialized` type-cast reads. +- `next_sequence_value` to peek at a table's next sequence value. +- `table_full_size` and `table_data_size` relation-size helpers. + +### Fixed + +- Serialized reads no longer raise on columns whose type is unknown; the raw value is + returned unchanged. + +### Changed + +- Homepage moved to the `didww` organization. + +## [0.1.0] - 2020-03-24 + +### Added + +- Initial release: `PgSqlCaller::Base` singleton facade over an ActiveRecord class, + with `?`-bound, sanitized SQL helpers — `select_value`, `select_values`, `select_all`, + `select_rows`, `select_row`, `execute`, `select_all_serialized`, `transaction`, + `transaction_open?`, `explain_analyze`, `typecast_array`, `sanitize_sql_array`, and + `current_database_name`. 
+ +[Unreleased]: https://github.com/didww/pg_sql_caller/compare/v0.2.3...HEAD +[0.2.3]: https://github.com/didww/pg_sql_caller/compare/v0.2.2...v0.2.3 +[0.2.2]: https://github.com/didww/pg_sql_caller/compare/v0.2.1...v0.2.2 +[0.2.1]: https://github.com/didww/pg_sql_caller/compare/v0.2.0...v0.2.1 +[0.2.0]: https://github.com/didww/pg_sql_caller/compare/v0.1.0...v0.2.0 +[0.1.0]: https://github.com/didww/pg_sql_caller/releases/tag/v0.1.0 diff --git a/Gemfile b/Gemfile index cbdb778..d7f64b4 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,13 @@ gemspec gem 'database_cleaner' gem 'pg' -gem 'rake', '~> 12.0' -gem 'rspec', '~> 3.0' -gem 'rubocop', '~> 0.80.1' + +gem 'bundler-audit', '~> 0.9' + +gem 'rspec', '~> 3.13' +gem 'rake', '~> 13.4' + +gem 'rubocop', '~> 1.87' +gem 'rubocop-performance', '~> 1.26' +gem 'rubocop-rake', '~> 0.7.1' +gem 'rubocop-rspec', '~> 3.10' diff --git a/README.md b/README.md index 16d4246..0b258a9 100644 --- a/README.md +++ b/README.md @@ -1,67 +1,395 @@ # PgSqlCaller -Postgresql Sql Caller for ActiveRecord. +[![Gem Version](https://img.shields.io/gem/v/pg_sql_caller.svg)](https://rubygems.org/gems/pg_sql_caller) +[![CI](https://github.com/didww/pg_sql_caller/actions/workflows/ci.yml/badge.svg)](https://github.com/didww/pg_sql_caller/actions/workflows/ci.yml) +[![CodeQL](https://github.com/didww/pg_sql_caller/actions/workflows/codeql.yml/badge.svg)](https://github.com/didww/pg_sql_caller/actions/workflows/codeql.yml) + +A small, focused wrapper for running **raw SQL against PostgreSQL through ActiveRecord**. + +It gives you a clean API for the things ActiveRecord's query builder makes awkward — `SELECT`s that return a single scalar, a single column, raw rows, `EXPLAIN ANALYZE`, sequence/table introspection, PostgreSQL `NOTICE` capture, and efficient set-based bulk updates — while keeping every value **bound and sanitized** by ActiveRecord so your statements stay injection-safe. 
+ +```ruby +class Sql < PgSqlCaller::Base + model_class 'ApplicationRecord' +end + +Sql.select_value('SELECT count(*) FROM users WHERE active = ?', true) # => 42 +Sql.select_values('SELECT id FROM users WHERE name = ?', 'John Doe') # => [1, 2, 3] +Sql.transaction { Sql.execute('DELETE FROM logs WHERE created_at < ?', 1.year.ago) } +``` + +## Table of contents + +- [Why use this](#why-use-this) +- [Requirements](#requirements) +- [Installation](#installation) +- [Configuration](#configuration) — three ways to set it up +- [How `?` placeholders work](#how--placeholders-work) +- [API reference](#api-reference) + - [Reading data](#reading-data) + - [Serialized reads (Ruby type casting)](#serialized-reads-ruby-type-casting) + - [Writing data](#writing-data) + - [Transactions](#transactions) + - [Database & table introspection](#database--table-introspection) + - [Query plans](#query-plans) + - [PostgreSQL NOTICE capture](#postgresql-notice-capture) + - [Quoting & sanitizing helpers](#quoting--sanitizing-helpers) + - [Extending with custom SQL methods](#extending-with-custom-sql-methods) +- [Bulk updates](#bulk-updates) +- [Security](#security) +- [Versioning & changelog](#versioning--changelog) +- [Development](#development) +- [Contributing](#contributing) +- [License](#license) + +## Why use this + +ActiveRecord already exposes low-level connection methods (`select_value`, `select_all`, `execute`, …), but reaching for them directly means writing `Model.connection.select_value(...)` everywhere, manually sanitizing bind values, and re-implementing the same small helpers in every project. `PgSqlCaller`: + +- Wraps those connection methods behind a stable, documented API on a class **you** name. +- Binds `?` placeholders through ActiveRecord's sanitizer automatically — no manual quoting. +- Adds PostgreSQL-specific helpers (type-cast reads, sequence peeking, relation sizes, `EXPLAIN ANALYZE`, `NOTICE` capture). 
+- Provides a fast, injection-safe [bulk update](#bulk-updates) for partial updates of many existing rows in a single round-trip. + +## Requirements + +| Dependency | Version | +| ------------- | ------------------ | +| Ruby | `>= 3.2.0` | +| ActiveRecord | `>= 7.1` | +| ActiveSupport | `>= 7.1` | +| Database | PostgreSQL | + +Continuously tested against Rails **7.1, 7.2, 8.0, and 8.1** on Ruby **3.2–3.4**. PostgreSQL is required — the gem uses PostgreSQL-specific features (`pg_total_relation_size`, `unnest`, sequence introspection, the `pg` notice processor). ## Installation -Add this line to your application's Gemfile: +Add to your application's `Gemfile`: ```ruby gem 'pg_sql_caller' ``` -And then execute: +Then run: + +```sh +bundle install +``` + +Or install it directly: - $ bundle install +```sh +gem install pg_sql_caller +``` -Or install it yourself as: +## Configuration - $ gem install pg_sql_caller +A caller is always backed by **one ActiveRecord class**, whose connection runs every statement and whose column types are used to sanitize and cast values. Pick whichever of the three setups below fits your app. -## Usage +### 1. Subclass `PgSqlCaller::Base` (recommended) -create subclass from `PgSqlCaller::Base` and define `model_class` for it +Declare the backing model once, then call SQL methods directly on your class. This is the most common setup and lets you have several callers (e.g. one per database) if needed. 
```ruby require 'pg_sql_caller' -class MySqlCaller < PgSqlCaller::Base - model_class 'ApplicationRecord' +class Sql < PgSqlCaller::Base + model_class 'ApplicationRecord' # a String (constantized on first use) or the Class itself end -MySqlCaller.select_values 'SELECT id from users WHERE parent_name = ?', 'John Doe' # => [1, 2, 3] +Sql.select_values('SELECT id FROM users WHERE parent_name = ?', 'John Doe') # => [1, 2, 3] ``` -or just define `model_class` for `PgSqlCaller::Base` itself +`model_class` accepts either the class or its name as a `String`. Passing a `String` defers loading the constant until the first call, which avoids autoload-order problems at boot. + +`PgSqlCaller::Base` is a `Singleton`: every class-level call is forwarded to the shared `.instance`, and **every** public instance method (including ones you add with [`define_sql_method`](#extending-with-custom-sql-methods)) is available as a class method. + +### 2. Configure `PgSqlCaller::Base` directly + +If you only need a single, global caller, configure the base class itself instead of subclassing: ```ruby PgSqlCaller::Base.model_class 'ApplicationRecord' -PgSqlCaller::Base.select_values 'SELECT id from users WHERE parent_name = ?', 'John Doe' # => [1, 2, 3] +PgSqlCaller::Base.select_values('SELECT id FROM users WHERE parent_name = ?', 'John Doe') # => [1, 2, 3] +``` + +### 3. Instantiate `PgSqlCaller::Model` per call + +For one-off use, or when you want an ordinary object rather than a singleton, build a `Model` directly. This is also what [`BulkUpdate`](#bulk-updates) uses internally. + +```ruby +sql = PgSqlCaller::Model.new(ApplicationRecord) +sql.select_value('SELECT count(*) FROM users') # => 42 +``` + +> The class methods on `PgSqlCaller::Base` and the instance methods on `PgSqlCaller::Model` are the same API — the examples in the reference below use a `sql` instance, but `Sql.select_value(...)` works identically. 
+ +## How `?` placeholders work + +Every reading and writing method takes a SQL string plus optional positional bindings. Each `?` in the SQL is replaced, **in order**, by a binding value that ActiveRecord quotes and escapes — you never interpolate values into the string yourself: + +```ruby +sql.select_value('SELECT id FROM employees WHERE name = ?', "O'Brien") +# ActiveRecord turns this into: SELECT id FROM employees WHERE name = 'O''Brien' +``` + +If you pass no bindings, the SQL is run verbatim. See [Security](#security) for the guarantees this provides. + +## API reference + +The examples use this schema: + +```ruby +class Department < ApplicationRecord; end +class Employee < ApplicationRecord # columns: id, department_id, name, created_at, updated_at + belongs_to :department +end +``` + +### Reading data + +| Method | Returns | +| --------------------------------------- | ------------------------------------------------------------------- | +| `select_value(sql, *bindings)` | First column of the first row, or `nil` if no row matches | +| `select_values(sql, *bindings)` | First column of **every** row, as an `Array` | +| `select_row(sql, *bindings)` | First row as an `Array` of column values, or `nil` | +| `select_rows(sql, *bindings)` | Every row as an `Array` of column-value `Array`s | +| `select_all(sql, *bindings)` | An `ActiveRecord::Result` of String-keyed row hashes | + +```ruby +sql.select_value('SELECT count(*) FROM employees') # => 2 +sql.select_value('SELECT name FROM employees WHERE id = ?', -1) # => nil + +sql.select_values('SELECT name FROM employees WHERE department_id = ?', 5) +# => ["John", "Jane"] + +sql.select_row('SELECT id, name FROM employees ORDER BY id') # => [1, "John"] +sql.select_rows('SELECT id, name FROM employees') # => [[1, "John"], [2, "Jane"]] + +result = sql.select_all('SELECT id, name FROM employees') +result # => #<ActiveRecord::Result ...> +result.to_a # => [{ "id" => 1, "name" => "John" }, { "id" => 2, "name" => "Jane" }] +``` + +> **Type casting 
note:** the non-serialized reads above return values as decoded by the PostgreSQL adapter — common scalar types (integers, booleans, floats, timestamps) come back as Ruby objects, but **array and other complex/custom column types arrive as raw strings** (e.g. `'{1,2,3}'`). Use the serialized variants below when you need those cast to Ruby types. + +### Serialized reads (Ruby type casting) + +The `*_serialized` variants run the same query, then cast each value back to its Ruby type using ActiveRecord's column types — handling arrays and custom attribute types that the raw adapter leaves as strings. `select_all_serialized` additionally keys each row by `Symbol`. + +| Method | Returns | +| ----------------------------------------- | ------------------------------------------------------------- | +| `select_value_serialized(sql, *bindings)` | First value of the first row, type-cast, or `nil` | +| `select_values_serialized(sql, *bindings)`| Every row as an `Array` of type-cast values | +| `select_all_serialized(sql, *bindings)` | Every row as a `Hash` with `Symbol` keys and type-cast values | + +```ruby +# Raw read returns the PostgreSQL array literal as a String... +sql.select_value('SELECT ARRAY[1,2,3]::int[]') # => "{1,2,3}" +# ...the serialized read casts it to a Ruby Array. +sql.select_value_serialized('SELECT ARRAY[1,2,3]::int[]') # => [1, 2, 3] + +sql.select_values_serialized('SELECT id, ARRAY[1,2]::int[] FROM employees') +# => [[1, [1, 2]]] + +sql.select_all_serialized('SELECT id, created_at FROM employees') +# => [{ id: 1, created_at: 2026-06-08 12:00:00 +0000 }, ...] +``` + +### Writing data + +| Method | Returns | +| -------------------------- | ------------------------------------------------------------- | +| `execute(sql, *bindings)` | The raw `PG::Result` (use `#cmd_tuples` for affected rows) | + +`execute` is for `INSERT` / `UPDATE` / `DELETE` / DDL and any statement whose row data you don't need back. 
+ +```ruby +result = sql.execute('UPDATE employees SET name = ? WHERE id = ?', 'Renamed', 1) +result.cmd_tuples # => 1 (number of rows affected) +``` + +For updating many existing rows efficiently, see [Bulk updates](#bulk-updates). + +### Transactions + +```ruby +sql.transaction do + sql.execute('UPDATE accounts SET balance = balance - ? WHERE id = ?', 100, from_id) + sql.execute('UPDATE accounts SET balance = balance + ? WHERE id = ?', 100, to_id) +end +``` + +- `transaction { ... }` — runs the block inside a database transaction, committing on success and rolling back if it raises. Returns the block's value. Raises `ArgumentError` if no block is given. +- `transaction_open?` — `true` when a transaction is currently open on the connection (including one opened on the model class itself, e.g. `ApplicationRecord.transaction { ... }`). + +### Database & table introspection + +| Method | Returns | +| ------------------------------- | ----------------------------------------------------------------------------------------- | +| `current_database` | The connected database name (`SELECT current_database()`) | +| `next_sequence_value(table)` | The table's `<table>_id_seq` `last_value + 1`, read **without consuming** the sequence | +| `table_full_size(table)` | Total on-disk size in bytes including indexes & TOAST (`pg_total_relation_size`) | +| `table_data_size(table)` | Main data fork size in bytes only (`pg_relation_size`) | + +```ruby +sql.current_database # => "my_app_production" +sql.next_sequence_value('employees') # => 124 +sql.table_full_size('employees') # => 81920 +sql.table_data_size('employees') # => 8192 +``` + +> `next_sequence_value` peeks at the sequence's current value; it does not allocate or advance it, so it is **not** safe to use as a way to reserve an id under concurrency. + +### Query plans + +```ruby +puts sql.explain_analyze('SELECT * FROM employees WHERE department_id = 5') +# QUERY_PLAN +# Seq Scan on employees (cost=0.00..1.05 rows=1 width=...) 
(actual time=0.012..0.013 rows=1 loops=1) +# Filter: (department_id = 5) +# Planning Time: 0.060 ms +# Execution Time: 0.030 ms +``` + +`explain_analyze(sql)` runs `EXPLAIN ANALYZE` (which **executes** the statement) and returns the plan as a single multi-line `String` under a `QUERY_PLAN` header. + +### PostgreSQL NOTICE capture + +Capture `NOTICE` output (e.g. from `RAISE NOTICE` inside a `DO` block or function) emitted while a block runs: + +```ruby +sql.with_notice_processor(->(msg) { Rails.logger.info(msg) }) do + sql.execute("DO $$ BEGIN RAISE NOTICE 'migrating row %', 42; END $$") +end +``` + +- `with_notice_processor(callback) { ... }` — invokes `callback` with each notice message (a chomped `String`) emitted during the block. Lowers `client_min_messages` to `notice` for the duration and restores the previous notice processor afterward. Returns the block's value. +- `with_min_messages(level) { ... }` — temporarily sets the connection's `client_min_messages` to `level` (`debug5`…`debug1`, `log`, `notice`, `warning`, `error`) for the block, restoring the previous value afterward. Returns the block's value. 
+ +### Quoting & sanitizing helpers + +For the cases where you must build SQL fragments yourself, these expose ActiveRecord's quoting so you stay safe: + +| Method | Purpose | +| ------------------------------ | --------------------------------------------------------------------------------- | +| `quote_value(value)` | Quote/escape a value as a SQL literal — `"O'Brien"` → `"'O''Brien'"` | +| `quote_column_name(name)` | Quote a column identifier — `"name"` → `'"name"'` | +| `quote_table_name(name)` | Quote a table identifier — `"employees"` → `'"employees"'` | +| `sanitize_sql_array(sql, *b)` | Interpolate `?` placeholders and return the safe SQL `String` (no execution) | +| `typecast_array(values, type:)`| Encode a Ruby `Array` into a PostgreSQL array literal for the given attribute type | + +```ruby +sql.quote_value("O'Brien") # => "'O''Brien'" +sql.quote_column_name('name') # => "\"name\"" +sql.sanitize_sql_array('name = ? AND id = ?', "O'Brien", 5) # => "name = 'O''Brien' AND id = 5" +sql.typecast_array([1, 2, 3], type: :integer) # => "{1,2,3}" +sql.typecast_array(['a', 'b,c'], type: :string) # => "{a,\"b,c\"}" +``` + +Accessors `model_class` (the wrapped class) and `connection` (its adapter) are also public. + +### Extending with custom SQL methods + +`PgSqlCaller::Model` builds its core readers with the class macro `define_sql_method`, which wraps any connection method that takes a SQL string. Subclass `Model` (or `Base`) to expose additional connection methods with the same `?`-binding behavior: + +```ruby +class Sql < PgSqlCaller::Base + model_class 'ApplicationRecord' + + # Expose the adapter's #exec_query through the same binding/sanitizing path. + define_sql_method :exec_query +end + +Sql.exec_query('SELECT * FROM employees WHERE id = ?', 1) ``` +Because `PgSqlCaller::Base` delegates missing class methods to its singleton instance, methods added this way are immediately callable at the class level. 
+ +## Bulk updates + +`PgSqlCaller::BulkUpdate` performs a **partial update of many existing rows in a single statement and a single round-trip**, using `UPDATE ... FROM unnest(...)`. Each column is sent as one typed PostgreSQL array; `unnest` zips the arrays back into rows that are joined to the target table on a key. + +```ruby +PgSqlCaller::BulkUpdate.call(Employee, [ + { id: 1, name: 'John', department_id: 10 }, + { id: 2, name: 'Jane', department_id: 20 } +]) +# => 2 (number of rows affected) +``` + +### Matching on a composite key + +By default rows are matched on `:id`. Pass `unique_by` to match on a different column, or an array of columns for a composite key: + +```ruby +PgSqlCaller::BulkUpdate.call(Employee, attrs_list, unique_by: :employee_number) +PgSqlCaller::BulkUpdate.call(Employee, attrs_list, unique_by: %i[department_id name]) +``` + +### Rules and behavior + +- **Every row must include each `unique_by` column**, and all hashes must share the same set of keys. +- Only the columns you list are written; `unique_by` columns are used for matching, the rest are updated. Columns you omit (e.g. `created_at`) are left untouched. +- Rows that don't match an existing row are simply not updated — this **never inserts**. +- Returns the number of rows affected (`0` when `attrs_list` is empty — a no-op). +- Raises `ArgumentError` (before touching the database) if a row omits a `unique_by` column or names a column that doesn't exist on the model. + +### Why not `upsert_all` or a loop of `update_all`? + +- **vs. `upsert_all`:** PostgreSQL `NOT NULL`-checks the candidate `INSERT` tuple of `INSERT ... ON CONFLICT DO UPDATE` *before* conflict arbitration, so upsert rejects partial payloads that omit the table's other `NOT NULL` columns. This join only ever touches the listed columns of rows that already exist. +- **vs. 
N `update_all` calls in a transaction:** a transaction makes those writes atomic but doesn't batch them — it's still N statements, N round-trips, and N parse/plan cycles. `BulkUpdate` is one statement and one round-trip; round-trip latency dominates the N-call approach as the row count grows, so `BulkUpdate` stays roughly flat while the loop scales linearly. + +> There's a benchmark demonstrating the speedup in `spec/pg_sql_caller/bulk_update_spec.rb`. Run it with: +> ```sh +> bundle exec rspec spec/pg_sql_caller/bulk_update_spec.rb --tag benchmark +> ``` + +## Security + +`PgSqlCaller` is built so that **values are always bound through ActiveRecord's sanitizer and never interpolated into SQL**: + +- All `?` placeholders in reading/writing methods are sanitized by `sanitize_sql_array` (quoted and escaped). +- `BulkUpdate` binds every value as a typed PostgreSQL array; the only identifiers placed into its SQL are restricted to the model's own column names (validated against `column_names`), so the statement is injection-safe by construction — even values like `"'); DROP TABLE employees;--"` are stored verbatim as data. + +What is **your** responsibility: any SQL fragment, table name, or column name you build into a statement string yourself (rather than passing as a `?` binding) is run as-is. Use `quote_column_name`, `quote_table_name`, and `quote_value` for those, and never interpolate untrusted input directly into the SQL string. + +The repository's CI runs RuboCop, `bundle-audit` (dependency CVEs), CodeQL, and Semgrep (including a custom SQL-injection ruleset) on every change. + +## Versioning & changelog + +This project adheres to [Semantic Versioning](https://semver.org). Given a `MAJOR.MINOR.PATCH` version, breaking API changes bump `MAJOR`, backward-compatible additions bump `MINOR`, and fixes bump `PATCH`. + +All notable changes are recorded in [CHANGELOG.md](CHANGELOG.md), which follows the [Keep a Changelog](https://keepachangelog.com) format. 
Unreleased changes are listed there before each release. + ## Development -After checking out the repo, run `bin/setup` to install dependencies. -Create `spec/config/database.yml` (look at `spec/config/database.travis.yml` for example). -You need to create test database, so run `psql -c 'CREATE DATABASE pg_sql_caller_test;'`. -Then, run `rake spec` to run the tests. -You can also run `bin/console` for an interactive prompt that will allow you to experiment. +After checking out the repo: -To install this gem onto your local machine, run `bundle exec rake install`. -To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). +```sh +bin/setup # install dependencies +cp spec/config/database.github.yml spec/config/database.yml # then edit credentials as needed +psql -c 'CREATE DATABASE pg_sql_caller_test;' # create the test database +bundle exec rake spec # run the tests +``` -## TODO +`bin/console` gives you an interactive prompt to experiment. -* add more tests -* add more usage examples -* add documentation -* release 1.0 after all above +To test against a specific Rails version, use one of the bundled gemfiles: -## Contributing +```sh +BUNDLE_GEMFILE=gemfiles/rails_8_1.gemfile bundle install +BUNDLE_GEMFILE=gemfiles/rails_8_1.gemfile bundle exec rspec +``` + +Available: `rails_7_1`, `rails_7_2`, `rails_8_0`, `rails_8_1`. -Bug reports and pull requests are welcome on GitHub at https://github.com/didww/pg_sql_caller. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/didww/sql_caller/blob/master/CODE_OF_CONDUCT.md). +To install this gem onto your local machine, run `bundle exec rake install`. 
To release a new version, update the version number in `lib/pg_sql_caller/version.rb`, then run `bundle exec rake release`, which creates a git tag, pushes commits and tags, and pushes the `.gem` to [rubygems.org](https://rubygems.org). + +## Contributing +Bug reports and pull requests are welcome on GitHub at https://github.com/didww/pg_sql_caller. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/didww/pg_sql_caller/blob/master/CODE_OF_CONDUCT.md). ## License @@ -69,4 +397,4 @@ The gem is available as open source under the terms of the [MIT License](https:/ ## Code of Conduct -Everyone interacting in the PGSqlCaller project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/didww/pg_sql_caller/blob/master/CODE_OF_CONDUCT.md). +Everyone interacting in the PgSqlCaller project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [code of conduct](https://github.com/didww/pg_sql_caller/blob/master/CODE_OF_CONDUCT.md). diff --git a/gemfiles/rails_7_1.gemfile b/gemfiles/rails_7_1.gemfile new file mode 100644 index 0000000..92f81f4 --- /dev/null +++ b/gemfiles/rails_7_1.gemfile @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +# Pins ActiveRecord/ActiveSupport to the Rails 7.1 series for the CI test matrix. +# Run with: BUNDLE_GEMFILE=gemfiles/rails_7_1.gemfile bundle exec rspec + +source 'https://rubygems.org' + +gemspec path: '..' + +gem 'activerecord', '~> 7.1.0' +gem 'activesupport', '~> 7.1.0' + +gem 'database_cleaner' +gem 'pg' + +gem 'rspec', '~> 3.13' diff --git a/gemfiles/rails_7_2.gemfile b/gemfiles/rails_7_2.gemfile new file mode 100644 index 0000000..0d1af06 --- /dev/null +++ b/gemfiles/rails_7_2.gemfile @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +# Pins ActiveRecord/ActiveSupport to the Rails 7.2 series for the CI test matrix. 
+# Run with: BUNDLE_GEMFILE=gemfiles/rails_7_2.gemfile bundle exec rspec + +source 'https://rubygems.org' + +gemspec path: '..' + +gem 'activerecord', '~> 7.2.0' +gem 'activesupport', '~> 7.2.0' + +gem 'database_cleaner' +gem 'pg' + +gem 'rspec', '~> 3.13' diff --git a/gemfiles/rails_8_0.gemfile b/gemfiles/rails_8_0.gemfile new file mode 100644 index 0000000..d14f527 --- /dev/null +++ b/gemfiles/rails_8_0.gemfile @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +# Pins ActiveRecord/ActiveSupport to the Rails 8.0 series for the CI test matrix. +# Run with: BUNDLE_GEMFILE=gemfiles/rails_8_0.gemfile bundle exec rspec + +source 'https://rubygems.org' + +gemspec path: '..' + +gem 'activerecord', '~> 8.0.0' +gem 'activesupport', '~> 8.0.0' + +gem 'database_cleaner' +gem 'pg' + +gem 'rspec', '~> 3.13' diff --git a/gemfiles/rails_8_1.gemfile b/gemfiles/rails_8_1.gemfile new file mode 100644 index 0000000..f1f8d3c --- /dev/null +++ b/gemfiles/rails_8_1.gemfile @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +# Pins ActiveRecord/ActiveSupport to the Rails 8.1 series for the CI test matrix. +# Run with: BUNDLE_GEMFILE=gemfiles/rails_8_1.gemfile bundle exec rspec + +source 'https://rubygems.org' + +gemspec path: '..' + +gem 'activerecord', '~> 8.1.0' +gem 'activesupport', '~> 8.1.0' + +gem 'database_cleaner' +gem 'pg' + +gem 'rspec', '~> 3.13' diff --git a/lib/pg_sql_caller.rb b/lib/pg_sql_caller.rb index 2dae180..b9614ea 100644 --- a/lib/pg_sql_caller.rb +++ b/lib/pg_sql_caller.rb @@ -1,7 +1,9 @@ # frozen_string_literal: true require 'pg_sql_caller/version' +require 'pg_sql_caller/model' require 'pg_sql_caller/base' +require 'pg_sql_caller/bulk_update' module PgSqlCaller # Your code goes here... 
diff --git a/lib/pg_sql_caller/base.rb b/lib/pg_sql_caller/base.rb index d93418d..51072df 100644 --- a/lib/pg_sql_caller/base.rb +++ b/lib/pg_sql_caller/base.rb @@ -1,160 +1,66 @@ # frozen_string_literal: true require 'singleton' -require 'forwardable' require 'active_support/core_ext/class/attribute' +require 'active_support/core_ext/module/delegation' +require 'active_support/core_ext/string/inflections' +require 'pg_sql_caller/model' module PgSqlCaller - class Base + # Class-level, app-wide facade over a single shared Model instance (a Singleton). + # Declare the ActiveRecord class once, then call the same SQL methods directly on + # the class — every call is forwarded to `.instance`. + # + # class Sql < PgSqlCaller::Base + # model_class 'ApplicationRecord' # a String (constantized on first use) or the Class itself + # end + # + # Sql.select_value('SELECT count(*) FROM users WHERE active = ?', true) # => 42 + # Sql.transaction { Sql.execute('DELETE FROM logs') } + # + # `PgSqlCaller::Base` can also be configured and used directly: + # + # PgSqlCaller::Base.model_class ApplicationRecord + # PgSqlCaller::Base.current_database # => 'my_db' + # + # Every public {PgSqlCaller::Model} instance method is available as a class method here. + # + # @see PgSqlCaller::Model + class Base < Model include Singleton - extend Forwardable - extend SingleForwardable - CONNECTION_SQL_METHODS = [ - :select_value, - :select_values, - :execute, - :select_all, - :select_rows - ].freeze + # @!method self.instance + # The shared singleton instance (from Ruby's Singleton) that every class-level + # call is delegated to. Built on first access. + # @return [PgSqlCaller::Base] class_attribute :_model_class, instance_writer: false class << self - # @names [Array] method names - def delegate(*names, **options) - raise ArgumentError, 'provide at least one method name' if names.empty? 
- - target = options.fetch(:to) - type = options.fetch(:type, :instance) - raise ArgumentError, ':type can be :single or :instance' unless [:single, :instance].include?(type) - - if type == :instance - instance_delegate names => target - else - single_delegate names => target - end - end - - def define_sql_methods(*names) - names.each do |name| - define_method(name) do |sql, *bindings| - sql = sanitize_sql_array(sql, *bindings) if bindings.any? - connection.send(name, sql) - end - end - end - - # @param klass [Class, String] class or class name + # Configure which ActiveRecord class backs this caller — the class itself or its + # name as a String (constantized lazily on first use). Call once, at boot. + # + # PgSqlCaller::Base.model_class ApplicationRecord + # + # @param klass [Class, String] the class, or its name + # @return [Class, String] the value just set def model_class(klass) self._model_class = klass end - end - - delegate( - *CONNECTION_SQL_METHODS, - :connection, - :transaction_open?, - :select_all_serialized, - :select_value_serialized, - :select_values_serialized, - :next_sequence_value, - :table_full_size, - :table_data_size, - :select_row, - :transaction, - :explain_analyze, - :typecast_array, - :sanitize_sql_array, - :current_database, - to: :instance, - type: :single - ) - - define_sql_methods(*CONNECTION_SQL_METHODS) - - delegate :connection, to: :model_class - - def transaction_open? - connection.send(:transaction_open?) - end - - def select_all_serialized(sql, *bindings) - result = select_all(sql, *bindings) - result.map do |row| - row.map { |key, value| [key.to_sym, deserialize_result(result, key, value)] }.to_h - end - end - - def select_value_serialized(sql, *bindings) - result = select_all(sql, *bindings) - key = result.first&.keys&.first - return if key.nil? - value = result.first.values.first - deserialize_result(result, key, value) + # Forward any unknown class-level call to the shared Singleton instance — + # e.g. 
`Base.select_value(...)` runs `Base.instance.select_value(...)`. This + # covers every public Model instance method (including ones added later via + # `define_sql_method`) without maintaining an explicit list. + delegate_missing_to :instance end - def select_values_serialized(sql, *bindings) - result = select_all(sql, *bindings) - result.map do |row| - row.map { |key, value| deserialize_result(result, key, value) } - end - end - - def next_sequence_value(table_name) - select_value("SELECT last_value FROM #{table_name}_id_seq") + 1 - end - - def table_full_size(table_name) - select_value('SELECT pg_total_relation_size(?)', table_name) - end - - def table_data_size(table_name) - select_value('SELECT pg_relation_size(?)', table_name) - end - - def select_row(sql, *bindings) - select_rows(sql, *bindings)[0] - end - - def transaction - raise ArgumentError, 'block must be given' unless block_given? - - connection.transaction { yield } - end - - def explain_analyze(sql) - result = select_values("EXPLAIN ANALYZE #{sql}") - ['QUERY_PLAN', *result].join("\n") - end - - def typecast_array(values, type:) - type = ActiveRecord::Type.lookup(type, array: true) - data = type.serialize(values) - data.encoder.encode(data.values) - end - - def sanitize_sql_array(sql, *bindings) - model_class.send :sanitize_sql_array, bindings.unshift(sql) - end - - def current_database_name - select_value('SELECT current_database();') - end - - private - - def deserialize_result(result, column_name, raw_value) - column_type = result.column_types[column_name] - return raw_value if column_type.nil? - - column_type.deserialize(raw_value) - end - - def model_class - return @model_class if defined?(@model_class) - + # Build the singleton instance. Invoked once by {.instance}; never called directly + # (Singleton makes +.new+ private). Resolves the configured {.model_class} name/class + # into a Class for {#model_class}. 
+ # + # @raise [NotImplementedError] if {.model_class} was never configured + def initialize raise NotImplementedError, "define model_class in #{self.class}" if _model_class.nil? @model_class = _model_class.is_a?(String) ? _model_class.constantize : _model_class diff --git a/lib/pg_sql_caller/bulk_update.rb b/lib/pg_sql_caller/bulk_update.rb new file mode 100644 index 0000000..febd650 --- /dev/null +++ b/lib/pg_sql_caller/bulk_update.rb @@ -0,0 +1,193 @@ +# frozen_string_literal: true + +require 'active_support/core_ext/string/filters' +require 'pg_sql_caller/model' + +module PgSqlCaller + # Bulk partial-update of existing rows keyed by one or more columns, via + # `UPDATE ... FROM unnest(...)`: + # + # PgSqlCaller::BulkUpdate.call(Employee, [ + # { id: 1, name: 'John', department_id: 10 }, + # { id: 2, name: 'Jane', department_id: 20 } + # ]) + # + # Match on a composite key (or any custom set of uniqueness columns) by passing + # `unique_by` an array instead of a single column: + # + # PgSqlCaller::BulkUpdate.call(Employee, attrs_list, unique_by: %i[department_id name]) + # + # Chosen over `upsert_all`: PostgreSQL NOT NULL-checks the candidate INSERT tuple of + # `INSERT ... ON CONFLICT DO UPDATE` *before* conflict arbitration, so upsert rejects + # partial payloads that omit the table's other NOT NULL columns. This join only ever + # touches the listed columns of rows that already exist. + # + # Preferred over N separate `update_all` calls wrapped in a transaction: a transaction + # makes those writes atomic but does nothing to batch them — it is still N statements, + # N client<->server round-trips, and N parse/plan cycles. This is a single statement + # and a single round-trip; PostgreSQL applies the whole set-based update server-side. + # Round-trip latency dominates the N-call approach as the row count grows, so this stays + # roughly flat while the loop scales linearly (see + # spec/pg_sql_caller/bulk_update_spec.rb benchmark). 
+ # + # Each column is sent as one typed PostgreSQL array; `unnest` zips the arrays back + # into rows. Values are bound through ActiveRecord's sanitizer (PgSqlCaller::Model) and + # never interpolated; the only identifiers placed into the SQL are restricted to the + # model's own columns, so the statement is injection-safe by construction. + class BulkUpdate + # Build and run a bulk update in one call. + # + # @param model_class [Class] the model whose table is updated + # @param attrs_list [Array] one hash per row; each MUST include every + # `unique_by` column, and all hashes MUST share the same keys + # @param unique_by [Symbol, Array] the match column(s) — a single column, + # or all parts of a composite key (default +:id+) + # @return [Integer] the number of rows affected + def self.call(model_class, attrs_list, unique_by: :id) + new(model_class, attrs_list, unique_by: unique_by).call + end + + attr_reader :model_class, :unique_by, :attrs_list + + # @param model_class [Class] the model whose table is updated + # @param attrs_list [Array] one hash per row; each MUST include every + # `unique_by` column, and all hashes MUST share the same keys + # @param unique_by [Symbol, Array] the match column(s) — a single column, + # or all parts of a composite key (default +:id+) + def initialize(model_class, attrs_list, unique_by: :id) + @model_class = model_class + @attrs_list = attrs_list + @unique_by = Array(unique_by) + end + + # Execute the bulk update as a single `UPDATE ... FROM unnest(...)` statement. + # + # @return [Integer] the number of rows affected (0 when +attrs_list+ is empty) + # @raise [ArgumentError] if a row omits a `unique_by` column, or names a column + # that does not exist on the model + def call + return 0 if attrs_list.empty? 
+ + sql_caller.execute(sql, *bindings).cmd_tuples + end + + private + + # The SQL executor, built from the model's own connection: it sanitizes the bound + # values, runs the statement and encodes the typed PostgreSQL arrays. + # + # @return [PgSqlCaller::Model] + def sql_caller + @sql_caller ||= PgSqlCaller::Model.new(model_class) + end + + # Columns to write, taken from the first row (assumed identical across all rows). + # + # @return [Array] + # @raise [ArgumentError] via {#validate_columns!} when the payload is invalid + def columns + @columns ||= attrs_list.first.keys.tap { |cols| validate_columns!(cols) } + end + + # The columns actually updated — every column except the `unique_by` match column(s). + # + # @return [Array] + def value_columns + @value_columns ||= columns - unique_by + end + + # Validate the payload's columns before any SQL runs: every `unique_by` column must + # be present, at least one value column must remain, every column must exist on the + # model, and every row must carry the same key set as the first row (so no row + # silently writes NULLs or drops extra keys). + # + # @param cols [Array] the columns taken from the first row + # @return [void] + # @raise [ArgumentError] if a `unique_by` column is missing, there are no value + # columns to update, a column is unknown, or a row's keys differ from the first row + def validate_columns!(cols) + missing = unique_by - cols + raise ArgumentError, "attrs_list rows must include unique_by #{missing.inspect}" if missing.any? + + raise ArgumentError, "attrs_list has no value columns to update (only unique_by #{unique_by.inspect})" if (cols - unique_by).empty? + + unknown = cols.map(&:to_s) - model_class.column_names + raise ArgumentError, "unknown #{model_class} columns: #{unknown.join(', ')}" if unknown.any? 
+ + sorted = cols.sort + attrs_list.each_with_index do |attrs, index| + next if attrs.keys.sort == sorted + + raise ArgumentError, "attrs_list[#{index}] keys #{attrs.keys.inspect} differ from first row #{cols.inspect}" + end + end + + # The full `UPDATE ... FROM unnest(...)` statement, with one `?` placeholder per + # column for the value arrays. + # + # @return [String] + def sql + <<~SQL.squish + UPDATE #{model_class.quoted_table_name} AS t + SET #{set_clause} + FROM unnest(#{unnest_args}) AS v(#{column_aliases}) + WHERE #{match_clause} + SQL + end + + # The `SET col = v.col, ...` assignments for the value columns. + # + # @return [String] + def set_clause + value_columns.map { |col| "#{quoted(col)} = v.#{quoted(col)}" }.join(', ') + end + + # Match each row on every `unique_by` column — one column, or all parts of a composite key. + # + # @return [String] the `WHERE` join condition, e.g. +"t.a = v.a AND t.b = v.b"+ + def match_clause + unique_by.map { |col| "t.#{quoted(col)} = v.#{quoted(col)}" }.join(' AND ') + end + + # One `?` placeholder per column, cast to that column's array type so PostgreSQL + # can resolve the otherwise-unknown bind parameter. + # + # @return [String] e.g. +"?::bigint[], ?::text[]"+ + def unnest_args + columns.map { |col| "?::#{sql_type(col)}[]" }.join(', ') + end + + # The `v(col, ...)` column alias list, in column order. + # + # @return [String] + def column_aliases + columns.map { |col| quoted(col) }.join(', ') + end + + # One PostgreSQL array literal per column, in column order, matching the `?`s above. + # + # @return [Array] one encoded array literal per column + def bindings + columns.map do |col| + values = attrs_list.map { |attrs| attrs[col] } + sql_caller.typecast_array(values, type: model_class.type_for_attribute(col.to_s).type) + end + end + + # The PostgreSQL type of a column, used to build its array cast. + # + # @param col [Symbol] a column name + # @return [String] the column's SQL type (e.g. 
+"bigint"+, +"timestamp without time zone"+) + def sql_type(col) + model_class.columns_hash.fetch(col.to_s).sql_type + end + + # Quote a column-name identifier for safe inclusion in the SQL. + # + # @param identifier [Symbol, String] a column name + # @return [String] the quoted identifier + def quoted(identifier) + sql_caller.quote_column_name(identifier) + end + end +end diff --git a/lib/pg_sql_caller/model.rb b/lib/pg_sql_caller/model.rb new file mode 100644 index 0000000..f5c9a13 --- /dev/null +++ b/lib/pg_sql_caller/model.rb @@ -0,0 +1,304 @@ +# frozen_string_literal: true + +require 'active_support/core_ext/class/attribute' +require 'active_support/core_ext/module/delegation' + +module PgSqlCaller + # Wraps a single ActiveRecord class and runs raw SQL through its connection. + # Positional `?` placeholders are bound and sanitized by ActiveRecord, so values + # are never interpolated into the SQL string. + # + # sql = PgSqlCaller::Model.new(ApplicationRecord) + # sql.select_value('SELECT count(*) FROM users WHERE active = ?', true) # => 42 + # sql.select_values('SELECT email FROM users WHERE dept_id = ?', 5) # => ['a@x', 'b@x'] + # sql.select_all('SELECT id, name FROM users') # => [{ 'id' => 1, 'name' => 'Jo' }, ...] + # sql.transaction { sql.execute('UPDATE users SET active = false') } + # + # The `*_serialized` variants additionally cast each value back to its Ruby type + # using the result's column types (e.g. timestamp -> Time, int[] -> Array), and + # key rows by Symbol: + # + # sql.select_all_serialized('SELECT id, created_at FROM users') + # # => [{ id: 1, created_at: 2026-06-08 12:00:00 +0000 }, ...] + class Model + class << self + # Define a single connection-backed SQL instance method named +name+. + # + # @param name [Symbol] the connection method to wrap (e.g. 
+:select_value+) + # @return [Symbol] the name of the defined method + def define_sql_method(name) + define_method(name) do |sql, *bindings| + sql = sanitize_sql_array(sql, *bindings) if bindings.any? + connection.public_send(name, sql) + end + end + + # Define several connection-backed SQL instance methods at once — a thin wrapper + # over {.define_sql_method}, kept for backward compatibility. + # + # @param names [Array] the connection methods to wrap + # @return [Array] +names+, unchanged + def define_sql_methods(*names) + names.each { |name| define_sql_method(name) } + end + end + + # @!method select_value(sql, *bindings) + # Run +sql+ and return the value of the first column of the first row. + # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [Object, nil] the single value, or nil when no row matches + define_sql_method :select_value + + # @!method select_values(sql, *bindings) + # Run +sql+ and return the first column of every row. + # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [Array] + define_sql_method :select_values + + # @!method execute(sql, *bindings) + # Execute +sql+ (e.g. INSERT/UPDATE/DELETE/DDL) and return the raw adapter result. + # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [PG::Result] the raw PostgreSQL result (e.g. +#cmd_tuples+ for affected rows) + define_sql_method :execute + + # @!method select_all(sql, *bindings) + # Run +sql+ and return every row. 
+ # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [ActiveRecord::Result] rows as String-keyed hashes + define_sql_method :select_all + + # @!method select_rows(sql, *bindings) + # Run +sql+ and return rows as arrays of column values (no column names). + # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [Array] + define_sql_method :select_rows + + # @return [Class] the ActiveRecord class this instance wraps + attr_reader :model_class + + # @!method connection + # The ActiveRecord connection adapter of {#model_class}; every SQL method runs through it. + # @return [ActiveRecord::ConnectionAdapters::AbstractAdapter] + delegate :connection, to: :model_class + + # @!method quote_column_name(name) + # Quote a column-name identifier for safe inclusion in SQL (delegated to the + # {#connection}, since the model class itself does not expose it). + # @param name [String, Symbol] the column name to quote + # @return [String] the quoted identifier + delegate :quote_column_name, to: :connection + + # @!method quote_table_name(name) + # Quote a table-name identifier for safe inclusion in SQL (delegated to the + # {#connection}, since the model class itself does not expose it). + # @param name [String, Symbol] the table name to quote + # @return [String] the quoted identifier + delegate :quote_table_name, to: :connection + + # @param model_class [Class] the class whose connection is used + # to run statements and to sanitize/typecast values + def initialize(model_class) + @model_class = model_class + end + + # Whether a database transaction is currently open on the connection. + # + # @return [Boolean] + def transaction_open? + connection.send(:transaction_open?) 
+ end + + # Like {#select_all}, but cast each value back to its Ruby type (using the result's + # column types) and key every row by Symbol. + # + # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [Array<Hash{Symbol => Object}>] + def select_all_serialized(sql, *bindings) + result = select_all(sql, *bindings) + result.map do |row| + row.to_h { |key, value| [key.to_sym, deserialize_result(result, key, value)] } + end + end + + # Like {#select_value}, but cast the value back to its Ruby type. + # + # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [Object, nil] the type-cast value, or nil when no row matches + def select_value_serialized(sql, *bindings) + result = select_all(sql, *bindings) + key = result.first&.keys&.first + return if key.nil? + + value = result.first.values.first + deserialize_result(result, key, value) + end + + # Run +sql+ and return each row as an array of its type-cast column values. + # + # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [Array] one inner array per row + def select_values_serialized(sql, *bindings) + result = select_all(sql, *bindings) + result.map do |row| + row.map { |key, value| deserialize_result(result, key, value) } + end + end + + # The next value of the table's `<table_name>_id_seq` sequence (its current + # last_value + 1), read without consuming the sequence. + # + # @param table_name [String, Symbol] + # @return [Integer] + def next_sequence_value(table_name) + sequence_name = quote_table_name("#{table_name}_id_seq") + # `sequence_name` is an identifier escaped via quote_table_name (identifiers cannot use `?` + # bindings), so the interpolation below is injection-safe.
+ # nosemgrep: pg-sql-caller-interpolated-raw-sql + select_value("SELECT last_value FROM #{sequence_name}") + 1 + end + + # Total on-disk size of the table including indexes and TOAST, in bytes + # (PostgreSQL `pg_total_relation_size`). + # + # @param table_name [String, Symbol] + # @return [Integer] size in bytes + def table_full_size(table_name) + select_value('SELECT pg_total_relation_size(?)', table_name) + end + + # On-disk size of the table's main data fork only, in bytes + # (PostgreSQL `pg_relation_size`). + # + # @param table_name [String, Symbol] + # @return [Integer] size in bytes + def table_data_size(table_name) + select_value('SELECT pg_relation_size(?)', table_name) + end + + # Run +sql+ and return the first row as an array of column values. + # + # @param sql [String] SQL statement, optionally containing `?` placeholders + # @param bindings [Array] values bound, in order, to the `?` placeholders + # @return [Array, nil] the first row, or nil when no row matches + def select_row(sql, *bindings) + select_rows(sql, *bindings)[0] + end + + # Run the given block inside a database transaction, committing on success and + # rolling back if it raises. + # + # @yield executes within the open transaction + # @return [Object] the block's return value + # @raise [ArgumentError] if no block is given + def transaction(&) + raise ArgumentError, 'block must be given' unless block_given? + + connection.transaction(&) + end + + # Run `EXPLAIN ANALYZE` for +sql+ and return the query plan as text. + # + # @param sql [String] the statement to analyze + # @return [String] the plan, one line per row, prefixed with a +QUERY_PLAN+ header + def explain_analyze(sql) + # `sql` is the statement to analyze; the caller owns the full SQL by contract (like #execute), + # so there is no boundary to bind across. 
+ # nosemgrep: pg-sql-caller-interpolated-raw-sql + result = select_values("EXPLAIN ANALYZE #{sql}") + ['QUERY_PLAN', *result].join("\n") + end + + # Encode a Ruby array into a PostgreSQL array literal for the given attribute type, + # ready to bind as a single `?` value. + # + # @param values [Array] the Ruby values to encode + # @param type [Symbol] an ActiveRecord attribute type (e.g. +:integer+, +:string+, +:datetime+) + # @return [String] a PostgreSQL array literal, e.g. +"{1,2,3}"+ + def typecast_array(values, type:) + type = ActiveRecord::Type.lookup(type, array: true) + data = type.serialize(values) + data.encoder.encode(data.values) + end + + # Interpolate `?` placeholders in +sql+ with +bindings+ through ActiveRecord's + # sanitizer (values are quoted/escaped, never raw-interpolated). + # + # @param sql [String] SQL containing `?` placeholders + # @param bindings [Array] values bound, in order, to the placeholders + # @return [String] the safe, ready-to-run SQL + def sanitize_sql_array(sql, *bindings) + model_class.send :sanitize_sql_array, bindings.unshift(sql) + end + + # @return [String] the name of the currently connected database (`current_database()`) + def current_database + select_value('SELECT current_database();') + end + + # Capture PostgreSQL NOTICE output (e.g. from +RAISE NOTICE+) emitted while the block + # runs, passing each message to +callback+. Lowers +client_min_messages+ to +notice+ + # for the duration (see {#with_min_messages}) and restores the previous notice + # processor afterward. 
+ # + # sql.with_notice_processor(->(msg) { logger.info(msg) }) do + # sql.execute("DO $$ BEGIN RAISE NOTICE 'hi'; END $$") + # end + # + # @param callback [#call] invoked with each notice message (a chomped String) + # @yield runs with the notice processor installed + # @return [Object] the block's return value + def with_notice_processor(callback) + with_min_messages('notice') do + old_processor = connection.raw_connection.set_notice_processor { |result| callback.call(result.to_s.chomp) } + yield + ensure + connection.raw_connection.set_notice_processor(&old_processor) + end + end + + # Temporarily set the connection's +client_min_messages+ to +level+ for the duration + # of the block, restoring the previous value afterward. + # + # @param level [String] one of: debug5, debug4, debug3, debug2, debug1, log, notice, warning, error + # @yield runs with the level applied + # @return [Object] the block's return value + def with_min_messages(level) + old_level = select_value('SHOW client_min_messages') + execute('SET client_min_messages TO ?', level) + yield + ensure + execute('SET client_min_messages TO ?', old_level) unless old_level.nil? + end + + # Quote and escape a value as a SQL literal, safe to inline into a statement. + # + # @param value [Object] the value to quote (e.g. String, Numeric, nil, Time) + # @return [String] the quoted SQL literal (e.g. +"'O''Brien'"+) + def quote_value(value) + connection.quote(value) + end + + private + + # Cast a raw result value back to its Ruby type using the result set's column types. 
+    #
+    # @param result [ActiveRecord::Result] the result set the value was read from (supplies the column types)
+    # @param column_name [String] name of the column the value belongs to
+    # @param raw_value [Object] the value exactly as the adapter returned it
+    # @return [Object] the deserialized value, or +raw_value+ itself when the result has no type for the column
+    def deserialize_result(result, column_name, raw_value)
+      caster = result.column_types[column_name]
+      if caster.nil?
+        # No type registered for this column: hand the adapter's value back untouched.
+        raw_value
+      else
+        caster.deserialize(raw_value)
+      end
+    end
+  end
+end
diff --git a/lib/pg_sql_caller/version.rb b/lib/pg_sql_caller/version.rb index bcd043e..555f109 100644 --- a/lib/pg_sql_caller/version.rb +++ b/lib/pg_sql_caller/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module PgSqlCaller - VERSION = '0.2.3' + VERSION = '1.0.0' end diff --git a/spec/config/database.github.yml b/spec/config/database.github.yml new file mode 100644 index 0000000..cf9c39d --- /dev/null +++ b/spec/config/database.github.yml @@ -0,0 +1,7 @@ +test: + adapter: postgresql + database: pg_sql_caller_test + host: 127.0.0.1 + port: 5432 + username: postgres + password: postgres diff --git a/spec/config/database.travis.yml b/spec/config/database.travis.yml deleted file mode 100644 index 1efe999..0000000 --- a/spec/config/database.travis.yml +++ /dev/null @@ -1,3 +0,0 @@ -test: - adapter: postgresql - database: pg_sql_caller_test diff --git a/spec/fixtures/active_record.rb b/spec/fixtures/active_record.rb index 259e669..bd4d02d 100644 --- a/spec/fixtures/active_record.rb +++ b/spec/fixtures/active_record.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'yaml' +require 'fileutils' require 'active_support/logger' require 'active_record' @@ -8,7 +9,7 @@ ActiveRecord::Base.establish_connection config['test'] if ENV['CI'] - ActiveRecord::Base.logger = ActiveSupport::Logger.new(STDOUT) + ActiveRecord::Base.logger = ActiveSupport::Logger.new($stdout) else FileUtils.mkdir_p 'tmp' ActiveRecord::Base.logger =
ActiveSupport::Logger.new('tmp/test.log') @@ -42,6 +43,7 @@ end class ApplicationRecord < ActiveRecord::Base + self.abstract_class = true end class Department < ApplicationRecord diff --git a/spec/pg_sql_caller/base_spec.rb b/spec/pg_sql_caller/base_spec.rb new file mode 100644 index 0000000..736e9a0 --- /dev/null +++ b/spec/pg_sql_caller/base_spec.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +RSpec.describe PgSqlCaller::Base do + it 'performs select_values correctly' do + dep = Department.create! name: 'Tech' + employees = Employee.create!( + [ + { name: 'John Doe', department_id: dep.id }, + { name: 'Jane Doe', department_id: dep.id } + ] + ) + + dep2 = Department.create! name: 'Sales' + Employee.create! name: 'Jake Doe', department_id: dep2.id + + expect( + described_class.select_values('select name from employees where department_id = ?', dep.id) + ).to match_array(employees.map(&:name)) + end + + it 'performs transaction_open? correctly' do + expect(described_class.transaction_open?).to be(false) + described_class.transaction do + expect(described_class.transaction_open?).to be(true) + end + ApplicationRecord.transaction do + expect(described_class.transaction_open?).to be(true) + end + end +end diff --git a/spec/pg_sql_caller/bulk_update_spec.rb b/spec/pg_sql_caller/bulk_update_spec.rb new file mode 100644 index 0000000..da7fc70 --- /dev/null +++ b/spec/pg_sql_caller/bulk_update_spec.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +RSpec.describe PgSqlCaller::BulkUpdate do + subject { described_class.call(Employee, attrs_list) } + + let!(:dep) { Department.create!(name: 'Tech') } + let!(:other_dep) { Department.create!(name: 'Sales') } + + let!(:first) { Employee.create!(name: 'John', department_id: dep.id) } + let!(:second) { Employee.create!(name: 'Jane', department_id: dep.id) } + # Untouched by every attrs_list below — guards against an over-broad UPDATE. 
+ let!(:bystander) { Employee.create!(name: 'Jake', department_id: dep.id) } + + let(:attrs_list) do + [ + { id: first.id, name: 'John Updated', department_id: other_dep.id }, + { id: second.id, name: 'Jane Updated', department_id: other_dep.id } + ] + end + + it 'returns the number of rows affected' do + expect(subject).to eq(2) + end + + it 'writes each row its own per-column values', :aggregate_failures do + subject + expect(first.reload).to have_attributes(name: 'John Updated', department_id: other_dep.id) + expect(second.reload).to have_attributes(name: 'Jane Updated', department_id: other_dep.id) + end + + it 'touches only the listed rows' do + expect { subject }.not_to(change { bystander.reload.attributes }) + end + + it 'leaves unlisted columns untouched' do + expect { subject }.not_to(change { first.reload.created_at }) + end + + context 'with values that would break naive string interpolation' do + let(:attrs_list) do + [{ id: first.id, name: "boom'); DROP TABLE employees;--\n\"quoted\", {brace}" }] + end + + it 'stores the raw text verbatim' do + subject + expect(first.reload.name).to eq("boom'); DROP TABLE employees;--\n\"quoted\", {brace}") + end + end + + context 'with datetime columns' do + let(:created_at) { Time.now - 3 } + let(:attrs_list) { [{ id: first.id, created_at: created_at }] } + + it 'round-trips the timestamp' do + subject + expect(first.reload.created_at).to be_within(1).of(created_at) + end + end + + context 'with a composite unique_by' do + subject { described_class.call(Employee, attrs_list, unique_by: %i[department_id name]) } + + let(:new_created_at) { Time.now - 100 } + let(:attrs_list) do + [ + { department_id: dep.id, name: 'John', created_at: new_created_at }, + { department_id: dep.id, name: 'Jane', created_at: new_created_at } + ] + end + + it 'matches rows on every key column', :aggregate_failures do + expect(subject).to eq(2) + expect(first.reload.created_at).to be_within(1).of(new_created_at) + 
expect(second.reload.created_at).to be_within(1).of(new_created_at) + # 'Jake' shares the department but not the name, so the composite key skips it. + expect(bystander.reload.created_at).not_to be_within(1).of(new_created_at) + end + end + + context 'when attrs_list is empty' do + let(:attrs_list) { [] } + + it 'is a no-op returning zero' do + expect { expect(subject).to eq(0) }.not_to(change { first.reload.attributes }) + end + end + + context 'when a row omits the unique_by column' do + let(:attrs_list) { [{ name: 'Nameless' }] } + + it 'raises ArgumentError' do + expect { subject }.to raise_error(ArgumentError, /include unique_by/) + end + end + + context 'when a column does not exist on the model' do + let(:attrs_list) { [{ id: first.id, bogus_column: 1 }] } + + it 'raises ArgumentError before touching the database', :aggregate_failures do + expect { subject }.to raise_error(ArgumentError, /unknown.*bogus_column/) + expect(first.reload.name).to eq('John') + end + end + + context 'when rows carry only the unique_by column' do + let(:attrs_list) { [{ id: first.id }, { id: second.id }] } + + it 'raises ArgumentError rather than building empty SET SQL', :aggregate_failures do + expect { subject }.to raise_error(ArgumentError, /no value columns/) + expect(first.reload.name).to eq('John') + end + end + + context 'when rows do not all share the same keys' do + let(:attrs_list) do + [ + { id: first.id, name: 'John Updated' }, + { id: second.id, department_id: other_dep.id } + ] + end + + it 'raises ArgumentError before touching the database', :aggregate_failures do + expect { subject }.to raise_error(ArgumentError, /differ from first row/) + expect(first.reload.name).to eq('John') + end + end + + # Excluded from the default suite (see filter_run_excluding :benchmark). 
+  # Run with: bundle exec rspec spec/pg_sql_caller/bulk_update_spec.rb --tag benchmark
+  describe 'performance vs N update_all calls in a transaction', :benchmark do
+    let(:row_count) { 500 }
+
+    # Cheap, callback-free bulk insert of NEW rows, so setup cost doesn't dwarf
+    # the thing being measured.
+    let(:ids) do
+      bulk_dep = Department.create!(name: 'Bulk')
+      now = Time.now
+      rows = Array.new(row_count) do |i|
+        { department_id: bulk_dep.id, name: "Employee #{i}", created_at: now, updated_at: now }
+      end
+      Employee.insert_all(rows)
+      # Only the rows just inserted — excludes the outer let!s, so attrs_list
+      # stays exactly row_count and the printed `rows=` count is accurate.
+      Employee.where(department_id: bulk_dep.id).order(:id).pluck(:id)
+    end
+
+    let(:attrs_list) do
+      ids.map { |id| { id: id, name: "Updated #{id}" } }
+    end
+
+    # Run the given block three times and keep the fastest run, to damp one-off noise.
+    #
+    # @yield the work to time
+    # @return [Float] the smallest elapsed monotonic-clock time of the three runs, in seconds
+    def best_of_three
+      Array.new(3) {
+        started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+        yield
+        Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
+      }.min
+    end
+
+    it 'is faster than updating each row in a loop' do
+      attrs_list # build the payload and seed the rows before timing
+
+      loop_time = best_of_three do
+        # The baseline named in the describe title: N update_all calls inside ONE
+        # transaction. Without the wrapper every UPDATE autocommits on its own, which
+        # inflates the loop time and flatters the bulk path.
+        Employee.transaction do
+          attrs_list.each do |attrs|
+            Employee.where(id: attrs[:id]).update_all(attrs.except(:id))
+          end
+        end
+      end
+      bulk_time = best_of_three { described_class.call(Employee, attrs_list) }
+
+      loop_ms = (loop_time * 1000).round(1)
+      bulk_ms = (bulk_time * 1000).round(1)
+      speedup = (loop_time / bulk_time).round(1)
+      warn "\n[BulkUpdate benchmark] rows=#{row_count} " \
+           "N×update_all=#{loop_ms}ms BulkUpdate=#{bulk_ms}ms speedup=#{speedup}×\n"
+      expect(bulk_time).to be < loop_time
+    end
+  end
+end
diff --git a/spec/pg_sql_caller/model_spec.rb b/spec/pg_sql_caller/model_spec.rb
new file mode 100644
index 0000000..c5d7698
--- /dev/null
+++ b/spec/pg_sql_caller/model_spec.rb
@@ -0,0 +1,325 @@
+# frozen_string_literal: true
+
+RSpec.describe PgSqlCaller::Model do
+  subject(:sql) { described_class.new(ApplicationRecord) }
+
+  
let(:dep) { Department.create!(name: 'Tech') }
+
+  # Spec helper: persist an Employee called +name+ that belongs to +department+.
+  #
+  # @param name [String] the employee's name
+  # @param department [Department] record whose +id+ is stored as +department_id+;
+  #   defaults to the +dep+ let defined above
+  # @return [Employee] whatever +Employee.create!+ returns for the new row
+  def create_employee(name, department: dep)
+    Employee.create!(name: name, department_id: department.id)
+  end
+
+  # The class-level helper that generates the connection-backed SQL methods below.
+  describe '.define_sql_method' do
+    it 'defines an instance method that runs the SQL through the connection, binding ?s' do
+      klass = Class.new(described_class) { define_sql_method(:select_value) }
+      employee = create_employee('John')
+
+      expect(klass.new(ApplicationRecord).select_value('SELECT name FROM employees WHERE id = ?', employee.id))
+        .to eq('John')
+    end
+  end
+
+  # ---------------------------------------------------------------------------
+  # Methods generated by define_sql_method
+  # ---------------------------------------------------------------------------
+
+  describe '#select_value' do
+    it 'returns the value of the first column of the first row' do
+      create_employee('John')
+      create_employee('Jane')
+
+      expect(sql.select_value('SELECT count(*) FROM employees')).to eq(2)
+    end
+
+    it 'returns nil when no row matches' do
+      expect(sql.select_value('SELECT name FROM employees WHERE id = ?', -1)).to be_nil
+    end
+
+    it 'binds ? 
placeholders, sanitizing the value' do + employee = create_employee("O'Brien") + + expect(sql.select_value('SELECT id FROM employees WHERE name = ?', "O'Brien")).to eq(employee.id) + end + end + + describe '#select_values' do + it 'returns the first column of every row' do + create_employee('John') + create_employee('Jane') + create_employee('Jake', department: Department.create!(name: 'Sales')) + + expect(sql.select_values('SELECT name FROM employees WHERE department_id = ?', dep.id)) + .to match_array(%w[John Jane]) + end + + it 'returns an empty array when no row matches' do + expect(sql.select_values('SELECT name FROM employees WHERE id = ?', -1)).to eq([]) + end + end + + describe '#execute' do + it 'runs the statement and returns the raw PG::Result' do + employee = create_employee('John') + + result = sql.execute('UPDATE employees SET name = ? WHERE id = ?', 'Renamed', employee.id) + + expect(result).to be_a(PG::Result) + expect(result.cmd_tuples).to eq(1) + expect(employee.reload.name).to eq('Renamed') + end + end + + describe '#select_all' do + it 'returns every row as String-keyed hashes in an ActiveRecord::Result' do + employee = create_employee('John') + + result = sql.select_all('SELECT id, name FROM employees') + + expect(result).to be_a(ActiveRecord::Result) + expect(result.to_a).to eq([{ 'id' => employee.id, 'name' => 'John' }]) + end + end + + describe '#select_rows' do + it 'returns rows as arrays of column values' do + employee = create_employee('John') + + expect(sql.select_rows('SELECT id, name FROM employees')).to eq([[employee.id, 'John']]) + end + end + + # --------------------------------------------------------------------------- + # Methods defined explicitly with def (plus the delegated/attr_reader accessors) + # --------------------------------------------------------------------------- + + describe '#model_class' do + it 'returns the wrapped ActiveRecord class' do + expect(sql.model_class).to eq(ApplicationRecord) + end + end + + 
describe '#connection' do + it 'returns the model class connection' do + expect(sql.connection).to eq(ApplicationRecord.connection) + end + end + + describe '#quote_column_name' do + it 'quotes a column identifier' do + expect(sql.quote_column_name('name')).to eq('"name"') + end + end + + describe '#quote_table_name' do + it 'quotes a table identifier' do + expect(sql.quote_table_name('employees')).to eq('"employees"') + end + end + + describe '#quote_value' do + it 'quotes and escapes a value as a SQL literal' do + expect(sql.quote_value("O'Brien")).to eq("'O''Brien'") + end + end + + describe '#transaction_open?' do + it 'is false outside and true inside a transaction' do + expect(sql.transaction_open?).to be(false) + sql.transaction do + expect(sql.transaction_open?).to be(true) + end + expect(sql.transaction_open?).to be(false) + end + + it 'sees a transaction opened on the model class itself' do + ApplicationRecord.transaction do + expect(sql.transaction_open?).to be(true) + end + end + end + + describe '#transaction' do + it 'runs the block inside a transaction and returns its value' do + result = sql.transaction do + expect(sql.transaction_open?).to be(true) + :block_value + end + + expect(result).to eq(:block_value) + end + + it 'rolls back when the block raises' do + expect { + sql.transaction do + Department.create!(name: 'Rollback') + raise 'boom' + end + }.to raise_error('boom') + + expect(Department.where(name: 'Rollback')).not_to exist + end + + it 'raises ArgumentError when no block is given' do + expect { sql.transaction }.to raise_error(ArgumentError, 'block must be given') + end + end + + describe '#select_all_serialized' do + it 'returns every row as a Symbol-keyed hash' do + employee = create_employee('John') + + expect(sql.select_all_serialized('SELECT id, name FROM employees')) + .to eq([{ id: employee.id, name: 'John' }]) + end + + it 'casts each value back to its Ruby type' do + create_employee('John') + + 
expect(sql.select_all_serialized('SELECT ARRAY[1,2,3]::int[] AS nums FROM employees')) + .to eq([{ nums: [1, 2, 3] }]) + end + + it 'returns an empty array when no row matches' do + expect(sql.select_all_serialized('SELECT id FROM employees WHERE id = ?', -1)).to eq([]) + end + end + + describe '#select_value_serialized' do + it 'casts the single value back to its Ruby type' do + # The non-serialized read returns the raw PostgreSQL array literal... + expect(sql.select_value('SELECT ARRAY[1,2,3]::int[]')).to eq('{1,2,3}') + # ...while the serialized read casts it to a Ruby Array. + expect(sql.select_value_serialized('SELECT ARRAY[1,2,3]::int[]')).to eq([1, 2, 3]) + end + + it 'returns nil when no row matches' do + expect(sql.select_value_serialized('SELECT id FROM employees WHERE id = ?', -1)).to be_nil + end + end + + describe '#select_values_serialized' do + it 'returns each row as an array of type-cast values' do + employee = create_employee('John') + + expect(sql.select_values_serialized('SELECT id, ARRAY[1,2]::int[] FROM employees')) + .to eq([[employee.id, [1, 2]]]) + end + end + + describe '#next_sequence_value' do + it 'returns the sequence last_value + 1' do + employee = create_employee('John') + + expect(sql.next_sequence_value('employees')).to eq(employee.id + 1) + end + end + + describe '#table_full_size' do + it 'returns the total on-disk size in bytes' do + expect(sql.table_full_size('employees')).to be_a(Integer).and(be > 0) + end + end + + describe '#table_data_size' do + it 'returns the main fork size in bytes, not exceeding the full size' do + data_size = sql.table_data_size('employees') + + expect(data_size).to be_a(Integer).and(be >= 0) + expect(data_size).to be <= sql.table_full_size('employees') + end + end + + describe '#select_row' do + it 'returns the first row as an array of column values' do + employee = create_employee('John') + create_employee('Jane') + + expect(sql.select_row('SELECT id, name FROM employees ORDER BY id')).to 
eq([employee.id, 'John']) + end + + it 'returns nil when no row matches' do + expect(sql.select_row('SELECT id FROM employees WHERE id = ?', -1)).to be_nil + end + end + + describe '#explain_analyze' do + it 'returns the query plan text under a QUERY_PLAN header' do + create_employee('John') + + plan = sql.explain_analyze('SELECT * FROM employees') + + expect(plan).to start_with("QUERY_PLAN\n") + expect(plan).to include('actual time') + end + end + + describe '#typecast_array' do + it 'encodes a Ruby array into a PostgreSQL array literal' do + expect(sql.typecast_array([1, 2, 3], type: :integer)).to eq('{1,2,3}') + end + + it 'quotes elements that need it' do + expect(sql.typecast_array(['a', 'b,c'], type: :string)).to eq('{a,"b,c"}') + end + end + + describe '#sanitize_sql_array' do + it 'interpolates ? placeholders, escaping the bound values' do + expect(sql.sanitize_sql_array('name = ? AND id = ?', "O'Brien", 5)) + .to eq("name = 'O''Brien' AND id = 5") + end + end + + describe '#current_database' do + it 'returns the name of the connected database' do + expect(sql.current_database).to eq('pg_sql_caller_test') + end + end + + describe '#with_min_messages' do + it 'applies the level inside the block, returns its value and restores the level' do + original = sql.select_value('SHOW client_min_messages') + inside = nil + + result = sql.with_min_messages('debug1') do + inside = sql.select_value('SHOW client_min_messages') + :block_value + end + + expect(inside).to eq('debug1') + expect(result).to eq(:block_value) + expect(sql.select_value('SHOW client_min_messages')).to eq(original) + end + + it 'restores the level even when the block raises' do + original = sql.select_value('SHOW client_min_messages') + + expect { sql.with_min_messages('debug1') { raise 'boom' } }.to raise_error('boom') + + expect(sql.select_value('SHOW client_min_messages')).to eq(original) + end + end + + describe '#with_notice_processor' do + it 'captures NOTICE output emitted during the block 
and returns the block value' do + messages = [] + + result = sql.with_notice_processor(->(msg) { messages << msg }) do + sql.execute("DO $$ BEGIN RAISE NOTICE 'hello from pg'; END $$") + :block_value + end + + expect(messages).to include(a_string_including('hello from pg')) + expect(result).to eq(:block_value) + end + + it 'restores the previous notice level afterward' do + original = sql.select_value('SHOW client_min_messages') + + sql.with_notice_processor(->(_msg) {}) { nil } + + expect(sql.select_value('SHOW client_min_messages')).to eq(original) + end + end +end diff --git a/spec/pg_sql_caller_base_spec.rb b/spec/pg_sql_caller_base_spec.rb deleted file mode 100644 index 3a97558..0000000 --- a/spec/pg_sql_caller_base_spec.rb +++ /dev/null @@ -1,30 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe PgSqlCaller::Base do - it 'performs select_values correctly' do - dep = Department.create! name: 'Tech' - employees = Employee.create! [ - { name: 'John Doe', department_id: dep.id }, - { name: 'Jane Doe', department_id: dep.id } - ] - - dep2 = Department.create! name: 'Sales' - Employee.create! name: 'Jake Doe', department_id: dep2.id - - expect( - PgSqlCaller::Base.select_values('select name from employees where department_id = ?', dep.id) - ).to match_array( - employees.map(&:name) - ) - end - - it 'performs transaction_open? 
correctly' do - expect(PgSqlCaller::Base.transaction_open?).to eq(false) - PgSqlCaller::Base.transaction do - expect(PgSqlCaller::Base.transaction_open?).to eq(true) - end - ApplicationRecord.transaction do - expect(PgSqlCaller::Base.transaction_open?).to eq(true) - end - end -end diff --git a/spec/pg_sql_caller_spec.rb b/spec/pg_sql_caller_spec.rb index 31c8b65..a6291f0 100644 --- a/spec/pg_sql_caller_spec.rb +++ b/spec/pg_sql_caller_spec.rb @@ -2,6 +2,6 @@ RSpec.describe PgSqlCaller do it 'has a version number' do - expect(PgSqlCaller::VERSION).not_to be nil + expect(PgSqlCaller::VERSION).not_to be_nil end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index e9b0cb2..224f5e2 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -15,16 +15,19 @@ # Disable RSpec exposing methods globally on `Module` and `main` config.disable_monkey_patching! + # Opt-in only: run with `--tag benchmark` (see bulk_update_spec.rb). + config.filter_run_excluding :benchmark + config.expect_with :rspec do |c| c.syntax = :expect end - config.before(:each) do + config.before do DatabaseCleaner.strategy = :truncation DatabaseCleaner.start end - config.after(:each) do + config.after do DatabaseCleaner.clean end end diff --git a/sql_caller.gemspec b/sql_caller.gemspec index cd3600d..2bf8522 100644 --- a/sql_caller.gemspec +++ b/sql_caller.gemspec @@ -9,24 +9,36 @@ Gem::Specification.new do |spec| spec.email = ['senid231@gmail.com'] spec.summary = 'Postgresql Sql Caller for ActiveRecord' - spec.description = 'Postgresql Sql Caller for ActiveRecord.' + spec.description = 'PgSqlCaller is a small, focused wrapper for running raw SQL against ' \ + 'PostgreSQL through ActiveRecord. 
It exposes a stable, documented API on an ' \ + 'ActiveRecord-backed class you name, covering the queries the query builder ' \ + 'makes awkward: single-scalar and single-column SELECTs, raw rows, ' \ + 'ActiveRecord::Result reads, and type-cast (serialized) variants that decode ' \ + 'PostgreSQL arrays and custom column types into Ruby objects. Every ? ' \ + 'placeholder is bound and escaped through the ActiveRecord sanitizer, so ' \ + 'statements stay injection-safe with no manual quoting. On top of that it adds ' \ + 'PostgreSQL-specific helpers — non-consuming sequence peeking, table and ' \ + 'relation sizes, EXPLAIN ANALYZE, NOTICE capture, and quoting/sanitizing ' \ + 'utilities — plus a fast, injection-safe bulk update that partially updates ' \ + 'many existing rows in a single UPDATE ... FROM unnest(...) statement and ' \ + 'round-trip. The reader API is extensible via define_sql_method, and the gem ' \ + 'runs on Ruby 3.2+ with Rails 7.1 through 8.1.' spec.homepage = 'https://github.com/didww/pg_sql_caller' spec.license = 'MIT' - spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0') + spec.required_ruby_version = Gem::Requirement.new('>= 3.2.0') spec.metadata['homepage_uri'] = spec.homepage spec.metadata['source_code_uri'] = spec.homepage spec.metadata['changelog_uri'] = spec.homepage - # Specify which files should be added to the gem when it is released. - # The `git ls-files -z` loads the files in the RubyGem that have been added into git. + # Ship only the runtime library code plus the user-facing docs/license. + # Everything else (specs, CI config, dev tooling, binstubs) stays out of the gem. 
spec.files = Dir.chdir(File.expand_path(__dir__)) do - `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } + Dir['lib/**/*'].select { |f| File.file?(f) } + %w[CHANGELOG.md LICENSE.txt README.md] end - spec.bindir = 'exe' - spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.extra_rdoc_files = %w[README.md CHANGELOG.md] spec.require_paths = ['lib'] - spec.add_dependency 'activerecord' - spec.add_dependency 'activesupport' + spec.add_dependency 'activerecord', '>= 7.1' + spec.add_dependency 'activesupport', '>= 7.1' end