Skip to content

Commit 02ea183

Browse files
committed
Merge remote-tracking branch 'upstream/main' into add_view_entry
2 parents 27ca896 + 375710f commit 02ea183

File tree

6 files changed

+69
-170
lines changed

6 files changed

+69
-170
lines changed

README.md

+3-167
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ To make a Postgres database accessible to DuckDB use the `ATTACH` command:
88

99
```sql
1010
ATTACH 'dbname=postgresscanner' AS postgres_db (TYPE postgres);
11-
USE postgres_db;
1211
```
1312

1413
The `ATTACH` command takes as input a [`libpq` connection string](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING) - which is a set of `key=value` pairs separated by spaces. Below are some example connection strings and commonly used parameters. A full list of available parameters can be found [in the Postgres documentation](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS).
@@ -32,14 +31,14 @@ host=localhost port=5432 dbname=mydb connect_timeout=10
3231
The tables in the attached Postgres database can be read as if they were normal DuckDB tables, but the underlying data is read directly from Postgres at query time.
3332

3433
```sql
35-
D SHOW TABLES;
34+
D SHOW ALL TABLES;
3635
┌───────────────────────────────────────┐
3736
│ name │
3837
│ varchar │
3938
├───────────────────────────────────────┤
4039
│ uuids │
4140
└───────────────────────────────────────┘
42-
D SELECT * FROM uuids;
41+
D SELECT * FROM postgres_db.uuids;
4342
┌──────────────────────────────────────┐
4443
│ u │
4544
│ uuid │
@@ -51,170 +50,7 @@ D SELECT * FROM uuids;
5150
└──────────────────────────────────────┘
5251
```
5352

54-
It might be desirable to create a copy of the Postgres databases in DuckDB to prevent the system from re-reading the tables from Postgres continuously, particularly for large tables.
55-
56-
Data can be copied over from Postgres to DuckDB using standard SQL, for example:
57-
58-
```sql
59-
CREATE TABLE duckdb_table AS FROM postgres_db.postgres_tbl;
60-
```
61-
62-
## Writing Data to Postgres
63-
64-
In addition to reading data from Postgres, the extension allows you to create tables, ingest data into Postgres and make other modifications to a Postgres database using standard SQL queries.
65-
66-
This allows you to use DuckDB to, for example, export data that is stored in a Postgres database to Parquet, or read data from a Parquet file into Postgres.
67-
68-
Below is a brief example of how to create a new table in Postgres and load data into it.
69-
70-
```sql
71-
ATTACH 'dbname=postgresscanner' AS postgres_db (TYPE postgres);
72-
CREATE TABLE postgres_db.tbl(id INTEGER, name VARCHAR);
73-
INSERT INTO postgres_db.tbl VALUES (42, 'DuckDB');
74-
```
75-
Many operations on Postgres tables are supported. All these operations directly modify the Postgres database, and the result of subsequent operations can then be read using Postgres.
76-
Note that if modifications are not desired, `ATTACH` can be run with the `READ_ONLY` property which prevents making modifications to the underlying database. For example:
77-
78-
```sql
79-
ATTACH 'dbname=postgresscanner' AS postgres_db (TYPE postgres, READ_ONLY);
80-
```
81-
82-
Below is a list of supported operations.
83-
84-
###### CREATE TABLE
85-
```sql
86-
CREATE TABLE postgres_db.tbl(id INTEGER, name VARCHAR);
87-
```
88-
89-
###### INSERT INTO
90-
```sql
91-
INSERT INTO postgres_db.tbl VALUES (42, 'DuckDB');
92-
```
93-
94-
###### SELECT
95-
```sql
96-
SELECT * FROM postgres_db.tbl;
97-
┌───────┬─────────┐
98-
│ id │ name │
99-
│ int64 │ varchar │
100-
├───────┼─────────┤
101-
│ 42 │ DuckDB │
102-
└───────┴─────────┘
103-
```
104-
105-
###### COPY
106-
```sql
107-
COPY postgres_db.tbl TO 'data.parquet';
108-
COPY postgres_db.tbl FROM 'data.parquet';
109-
```
110-
111-
###### UPDATE
112-
```sql
113-
UPDATE postgres_db.tbl SET name='Woohoo' WHERE id=42;
114-
```
115-
116-
###### DELETE
117-
```sql
118-
DELETE FROM postgres_db.tbl WHERE id=42;
119-
```
120-
121-
###### ALTER TABLE
122-
```sql
123-
ALTER TABLE postgres_db.tbl ADD COLUMN k INTEGER;
124-
```
125-
126-
###### DROP TABLE
127-
```sql
128-
DROP TABLE postgres_db.tbl;
129-
```
130-
131-
###### CREATE VIEW
132-
```sql
133-
CREATE VIEW postgres_db.v1 AS SELECT 42;
134-
```
135-
136-
###### CREATE SCHEMA/DROP SCHEMA
137-
```sql
138-
CREATE SCHEMA postgres_db.s1;
139-
CREATE TABLE postgres_db.s1.integers(i int);
140-
INSERT INTO postgres_db.s1.integers VALUES (42);
141-
SELECT * FROM postgres_db.s1.integers;
142-
┌───────┐
143-
│ i │
144-
│ int32 │
145-
├───────┤
146-
│ 42 │
147-
└───────┘
148-
DROP SCHEMA postgres_db.s1;
149-
```
150-
151-
###### Transactions
152-
```sql
153-
CREATE TABLE postgres_db.tmp(i INTEGER);
154-
BEGIN;
155-
INSERT INTO postgres_db.tmp VALUES (42);
156-
SELECT * FROM postgres_db.tmp;
157-
┌───────┐
158-
│ i │
159-
│ int64 │
160-
├───────┤
161-
│ 42 │
162-
└───────┘
163-
ROLLBACK;
164-
SELECT * FROM postgres_db.tmp;
165-
┌────────┐
166-
│ i │
167-
│ int64 │
168-
├────────┤
169-
│ 0 rows │
170-
└────────┘
171-
```
172-
173-
## Running SQL Queries in Postgres with postgres_query
174-
175-
The postgres_query function allows you to run arbitrary SQL within an attached database. `postgres_query` takes the name of the attached Postgres database to execute the query in, as well as the SQL query to execute. The result of the query is returned.
176-
177-
```sql
178-
postgres_query(attached_database::VARCHAR, query::VARCHAR)
179-
```
180-
181-
###### Example
182-
183-
```sql
184-
ATTACH 'dbname=postgresscanner' AS s (TYPE POSTGRES);
185-
SELECT * FROM postgres_query('s', 'SELECT * FROM cars LIMIT 3');
186-
┌──────────────┬───────────┬─────────┐
187-
│ brand │ model │ color │
188-
│ varchar │ varchar │ varchar │
189-
├──────────────┼───────────┼─────────┤
190-
│ ferari │ testarosa │ red │
191-
│ aston martin │ db2 │ blue │
192-
│ bentley │ mulsanne │ gray │
193-
└──────────────┴───────────┴─────────┘
194-
```
195-
196-
## Settings
197-
198-
The extension exposes the following configuration parameters.
199-
200-
| name | description | default |
201-
|---------------------------------|----------------------------------------------------------------------------|---------|
202-
| pg_debug_show_queries | DEBUG SETTING: print all queries sent to Postgres to stdout | false |
203-
| pg_experimental_filter_pushdown | Whether or not to use filter pushdown (currently experimental) | false |
204-
| pg_array_as_varchar | Read Postgres arrays as varchar - enables reading mixed dimensional arrays | false |
205-
| pg_connection_cache | Whether or not to use the connection cache | true |
206-
| pg_connection_limit | The maximum amount of concurrent Postgres connections | 64 |
207-
| pg_pages_per_task | The amount of pages per task | 1000 |
208-
| pg_use_binary_copy | Whether or not to use BINARY copy to read data | true |
209-
210-
211-
## Schema Cache
212-
213-
To avoid having to continuously fetch schema data from Postgres, DuckDB keeps schema information - such as the names of tables, their columns, etc - cached. If changes are made to the schema through a different connection to the Postgres instance, such as new columns being added to a table, the cached schema information might be outdated. In this case, the function `pg_clear_cache` can be executed to clear the internal caches.
214-
215-
```sql
216-
CALL pg_clear_cache();
217-
```
53+
For more information on how to use the connector, refer to the [Postgres documentation on the website](https://duckdb.org/docs/extensions/postgres).
21854

21955
## Building & Loading the Extension
22056

src/include/postgres_scanner.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class PostgresClearCacheFunction : public TableFunction {
8383
PostgresClearCacheFunction();
8484

8585
static void ClearCacheOnSetting(ClientContext &context, SetScope scope, Value &parameter);
86+
static void ClearPostgresCaches(ClientContext &context);
8687
};
8788

8889
class PostgresQueryFunction : public TableFunction {

src/include/storage/postgres_catalog.hpp

+14
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,20 @@ class PostgresCatalog : public Catalog {
7272

7373
void ClearCache();
7474

75+
//! Whether or not this catalog should search a specific type with the standard priority
//! Index, table, type and view entries get the standard lookup behavior; every other catalog type is
//! reported as never looked up in this catalog.
76+
CatalogLookupBehavior CatalogTypeLookupRule(CatalogType type) const override {
77+
switch (type) {
78+
case CatalogType::INDEX_ENTRY:
79+
case CatalogType::TABLE_ENTRY:
80+
case CatalogType::TYPE_ENTRY:
81+
case CatalogType::VIEW_ENTRY:
82+
return CatalogLookupBehavior::STANDARD;
83+
default:
84+
// unsupported type (e.g. scalar functions, aggregates, ...)
85+
return CatalogLookupBehavior::NEVER_LOOKUP;
86+
}
87+
}
88+
7589
private:
7690
void DropSchema(ClientContext &context, DropInfo &info) override;
7791

src/postgres_extension.cpp

+40
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,44 @@
1212
#include "duckdb/main/attached_database.hpp"
1313
#include "storage/postgres_catalog.hpp"
1414
#include "storage/postgres_optimizer.hpp"
15+
#include "duckdb/planner/extension_callback.hpp"
16+
#include "duckdb/main/client_context.hpp"
17+
#include "duckdb/main/client_context_state.hpp"
18+
#include "duckdb/main/connection_manager.hpp"
19+
#include "duckdb/common/error_data.hpp"
1520

1621
using namespace duckdb;
1722

23+
//! Client-context state of the Postgres extension.
//! When planning fails with a binder error whose "error_subtype" extra info is "COLUMN_NOT_FOUND",
//! it clears the Postgres caches and asks for the statement to be rebound; every other error is left alone.
class PostgresExtensionState : public ClientContextState {
24+
public:
25+
//! This state is allowed to request a rebind (always returns true).
bool CanRequestRebind() override {
26+
return true;
27+
}
28+
//! Decides whether a failed planning attempt should be retried.
//! Returns ATTEMPT_TO_REBIND only for binder errors tagged COLUMN_NOT_FOUND (after clearing the
//! Postgres caches); returns DO_NOT_REBIND in all other cases. `statement` is not inspected.
RebindQueryInfo OnPlanningError(ClientContext &context, SQLStatement &statement, ErrorData &error) override {
29+
// only binder errors are candidates for a retry
if (error.Type() != ExceptionType::BINDER) {
30+
return RebindQueryInfo::DO_NOT_REBIND;
31+
}
32+
auto &extra_info = error.ExtraInfo();
33+
auto entry = extra_info.find("error_subtype");
34+
// no subtype recorded on the error - nothing to base a retry on
if (entry == extra_info.end()) {
35+
return RebindQueryInfo::DO_NOT_REBIND;
36+
}
37+
if (entry->second != "COLUMN_NOT_FOUND") {
38+
return RebindQueryInfo::DO_NOT_REBIND;
39+
}
40+
// clear caches and rebind
// NOTE(review): presumably the missing column may only be missing from an outdated cached schema
// (e.g. it was added through another connection) - confirm
41+
PostgresClearCacheFunction::ClearPostgresCaches(context);
42+
return RebindQueryInfo::ATTEMPT_TO_REBIND;
43+
}
44+
};
45+
46+
//! Extension callback that attaches a PostgresExtensionState to each connection as it is opened.
class PostgresExtensionCallback : public ExtensionCallback {
47+
public:
48+
//! Registers a fresh PostgresExtensionState in the context's registered_state under the key
//! "postgres_extension".
void OnConnectionOpened(ClientContext &context) override {
49+
context.registered_state.insert(make_pair("postgres_extension", make_shared<PostgresExtensionState>()));
50+
}
51+
};
52+
1853
static void SetPostgresConnectionLimit(ClientContext &context, SetScope scope, Value &parameter) {
1954
if (scope == SetScope::LOCAL) {
2055
throw InvalidInputException("pg_connection_limit can only be set globally");
@@ -76,6 +111,11 @@ static void LoadInternal(DatabaseInstance &db) {
76111
OptimizerExtension postgres_optimizer;
77112
postgres_optimizer.optimize_function = PostgresOptimizer::Optimize;
78113
config.optimizer_extensions.push_back(std::move(postgres_optimizer));
114+
115+
config.extension_callbacks.push_back(make_uniq<PostgresExtensionCallback>());
116+
for(auto &connection : ConnectionManager::Get(db).GetConnectionList()) {
117+
connection->registered_state.insert(make_pair("postgres_extension", make_shared<PostgresExtensionState>()));
118+
}
79119
}
80120

81121
void PostgresScannerExtension::Load(DuckDB &db) {

src/storage/postgres_clear_cache.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ static unique_ptr<FunctionData> ClearCacheBind(ClientContext &context, TableFunc
2121
return std::move(result);
2222
}
2323

24-
static void ClearPostgresCaches(ClientContext &context) {
24+
void PostgresClearCacheFunction::ClearPostgresCaches(ClientContext &context) {
2525
auto databases = DatabaseManager::Get(context).GetDatabases(context);
2626
for (auto &db_ref : databases) {
2727
auto &db = db_ref.get();
@@ -38,12 +38,12 @@ static void ClearCacheFunction(ClientContext &context, TableFunctionInput &data_
3838
if (data.finished) {
3939
return;
4040
}
41-
ClearPostgresCaches(context);
41+
PostgresClearCacheFunction::ClearPostgresCaches(context);
4242
data.finished = true;
4343
}
4444

4545
void PostgresClearCacheFunction::ClearCacheOnSetting(ClientContext &context, SetScope scope, Value &parameter) {
46-
ClearPostgresCaches(context);
46+
PostgresClearCacheFunction::ClearPostgresCaches(context);
4747
}
4848

4949
PostgresClearCacheFunction::PostgresClearCacheFunction()

test/sql/misc/autoload.test

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# name: test/sql/misc/autoload.test
2+
# description: Test autoloading based on setting
3+
# group: [misc]
4+
5+
require postgres_scanner
6+
7+
statement ok
8+
SET pg_connection_cache=false

0 commit comments

Comments
 (0)