diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000000..530192a3b20 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +.git +.vscode +.circleci +tmp_install +compute_build diff --git a/configure b/configure index cbc193dabb6..6049bff6198 100755 --- a/configure +++ b/configure @@ -712,6 +712,7 @@ with_libxml with_uuid with_readline with_systemd +with_libseccomp with_selinux with_ldap with_krb_srvnam @@ -858,6 +859,7 @@ with_bsd_auth with_ldap with_bonjour with_selinux +with_libseccomp with_systemd with_readline with_libedit_preferred @@ -1564,6 +1566,7 @@ Optional Packages: --with-ldap build with LDAP support --with-bonjour build with Bonjour support --with-selinux build with SELinux support + --with-libseccomp build with libseccomp support --with-systemd build with systemd support --without-readline do not use GNU Readline nor BSD Libedit for editing --with-libedit-preferred @@ -8604,6 +8607,39 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_selinux" >&5 $as_echo "$with_selinux" >&6; } +# +# libseccomp +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with libseccomp support" >&5 +$as_echo_n "checking whether to build with libseccomp support... " >&6; } + + + +# Check whether --with-libseccomp was given. +if test "${with_libseccomp+set}" = set; then : + withval=$with_libseccomp; + case $withval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-libseccomp option" "$LINENO" 5 + ;; + esac + +else + with_libseccomp=no + +fi + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_libseccomp" >&5 +$as_echo "$with_libseccomp" >&6; } + # # Systemd # @@ -14244,6 +14280,56 @@ else fi +fi + +if test "$with_libseccomp" = yes ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for seccomp_init in -lseccomp" >&5 +$as_echo_n "checking for seccomp_init in -lseccomp... " >&6; } +if ${ac_cv_lib_seccomp_seccomp_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lseccomp $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char seccomp_init (); +int +main () +{ +return seccomp_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_seccomp_seccomp_init=yes +else + ac_cv_lib_seccomp_seccomp_init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_seccomp_seccomp_init" >&5 +$as_echo "$ac_cv_lib_seccomp_seccomp_init" >&6; } +if test "x$ac_cv_lib_seccomp_seccomp_init" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBSECCOMP 1 +_ACEOF + + LIBS="-lseccomp $LIBS" + +else + as_fn_error $? 
"library 'libseccomp' is required for Seccomp BPF support" "$LINENO" 5 +fi + fi # for contrib/uuid-ossp diff --git a/configure.ac b/configure.ac index 62a10ee510b..48e41bcee72 100644 --- a/configure.ac +++ b/configure.ac @@ -914,6 +914,14 @@ PGAC_ARG_BOOL(with, selinux, no, [build with SELinux support]) AC_SUBST(with_selinux) AC_MSG_RESULT([$with_selinux]) +# +# libseccomp +# +AC_MSG_CHECKING([whether to build with libseccomp support]) +PGAC_ARG_BOOL(with, libseccomp, no, [build with libseccomp support]) +AC_SUBST(with_libseccomp) +AC_MSG_RESULT([$with_libseccomp]) + # # Systemd # @@ -1564,6 +1572,11 @@ dnl If you want to use Apple's own Bonjour code on another platform, dnl just add -ldns_sd to LIBS manually. fi +if test "$with_libseccomp" = yes ; then + AC_CHECK_LIB(seccomp, seccomp_init, [], + [AC_MSG_ERROR([library 'libseccomp' is required for Seccomp BPF support])]) +fi + # for contrib/uuid-ossp if test "$with_uuid" = bsd ; then AC_CHECK_HEADERS(uuid.h, diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index b3f73ea92d6..dc0cb8d478e 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -53,6 +53,7 @@ #include "utils/rel.h" #include "utils/relfilenodemap.h" #include "utils/resowner.h" +#include "utils/spccache.h" #define AUTOPREWARM_FILE "autoprewarm.blocks" @@ -433,10 +434,12 @@ void autoprewarm_database_main(Datum main_arg) { int pos; + int io_concurrency; BlockInfoRecord *block_info; Relation rel = NULL; BlockNumber nblocks = 0; BlockInfoRecord *old_blk = NULL; + BlockInfoRecord *prefetch_blk = NULL; dsm_segment *seg; /* Establish signal handlers; once that's done, unblock signals. */ @@ -502,6 +505,8 @@ autoprewarm_database_main(Datum main_arg) if (!rel) CommitTransactionCommand(); + else + io_concurrency = get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace); } if (!rel) { @@ -536,6 +541,35 @@ autoprewarm_database_main(Datum main_arg) continue; } + /* if prefetching is enabled for this relation */ + if (io_concurrency > 0) + { + /* make prefetch_blk catch up */ + if (blk > prefetch_blk) + { + prefetch_blk = blk; + } + + /* now, prefetch all following blocks */ + while (prefetch_blk <= &block_info[apw_state->prewarm_stop_idx]) + { + /* unless they're of a different relfilenode */ + if (prefetch_blk->filenode != blk->filenode || + prefetch_blk->forknum != blk->forknum || + prefetch_blk->blocknum >= nblocks) + break; + + /* or unless they are more than io_concurrency blocks ahead */ + if (blk + io_concurrency <= prefetch_blk) + break; + + PrefetchBuffer(rel, prefetch_blk->forknum, prefetch_blk->blocknum); + + /* continue with the next block */ + prefetch_blk++; + } + } + /* Prewarm buffer. 
*/ buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL, NULL); diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c index 48d0132a0d0..d530f40b64b 100644 --- a/contrib/pg_prewarm/pg_prewarm.c +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -18,12 +18,14 @@ #include "access/relation.h" #include "fmgr.h" #include "miscadmin.h" +#include "optimizer/cost.h" #include "storage/bufmgr.h" #include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/spccache.h" PG_MODULE_MAGIC; @@ -184,14 +186,26 @@ pg_prewarm(PG_FUNCTION_ARGS) } else if (ptype == PREWARM_BUFFER) { + BlockNumber prefetch_block = first_block; + Oid nspOid; + int io_concurrency; + + nspOid = rel->rd_rel->reltablespace; + io_concurrency = get_tablespace_maintenance_io_concurrency(nspOid); + /* * In buffer mode, we actually pull the data into shared_buffers. */ for (block = first_block; block <= last_block; ++block) { - Buffer buf; - + Buffer buf; + BlockNumber prefetch_stop = block + Min(last_block - block + 1, + io_concurrency); CHECK_FOR_INTERRUPTS(); + while (prefetch_block < prefetch_stop) + { + PrefetchBuffer(rel, forkNumber, prefetch_block++); + } buf = ReadBufferExtended(rel, forkNumber, block, RBM_NORMAL, NULL); ReleaseBuffer(buf); ++blocks_done; diff --git a/contrib/pg_prewarm/pg_prewarm.control b/contrib/pg_prewarm/pg_prewarm.control index 40e3add4810..d40d1a000b7 100644 --- a/contrib/pg_prewarm/pg_prewarm.control +++ b/contrib/pg_prewarm/pg_prewarm.control @@ -3,3 +3,4 @@ comment = 'prewarm relation data' default_version = '1.2' module_pathname = '$libdir/pg_prewarm' relocatable = true +trusted = true diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 3b65f716cd2..3a2ae66989e 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -186,6 +186,7 @@ with_tcl = @with_tcl@ with_ssl = @with_ssl@ with_readline = @with_readline@ with_selinux = @with_selinux@ +with_libseccomp = @with_libseccomp@ with_systemd = @with_systemd@ with_gssapi = @with_gssapi@ with_krb_srvnam = @with_krb_srvnam@ diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c index 3519038b709..3e58a1aa79f 100644 --- a/src/backend/access/brin/brin_xlog.c +++ b/src/backend/access/brin/brin_xlog.c @@ -69,7 +69,8 @@ brin_xlog_insert_update(XLogReaderState *record, } /* need this page's blkno to store in revmap */ - regpgno = BufferGetBlockNumber(buffer); + //ZENITH XXX Don't use BufferGetBlockNumber because wal-redo doesn't pin buffer. 
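
Note on the pg_prewarm hunks above: both the buffer-mode loop and the autoprewarm worker keep a window of PrefetchBuffer() requests a bounded distance ahead of the synchronous reads. A minimal sketch of that pattern follows; warm_range and distance are illustrative names, not part of this patch, and the same window-ahead structure recurs in the vacuum scans patched later in this diff.

/*
 * Sliding-window prefetch, a sketch of the pattern used by the pg_prewarm
 * hunks above.  "distance" plays the role of maintenance_io_concurrency.
 */
static void
warm_range(Relation rel, ForkNumber fork,
           BlockNumber first, BlockNumber last, int distance)
{
	BlockNumber next_prefetch = first;
	BlockNumber blkno;

	Assert(distance > 0);

	for (blkno = first; blkno <= last; blkno++)
	{
		/* keep the prefetch window at most "distance" blocks ahead */
		BlockNumber stop = blkno + Min((BlockNumber) distance,
									   last - blkno + 1);

		while (next_prefetch < stop)
			PrefetchBuffer(rel, fork, next_prefetch++);

		/* this read should now find the I/O already in flight */
		ReleaseBuffer(ReadBufferExtended(rel, fork, blkno, RBM_NORMAL, NULL));
	}
}
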
+ XLogRecGetBlockTag(record, 0, NULL, NULL, ®pgno); /* insert the index item into the page */ if (action == BLK_NEEDS_REDO) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 0e8672c9e90..77af193cfbb 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -335,6 +335,8 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); + smgr_start_unlogged_build(index->rd_smgr); + initGinState(&buildstate.ginstate, index); buildstate.indtuples = 0; memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); @@ -408,6 +410,8 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index); ginUpdateStats(index, &buildstate.buildStats, true); + smgr_finish_unlogged_build_phase_1(index->rd_smgr); + /* * We didn't write WAL records as we built the index, so if WAL-logging is * required, write all pages to the WAL now. @@ -417,8 +421,12 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) log_newpage_range(index, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index), true); + SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index)); + SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM); } + smgr_end_unlogged_build(index->rd_smgr); + /* * Return statistics */ diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 09ce4d6a5ba..261ab868660 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -407,6 +407,7 @@ ginRedoSplit(XLogReaderState *record) rootbuf; bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0; bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0; + XLogRedoAction action; /* * First clear incomplete-split flag on child page if this finishes a @@ -415,21 +416,27 @@ ginRedoSplit(XLogReaderState *record) if (!isLeaf) ginRedoClearIncompleteSplit(record, 3); - if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED) + action = XLogReadBufferForRedo(record, 0, &lbuffer); + if (action != BLK_RESTORED && action != BLK_DONE) elog(ERROR, "GIN split record did not contain a full-page image of left page"); - if (XLogReadBufferForRedo(record, 1, &rbuffer) != BLK_RESTORED) + action = XLogReadBufferForRedo(record, 1, &rbuffer); + if (action != BLK_RESTORED && action != BLK_DONE) elog(ERROR, "GIN split record did not contain a full-page image of right page"); if (isRoot) { - if (XLogReadBufferForRedo(record, 2, &rootbuf) != BLK_RESTORED) + action = XLogReadBufferForRedo(record, 2, &rootbuf); + if (action != BLK_RESTORED && action != BLK_DONE) elog(ERROR, "GIN split record did not contain a full-page image of root page"); - UnlockReleaseBuffer(rootbuf); + if (rootbuf != InvalidBuffer) + UnlockReleaseBuffer(rootbuf); } - UnlockReleaseBuffer(rbuffer); - UnlockReleaseBuffer(lbuffer); + if (rbuffer != InvalidBuffer) + UnlockReleaseBuffer(rbuffer); + if (lbuffer != InvalidBuffer) + UnlockReleaseBuffer(lbuffer); } /* diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index ec28bfe89f0..cef605175ff 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -40,6 +40,7 @@ #include "access/tableam.h" #include "access/xloginsert.h" #include "catalog/index.h" +#include "catalog/storage.h" #include "miscadmin.h" #include 
"optimizer/optimizer.h" #include "storage/bufmgr.h" @@ -289,6 +290,8 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) Buffer buffer; Page page; + smgr_start_unlogged_build(index->rd_smgr); + /* initialize the root page */ buffer = gistNewBuffer(index); Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO); @@ -321,6 +324,8 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) gistFreeBuildBuffers(buildstate.gfbb); } + smgr_finish_unlogged_build_phase_1(index->rd_smgr); + /* * We didn't write WAL records as we built the index, so if * WAL-logging is required, write all pages to the WAL now. @@ -330,7 +335,12 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) log_newpage_range(index, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index), true); + SetLastWrittenLSNForBlockRange(XactLastRecEnd, + index->rd_smgr->smgr_rnode.node, + MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index)); + SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM); } + smgr_end_unlogged_build(index->rd_smgr); } /* okay, all heap tuples are indexed */ @@ -456,8 +466,15 @@ gist_indexsortbuild(GISTBuildState *state) smgrwrite(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO, pagestate->page, true); if (RelationNeedsWAL(state->indexrel)) - log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO, + { + XLogRecPtr lsn; + + lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO, pagestate->page, true); + SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node, + MAIN_FORKNUM, GIST_ROOT_BLKNO); + SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM); + } pfree(pagestate->page); pfree(pagestate); diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 0663193531a..5e4350fd33e 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -23,6 +23,7 @@ #include "storage/indexfsm.h" #include "storage/lmgr.h" #include "utils/memutils.h" +#include "utils/spccache.h" /* Working state needed by gistbulkdelete */ typedef struct @@ -130,8 +131,12 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, BlockNumber num_pages; bool needLock; BlockNumber blkno; + BlockNumber prefetch_blkno; + int io_concurrency; MemoryContext oldctx; + io_concurrency = get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace); + /* * Reset fields that track information about the entire index now. 
This * avoids double-counting in the case where a single VACUUM command @@ -203,6 +208,7 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, needLock = !RELATION_IS_LOCAL(rel); blkno = GIST_ROOT_BLKNO; + prefetch_blkno = blkno; for (;;) { /* Get the current relation length */ @@ -215,9 +221,21 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, /* Quit if we've scanned the whole relation */ if (blkno >= num_pages) break; + + if (prefetch_blkno < blkno) + prefetch_blkno = blkno; + for (; prefetch_blkno < num_pages && + prefetch_blkno < blkno + io_concurrency; prefetch_blkno++) + PrefetchBuffer(rel, MAIN_FORKNUM, prefetch_blkno); + /* Iterate over pages, then loop back to recheck length */ for (; blkno < num_pages; blkno++) + { + if (io_concurrency > 0 && prefetch_blkno < num_pages) + PrefetchBuffer(rel, MAIN_FORKNUM, prefetch_blkno++); + gistvacuumpage(&vstate, blkno, blkno); + } } /* diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 0752fb38a92..b672aa5021c 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -31,6 +31,7 @@ #include "utils/builtins.h" #include "utils/index_selfuncs.h" #include "utils/rel.h" +#include "utils/spccache.h" /* Working state for hashbuild and its callback */ typedef struct @@ -465,13 +466,17 @@ hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, Bucket orig_maxbucket; Bucket cur_maxbucket; Bucket cur_bucket; + Bucket prf_bucket; Buffer metabuf = InvalidBuffer; HashMetaPage metap; HashMetaPage cachedmetap; + int io_concurrency; tuples_removed = 0; num_index_tuples = 0; + io_concurrency = get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace); + /* * We need a copy of the metapage so that we can use its hashm_spares[] * values to compute bucket page addresses, but a cached copy should be @@ -486,9 +491,14 @@ hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, /* Scan the buckets that we know exist */ cur_bucket = 0; + prf_bucket = cur_bucket; cur_maxbucket = orig_maxbucket; loop_top: + for (; prf_bucket <= cur_maxbucket && + prf_bucket < cur_bucket + io_concurrency; prf_bucket++) + PrefetchBuffer(rel, MAIN_FORKNUM, BUCKET_TO_BLKNO(cachedmetap, prf_bucket)); + while (cur_bucket <= cur_maxbucket) { BlockNumber bucket_blkno; @@ -499,6 +509,12 @@ hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, Page page; bool split_cleanup = false; + if (io_concurrency > 0 && prf_bucket <= cur_maxbucket) + { + PrefetchBuffer(rel, MAIN_FORKNUM, BUCKET_TO_BLKNO(cachedmetap, prf_bucket)); + prf_bucket++; + } + /* Get address of bucket's start page */ bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket); diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index fcb7e338982..fa312418875 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -53,6 +53,7 @@ #include "access/xlogutils.h" #include "catalog/catalog.h" #include "miscadmin.h" +#include "optimizer/cost.h" #include "pgstat.h" #include "port/atomics.h" #include "port/pg_bitutils.h" @@ -316,6 +317,27 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) scan->rs_startblock = 0; } + if (enable_seqscan_prefetch) + { + /* + * Do not use tablespace setting for catalog scans, as we might have + * the tablespace settings in the catalogs locked already, which + * might result in a deadlock. 
+		 */
+		if (IsCatalogRelation(scan->rs_base.rs_rd))
+			scan->rs_prefetch_maximum = effective_io_concurrency;
+		else
+			scan->rs_prefetch_maximum =
+				get_tablespace_io_concurrency(scan->rs_base.rs_rd->rd_rel->reltablespace);
+
+		scan->rs_prefetch_target = 1;
+	}
+	else
+	{
+		scan->rs_prefetch_maximum = -1;
+		scan->rs_prefetch_target = -1;
+	}
+
 	scan->rs_numblocks = InvalidBlockNumber;
 	scan->rs_inited = false;
 	scan->rs_ctup.t_data = NULL;
@@ -398,6 +420,105 @@ heapgetpage(TableScanDesc sscan, BlockNumber page)
 	 */
 	CHECK_FOR_INTERRUPTS();
 
+	/* Prefetch next block */
+	if (scan->rs_prefetch_maximum > 0 && scan->rs_nblocks > 1)
+	{
+		int64		nblocks;
+		int64		rel_scan_start;
+		int64		rel_scan_end;	/* blockno of end of scan (mod scan->rs_nblocks) */
+		int64		scan_pageoff;	/* page, but adjusted for scan position */
+
+		int64		prefetch_start;	/* start block of prefetch requests this iteration */
+		int64		prefetch_end;	/* end block of prefetch requests this iteration, if applicable */
+		ParallelBlockTableScanWorker pbscanwork = scan->rs_parallelworkerdata;
+		ParallelBlockTableScanDesc pbscandesc = (ParallelBlockTableScanDesc) sscan->rs_parallel;
+
+		/*
+		 * Parallel scans look like repeated sequential table scans for
+		 * prefetching; each worker's chunk starts at phsw_nallocated +
+		 * phsw_chunk_remaining + 1 - phsw_chunk_size.
+		 */
+		if (pbscanwork != NULL)
+		{
+			uint64		start_offset,
+						end_offset;
+
+			Assert(pbscandesc != NULL);
+			start_offset = pbscanwork->phsw_nallocated
+				+ pbscanwork->phsw_chunk_remaining + 1
+				- pbscanwork->phsw_chunk_size;
+			end_offset = Min(pbscanwork->phsw_nallocated
+							 + pbscanwork->phsw_chunk_remaining + 1,
+							 pbscandesc->phs_nblocks);
+
+			rel_scan_start = (int64) (pbscandesc->phs_startblock) + start_offset;
+			rel_scan_end = (int64) (pbscandesc->phs_startblock) + end_offset;
+			nblocks = pbscandesc->phs_nblocks;
+		}
+		else
+		{
+			rel_scan_start = scan->rs_startblock;
+			rel_scan_end = scan->rs_startblock + scan->rs_nblocks;
+			nblocks = scan->rs_nblocks;
+		}
+
+		prefetch_end = rel_scan_end;
+
+		if ((uint64) page < rel_scan_start)
+			scan_pageoff = page + nblocks;
+		else
+			scan_pageoff = page;
+
+		Assert(rel_scan_start <= scan_pageoff && scan_pageoff <= rel_scan_end);
+
+		/*
+		 * On the first page of this seqscan, initiate prefetch of pages
+		 * page..page + n.  On each subsequent call, prefetch only the next
+		 * page that we haven't prefetched yet, at page + n.  The clamps
+		 * below make sure we never prefetch past the end of the scan
+		 * window.
+		 */
+		if (rel_scan_start != page)
+		{
+			prefetch_start = scan_pageoff + (int64) scan->rs_prefetch_target - 1;
+			prefetch_end = prefetch_start + 1;
+		}
+		else
+		{
+			prefetch_start = scan_pageoff;
+			prefetch_end = rel_scan_end;
+		}
+
+		/* do not prefetch if the only page we're trying to prefetch is past the end of our scan window */
+		if (prefetch_start > rel_scan_end)
+			prefetch_end = 0;
+
+		if (prefetch_end > prefetch_start + scan->rs_prefetch_target)
+			prefetch_end = prefetch_start + scan->rs_prefetch_target;
+
+		if (prefetch_end > rel_scan_end)
+			prefetch_end = rel_scan_end;
+
+		RelationOpenSmgr(scan->rs_base.rs_rd);
+
+		while (prefetch_start < prefetch_end)
+		{
+			BlockNumber blckno = (prefetch_start % nblocks);
+
+			Assert(blckno < nblocks);
+			Assert(blckno < INT_MAX);
+			PrefetchBuffer(scan->rs_base.rs_rd, MAIN_FORKNUM, blckno);
+			prefetch_start += 1;
+		}
+
+		/*
+		 * Use exponential growth of readahead up to prefetch_maximum, to
+		 * make sure that a low LIMIT does not result in high IO overhead,
+		 * but operations in general are still very fast.
+ */ + if (scan->rs_prefetch_target < scan->rs_prefetch_maximum / 2) + scan->rs_prefetch_target *= 2; + else if (scan->rs_prefetch_target < scan->rs_prefetch_maximum) + scan->rs_prefetch_target = scan->rs_prefetch_maximum; + } + /* read page using selected strategy */ scan->rs_cbuf = ReadBufferExtended(scan->rs_base.rs_rd, MAIN_FORKNUM, page, RBM_NORMAL, scan->rs_strategy); @@ -2217,6 +2338,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, xlhdr.t_infomask2 = heaptup->t_data->t_infomask2; xlhdr.t_infomask = heaptup->t_data->t_infomask; xlhdr.t_hoff = heaptup->t_data->t_hoff; + xlhdr.t_cid = HeapTupleHeaderGetRawCommandId(heaptup->t_data); /* * note we mark xlhdr as belonging to buffer; if XLogInsert decides to @@ -2240,7 +2362,18 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, END_CRIT_SECTION(); - UnlockReleaseBuffer(buffer); + if (options & HEAP_INSERT_SPECULATIVE) + { + /* + * NEON: speculative token is not stored in WAL, so if the page is evicted + * from the buffer cache, the token will be lost. To prevent that, we keep the + * buffer pinned. It will be unpinned in heapam_tuple_finish/abort_speculative. + */ + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + } + else + UnlockReleaseBuffer(buffer); + if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); @@ -2535,6 +2668,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, tuphdr->t_infomask2 = heaptup->t_data->t_infomask2; tuphdr->t_infomask = heaptup->t_data->t_infomask; + tuphdr->t_cid = HeapTupleHeaderGetRawCommandId(heaptup->t_data); tuphdr->t_hoff = heaptup->t_data->t_hoff; /* write bitmap [+ padding] [+ oid] + data */ @@ -3047,7 +3181,7 @@ heap_delete(Relation relation, ItemPointer tid, tp.t_data->t_infomask2); xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); xlrec.xmax = new_xmax; - + xlrec.t_cid = HeapTupleHeaderGetRawCommandId(tp.t_data); if (old_key_tuple != NULL) { if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL) @@ -3068,6 +3202,7 @@ heap_delete(Relation relation, ItemPointer tid, { xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2; xlhdr.t_infomask = old_key_tuple->t_data->t_infomask; + xlhdr.t_cid = HeapTupleHeaderGetRawCommandId(old_key_tuple->t_data); xlhdr.t_hoff = old_key_tuple->t_data->t_hoff; XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader); @@ -3793,6 +3928,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, oldtup.t_data->t_infomask2); xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0; + xlrec.t_cid = HeapTupleHeaderGetRawCommandId(oldtup.t_data); XLogRegisterData((char *) &xlrec, SizeOfHeapLock); recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); PageSetLSN(page, recptr); @@ -4981,6 +5117,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, xlrec.infobits_set = compute_infobits(new_infomask, tuple->t_data->t_infomask2); xlrec.flags = cleared_all_frozen ? 
XLH_LOCK_ALL_FROZEN_CLEARED : 0;
+		xlrec.t_cid = HeapTupleHeaderGetRawCommandId(tuple->t_data);
 		XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
 
 		/* we don't decode row locks atm, so no need to log the origin */
@@ -5901,6 +6038,7 @@ heap_finish_speculative(Relation relation, ItemPointer tid)
 
 	END_CRIT_SECTION();
 
+	ReleaseBuffer(buffer);		/* NEON: release buffer pinned by heap_insert */
 	UnlockReleaseBuffer(buffer);
 }
 
@@ -5973,6 +6111,16 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
 		elog(ERROR, "attempted to kill a non-speculative tuple");
 	Assert(!HeapTupleHeaderIsHeapOnly(tp.t_data));
 
+	/*
+	 * NEON: release the buffer pinned by heap_insert.
+	 *
+	 * This function is also used on the toast tuples of an aborted
+	 * speculative insertion. For those, there is no token on the tuple,
+	 * and we didn't keep the pin.
+	 */
+	if (HeapTupleHeaderIsSpeculative(tp.t_data))
+		ReleaseBuffer(buffer);
+
 	/*
 	 * No need to check for serializable conflicts here. There is never a
 	 * need for a combo CID, either. No need to extract replica identity, or
@@ -6030,6 +6178,7 @@
 								  tp.t_data->t_infomask2);
 		xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
 		xlrec.xmax = xid;
+		xlrec.t_cid = HeapTupleHeaderGetRawCommandId(tp.t_data);
 
 		XLogBeginInsert();
 		XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
@@ -8217,7 +8366,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	/* Prepare WAL data for the new page */
 	xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self);
 	xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
-
+	xlrec.t_cid = HeapTupleHeaderGetRawCommandId(newtup->t_data);
 	bufflags = REGBUF_STANDARD;
 	if (init)
 		bufflags |= REGBUF_WILL_INIT;
@@ -8254,6 +8403,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	xlhdr.t_infomask2 = newtup->t_data->t_infomask2;
 	xlhdr.t_infomask = newtup->t_data->t_infomask;
 	xlhdr.t_hoff = newtup->t_data->t_hoff;
+	xlhdr.t_cid = HeapTupleHeaderGetRawCommandId(newtup->t_data);
 	Assert(SizeofHeapTupleHeader + prefixlen + suffixlen <= newtup->t_len);
 
 	/*
@@ -8295,6 +8445,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
 		xlhdr_idx.t_infomask2 = old_key_tuple->t_data->t_infomask2;
 		xlhdr_idx.t_infomask = old_key_tuple->t_data->t_infomask;
 		xlhdr_idx.t_hoff = old_key_tuple->t_data->t_hoff;
+		xlhdr_idx.t_cid = HeapTupleHeaderGetRawCommandId(old_key_tuple->t_data);
 
 		XLogRegisterData((char *) &xlhdr_idx, SizeOfHeapHeader);
 
@@ -8706,6 +8857,17 @@ heap_xlog_visible(XLogReaderState *record)
 
 		PageSetAllVisible(page);
 
+		/*
+		 * NEON: despite the comment above, we need to update the page LSN here.
+		 * See the discussion on hackers: https://www.postgresql.org/message-id/flat/039076d4f6cdd871691686361f83cb8a6913a86a.camel%40j-davis.com#101ba42b004f9988e3d54fce26fb3462
+		 * For Neon this assignment is critical: otherwise the last written LSN
+		 * tracked at the compute node doesn't match the page LSN assigned by
+		 * WAL redo, and as a result the prefetched page is rejected.
+		 *
+		 * This is fixed upstream in https://github.com/neondatabase/postgres/commit/7bf713dd2d0739fbcd4103971ed69c17ebe677ea
+		 * but until that is merged we still need to carry the patch here.
+ */ + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); } else if (action == BLK_RESTORED) @@ -8926,7 +9088,7 @@ heap_xlog_delete(XLogReaderState *record) HeapTupleHeaderSetXmax(htup, xlrec->xmax); else HeapTupleHeaderSetXmin(htup, InvalidTransactionId); - HeapTupleHeaderSetCmax(htup, FirstCommandId, false); + HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false); /* Mark the page as a candidate for pruning */ PageSetPrunable(page, XLogRecGetXid(record)); @@ -9027,7 +9189,7 @@ heap_xlog_insert(XLogReaderState *record) htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); - HeapTupleHeaderSetCmin(htup, FirstCommandId); + HeapTupleHeaderSetCmin(htup, xlhdr.t_cid); htup->t_ctid = target_tid; if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum, @@ -9170,7 +9332,7 @@ heap_xlog_multi_insert(XLogReaderState *record) htup->t_infomask = xlhdr->t_infomask; htup->t_hoff = xlhdr->t_hoff; HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); - HeapTupleHeaderSetCmin(htup, FirstCommandId); + HeapTupleHeaderSetCmin(htup, xlhdr->t_cid); ItemPointerSetBlockNumber(&htup->t_ctid, blkno); ItemPointerSetOffsetNumber(&htup->t_ctid, offnum); @@ -9310,7 +9472,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask, &htup->t_infomask2); HeapTupleHeaderSetXmax(htup, xlrec->old_xmax); - HeapTupleHeaderSetCmax(htup, FirstCommandId, false); + HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false); /* Set forward chain link in t_ctid */ htup->t_ctid = newtid; @@ -9443,7 +9605,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) htup->t_hoff = xlhdr.t_hoff; HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); - HeapTupleHeaderSetCmin(htup, FirstCommandId); + HeapTupleHeaderSetCmin(htup, xlhdr.t_cid); HeapTupleHeaderSetXmax(htup, xlrec->new_xmax); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = newtid; @@ -9584,7 +9746,7 @@ heap_xlog_lock(XLogReaderState *record) offnum); } HeapTupleHeaderSetXmax(htup, xlrec->locking_xid); - HeapTupleHeaderSetCmax(htup, FirstCommandId, false); + HeapTupleHeaderSetCmax(htup, xlrec->t_cid, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 66339392d75..b7b4fa38885 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -627,7 +627,7 @@ heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode) { SMgrRelation dstrel; - dstrel = smgropen(*newrnode, rel->rd_backend); + dstrel = smgropen(*newrnode, rel->rd_backend, rel->rd_rel->relpersistence); RelationOpenSmgr(rel); /* diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 8aab6e324e0..a87cb08e063 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -68,6 +68,7 @@ #include "commands/vacuum.h" #include "executor/instrument.h" #include "miscadmin.h" +#include "optimizer/cost.h" #include "optimizer/paths.h" #include "pgstat.h" #include "portability/instr_time.h" @@ -79,6 +80,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_rusage.h" +#include "utils/spccache.h" #include "utils/timestamp.h" @@ -309,6 +311,9 @@ typedef struct LVRelState Relation *indrels; int nindexes; + /* prefetch */ + int io_concurrency; + /* Wraparound failsafe has been triggered? 
*/ bool failsafe_active; /* Consider index vacuuming bypass optimization? */ @@ -555,6 +560,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, /* Set up high level stuff about rel */ vacrel->rel = rel; + vacrel->io_concurrency = + get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace); vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes, &vacrel->indrels); vacrel->failsafe_active = false; @@ -912,6 +919,7 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) BlockNumber nblocks, blkno, next_unskippable_block, + next_prefetch_block, next_failsafe_block, next_fsm_block_to_vacuum; PGRUsage ru0; @@ -941,6 +949,7 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) nblocks = RelationGetNumberOfBlocks(vacrel->rel); next_unskippable_block = 0; + next_prefetch_block = 0; next_failsafe_block = 0; next_fsm_block_to_vacuum = 0; vacrel->rel_pages = nblocks; @@ -1217,6 +1226,33 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) */ visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); + if (vacrel->io_concurrency > 0) + { + /* + * Prefetch io_concurrency blocks ahead + */ + uint32 prefetch_budget = vacrel->io_concurrency; + + /* never trail behind the current scan */ + if (next_prefetch_block < blkno) + next_prefetch_block = blkno; + + /* but only up to the end of the relation */ + if (prefetch_budget > vacrel->rel_pages - next_prefetch_block) + prefetch_budget = vacrel->rel_pages - next_prefetch_block; + + /* And only up to io_concurrency ahead of the current vacuum scan */ + if (next_prefetch_block + prefetch_budget > blkno + vacrel->io_concurrency) + prefetch_budget = blkno + vacrel->io_concurrency - next_prefetch_block; + + /* And only up to the next unskippable block */ + if (next_prefetch_block + prefetch_budget > next_unskippable_block) + prefetch_budget = next_unskippable_block - next_prefetch_block; + + for (; prefetch_budget-- > 0; next_prefetch_block++) + PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, next_prefetch_block); + } + buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, vacrel->bstrategy); @@ -2314,7 +2350,8 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) static void lazy_vacuum_heap_rel(LVRelState *vacrel) { - int tupindex; + int tupindex, + ptupindex; BlockNumber vacuumed_pages; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; @@ -2337,6 +2374,7 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) vacuumed_pages = 0; tupindex = 0; + ptupindex = 0; while (tupindex < vacrel->dead_tuples->num_tuples) { BlockNumber tblk; @@ -2347,6 +2385,43 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) vacuum_delay_point(); tblk = ItemPointerGetBlockNumber(&vacrel->dead_tuples->itemptrs[tupindex]); + + if (vacrel->io_concurrency > 0) + { + /* + * If we're just starting out, prefetch N consecutive blocks. 
+			 * Otherwise, prefetch only the next block.
+			 */
+			if (ptupindex == 0)
+			{
+				int			prefetch_budget = Min(vacrel->dead_tuples->num_tuples,
+												  Min(vacrel->rel_pages,
+													  vacrel->io_concurrency));
+				BlockNumber prev_prefetch = ItemPointerGetBlockNumber(&vacrel->dead_tuples->itemptrs[ptupindex]);
+
+				RelationOpenSmgr(vacrel->rel);
+				PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, prev_prefetch);
+
+				while (++ptupindex < vacrel->dead_tuples->num_tuples &&
+					   prefetch_budget > 0)
+				{
+					ItemPointer ptr = &vacrel->dead_tuples->itemptrs[ptupindex];
+
+					if (ItemPointerGetBlockNumber(ptr) != prev_prefetch)
+					{
+						prev_prefetch = ItemPointerGetBlockNumber(ptr);
+						prefetch_budget -= 1;
+						PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, prev_prefetch);
+					}
+				}
+			}
+			else if (ptupindex < vacrel->dead_tuples->num_tuples)
+			{
+				BlockNumber previous = ItemPointerGetBlockNumber(&vacrel->dead_tuples->itemptrs[ptupindex]);
+
+				while (++ptupindex < vacrel->dead_tuples->num_tuples)
+				{
+					BlockNumber toPrefetch = ItemPointerGetBlockNumber(&vacrel->dead_tuples->itemptrs[ptupindex]);
+
+					if (previous != toPrefetch)
+					{
+						PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, toPrefetch);
+						break;
+					}
+				}
+			}
+		}
+
 		vacrel->blkno = tblk;
 		buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, tblk, RBM_NORMAL,
 								 vacrel->bstrategy);
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index e198df65d82..19f42dd5cf0 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -289,7 +289,9 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 		 * If data checksums are enabled (or wal_log_hints=on), we
 		 * need to protect the heap page from being torn.
 		 */
+		/* NEON: we have to update the page LSN even if wal_log_hints=off
 		if (XLogHintBitIsNeeded())
+		*/
 		{
 			Page		heapPage = BufferGetPage(heapBuf);
 
@@ -652,10 +654,19 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
 	/* Now extend the file */
 	while (vm_nblocks_now < vm_nblocks)
 	{
-		PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
+		/*
+		 * ZENITH: Initialize VM pages through the buffer cache to prevent
+		 * loading them from the pageserver.
+ */ + Buffer buffer = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, P_NEW, + RBM_ZERO_AND_LOCK, NULL); + Page page = BufferGetPage(buffer); + + PageInit((Page) page, BLCKSZ, 0); + PageSetChecksumInplace(page, vm_nblocks_now); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); - smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now, - pg.data, false); vm_nblocks_now++; } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 1360ab80c1e..fa4298e2734 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -36,6 +36,7 @@ #include "utils/builtins.h" #include "utils/index_selfuncs.h" #include "utils/memutils.h" +#include "utils/spccache.h" /* @@ -907,6 +908,8 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, BTVacState vstate; BlockNumber num_pages; BlockNumber scanblkno; + BlockNumber prefetch_blkno; + int io_concurrency; bool needLock; /* @@ -946,6 +949,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, vstate.maxbufsize = 0; vstate.pendingpages = NULL; vstate.npendingpages = 0; + + io_concurrency = get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace); + /* Consider applying _bt_pendingfsm_finalize optimization */ _bt_pendingfsm_init(rel, &vstate, (callback == NULL)); @@ -975,6 +981,8 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, needLock = !RELATION_IS_LOCAL(rel); scanblkno = BTREE_METAPAGE + 1; + prefetch_blkno = scanblkno; + for (;;) { /* Get the current relation length */ @@ -991,9 +999,19 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, /* Quit if we've scanned the whole relation */ if (scanblkno >= num_pages) break; + + if (prefetch_blkno < scanblkno) + prefetch_blkno = scanblkno; + for (; prefetch_blkno < num_pages && + prefetch_blkno < scanblkno + io_concurrency; prefetch_blkno++) + PrefetchBuffer(rel, MAIN_FORKNUM, prefetch_blkno); + /* Iterate over pages, then loop back to recheck length */ for (; scanblkno < num_pages; scanblkno++) { + if (io_concurrency > 0 && prefetch_blkno < num_pages) + PrefetchBuffer(rel, MAIN_FORKNUM, prefetch_blkno++); + btvacuumpage(&vstate, scanblkno); if (info->report_progress) pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE, diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index 1af0af7da21..48990e046bd 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -85,6 +85,8 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); + smgr_start_unlogged_build(index->rd_smgr); + /* * Initialize the meta page and root pages */ @@ -131,6 +133,8 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) SpGistUpdateMetaPage(index); + smgr_finish_unlogged_build_phase_1(index->rd_smgr); + /* * We didn't write WAL records as we built the index, so if WAL-logging is * required, write all pages to the WAL now. 
@@ -140,8 +144,13 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) log_newpage_range(index, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index), true); + SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, + MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index)); + SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM); } + smgr_end_unlogged_build(index->rd_smgr); + result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = buildstate.indtuples; diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 76fb0374c42..8db596b1cb8 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -27,6 +27,7 @@ #include "storage/indexfsm.h" #include "storage/lmgr.h" #include "utils/snapmgr.h" +#include "utils/spccache.h" /* Entry in pending-list of TIDs we need to revisit */ @@ -796,7 +797,11 @@ spgvacuumscan(spgBulkDeleteState *bds) Relation index = bds->info->index; bool needLock; BlockNumber num_pages, - blkno; + blkno, + prefetch_blkno; + int io_concurrency; + + io_concurrency = get_tablespace_maintenance_io_concurrency(index->rd_rel->reltablespace); /* Finish setting up spgBulkDeleteState */ initSpGistState(&bds->spgstate, index); @@ -824,6 +829,8 @@ spgvacuumscan(spgBulkDeleteState *bds) * in btvacuumscan(). */ blkno = SPGIST_METAPAGE_BLKNO + 1; + prefetch_blkno = blkno; + for (;;) { /* Get the current relation length */ @@ -836,9 +843,19 @@ spgvacuumscan(spgBulkDeleteState *bds) /* Quit if we've scanned the whole relation */ if (blkno >= num_pages) break; + + if (prefetch_blkno < blkno) + prefetch_blkno = blkno; + for (; prefetch_blkno < num_pages && + prefetch_blkno < blkno + io_concurrency; prefetch_blkno++) + PrefetchBuffer(index, MAIN_FORKNUM, prefetch_blkno); + /* Iterate over pages, then loop back to recheck length */ for (; blkno < num_pages; blkno++) { + if (io_concurrency > 0 && prefetch_blkno < num_pages) + PrefetchBuffer(index, MAIN_FORKNUM, prefetch_blkno++); + spgvacuumpage(bds, blkno); /* empty the pending-list after each page */ if (bds->pendingList != NULL) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 134a6481b99..2f39227e3e7 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -61,6 +61,7 @@ #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" +#include "storage/buf_internals.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/large_object.h" @@ -112,6 +113,8 @@ int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ int wal_retrieve_retry_interval = 5000; int max_slot_wal_keep_size_mb = -1; bool track_wal_io_timing = false; +uint64 predefined_sysidentifier; +int lastWrittenLsnCacheSize; #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -181,6 +184,26 @@ const struct config_enum_entry recovery_target_action_options[] = { {NULL, 0, false} }; + +typedef struct LastWrittenLsnCacheEntry +{ + BufferTag key; + XLogRecPtr lsn; + /* double linked list for LRU replacement algorithm */ + dlist_node lru_node; +} LastWrittenLsnCacheEntry; + + +/* + * Cache of last written LSN for each relation page. + * Also to provide request LSN for smgrnblocks, smgrexists there is pseudokey=InvalidBlockId which stores LSN of last + * relation metadata update. 
+ * Size of the cache is limited by the GUC variable lastWrittenLsnCacheSize
+ * ("lsn_cache_size"); pages are replaced using an LRU algorithm based on a
+ * doubly linked list.
+ * Access to this cache is protected by 'LastWrittenLsnLock'.
+ */
+static HTAB *lastWrittenLsnCache;
+
 /*
  * Statistics for current checkpoint are collected in this global struct.
  * Because only the checkpointer or a stand-alone backend can perform
@@ -281,6 +304,13 @@
 bool		InArchiveRecovery = false;
 static bool standby_signal_file_found = false;
 static bool recovery_signal_file_found = false;
 
+/*
+ * Variables read from 'zenith.signal' file.
+ */
+bool		ZenithRecoveryRequested = false;
+XLogRecPtr	zenithLastRec = InvalidXLogRecPtr;
+bool		zenithWriteOk = false;
+
 /* Was the last xlog file restored from archive, or local? */
 static bool restoredFromArchive = false;
 
@@ -742,6 +772,25 @@ typedef struct XLogCtlData
 	 */
 	XLogRecPtr	lastFpwDisableRecPtr;
 
+	/*
+	 * Maximal last written LSN for pages not present in lastWrittenLsnCache
+	 */
+	XLogRecPtr	maxLastWrittenLsn;
+
+	/*
+	 * Doubly linked list implementing the LRU replacement policy for the last
+	 * written LSN cache.  Access to this list, as well as to the cache itself,
+	 * is protected by 'LastWrittenLsnLock'.
+	 */
+	dlist_head	lastWrittenLsnLRU;
+
+	/* neon: copy of startup's RedoStartLSN for walproposer's use */
+	XLogRecPtr	RedoStartLSN;
+
+	/*
+	 * Size of the timeline in the zenith pageserver, used to enforce the
+	 * timeline size limit.
+	 */
+	uint64		zenithCurrentClusterSize;
 
 	slock_t		info_lck;		/* locks shared variables shown above */
 } XLogCtlData;
@@ -963,7 +1012,6 @@ static bool CheckForStandbyTrigger(void);
 static void xlog_outrec(StringInfo buf, XLogReaderState *record);
 #endif
 static void xlog_block_info(StringInfo buf, XLogReaderState *record);
-static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
 static void pg_start_backup_callback(int code, Datum arg);
 static void pg_stop_backup_callback(int code, Datum arg);
 static bool read_backup_label(XLogRecPtr *checkPointLoc,
@@ -5125,11 +5173,8 @@ LocalProcessControlFile(bool reset)
 	ReadControlFile();
 }
 
-/*
- * Initialization of shared memory for XLOG
- */
-Size
-XLOGShmemSize(void)
+static Size
+XLOGCtlShmemSize(void)
 {
 	Size		size;
 
@@ -5169,6 +5214,16 @@
 	return size;
 }
 
+/*
+ * Initialization of shared memory for XLOG
+ */
+Size
+XLOGShmemSize(void)
+{
+	return XLOGCtlShmemSize() +
+		hash_estimate_size(lastWrittenLsnCacheSize, sizeof(LastWrittenLsnCacheEntry));
+}
+
 void
 XLOGShmemInit(void)
 {
@@ -5196,8 +5251,17 @@
 
 	XLogCtl = (XLogCtlData *)
-		ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
+		ShmemInitStruct("XLOG Ctl", XLOGCtlShmemSize(), &foundXLog);
 
+	{
+		static HASHCTL info;
+
+		info.keysize = sizeof(BufferTag);
+		info.entrysize = sizeof(LastWrittenLsnCacheEntry);
+		lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
+											lastWrittenLsnCacheSize, lastWrittenLsnCacheSize,
+											&info,
+											HASH_ELEM | HASH_BLOBS);
+	}
 	localControlFile = ControlFile;
 	ControlFile = (ControlFileData *)
 		ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
@@ -5308,10 +5372,16 @@ BootStrapXLOG(void)
 	 * perhaps be useful sometimes.
*/ gettimeofday(&tv, NULL); - sysidentifier = ((uint64) tv.tv_sec) << 32; - sysidentifier |= ((uint64) tv.tv_usec) << 12; - sysidentifier |= getpid() & 0xFFF; - + if (predefined_sysidentifier != 0) + { + sysidentifier = predefined_sysidentifier; + } + else + { + sysidentifier = ((uint64) tv.tv_sec) << 32; + sysidentifier |= ((uint64) tv.tv_usec) << 12; + sysidentifier |= getpid() & 0xFFF; + } /* First timeline ID is always 1 */ ThisTimeLineID = 1; @@ -5548,6 +5618,81 @@ readRecoverySignalFile(void) errmsg("standby mode is not supported by single-user servers"))); } +static void +readZenithSignalFile(void) +{ + int fd; + + fd = BasicOpenFile(ZENITH_SIGNAL_FILE, O_RDONLY | PG_BINARY); + if (fd >= 0) + { + struct stat statbuf; + char *content; + char prev_lsn_str[20]; + + /* Slurp the file into a string */ + if (stat(ZENITH_SIGNAL_FILE, &statbuf) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + ZENITH_SIGNAL_FILE))); + content = palloc(statbuf.st_size + 1); + if (read(fd, content, statbuf.st_size) != statbuf.st_size) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + ZENITH_SIGNAL_FILE))); + content[statbuf.st_size] = '\0'; + + /* Parse it */ + if (sscanf(content, "PREV LSN: %19s", prev_lsn_str) != 1) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("invalid data in file \"%s\"", ZENITH_SIGNAL_FILE))); + + if (strcmp(prev_lsn_str, "invalid") == 0) + { + /* No prev LSN. Forbid starting up in read-write mode */ + zenithLastRec = InvalidXLogRecPtr; + zenithWriteOk = false; + } + else if (strcmp(prev_lsn_str, "none") == 0) + { + /* + * The page server had no valid prev LSN, but assured that it's ok + * to start without it. This happens when you start the compute + * node for the first time on a new branch. + */ + zenithLastRec = InvalidXLogRecPtr; + zenithWriteOk = true; + } + else + { + uint32 hi, + lo; + + if (sscanf(prev_lsn_str, "%X/%X", &hi, &lo) != 2) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("invalid data in file \"%s\"", ZENITH_SIGNAL_FILE))); + zenithLastRec = ((uint64) hi) << 32 | lo; + + /* If prev LSN is given, it better be valid */ + if (zenithLastRec == InvalidXLogRecPtr) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("invalid prev-LSN in file \"%s\"", ZENITH_SIGNAL_FILE))); + zenithWriteOk = true; + } + ZenithRecoveryRequested = true; + close(fd); + + elog(LOG, + "[ZENITH] found 'zenith.signal' file. setting prev LSN to %X/%X", + LSN_FORMAT_ARGS(zenithLastRec)); + } +} + static void validateRecoveryParameters(void) { @@ -6528,10 +6673,15 @@ StartupXLOG(void) CurrentResourceOwner == AuxProcessResourceOwner); CurrentResourceOwner = AuxProcessResourceOwner; + /* + * Read zenith.signal before anything else. + */ + readZenithSignalFile(); + /* * Check that contents look valid. 
*/ - if (!XRecOffIsValid(ControlFile->checkPoint)) + if (!XRecOffIsValid(ControlFile->checkPoint) && !ZenithRecoveryRequested) ereport(FATAL, (errmsg("control file contains invalid checkpoint location"))); @@ -6661,6 +6811,9 @@ StartupXLOG(void) else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE) ereport(LOG, (errmsg("starting point-in-time recovery to earliest consistent point"))); + else if (ZenithRecoveryRequested) + ereport(LOG, + (errmsg("starting zenith recovery"))); else ereport(LOG, (errmsg("starting archive recovery"))); @@ -6791,6 +6944,31 @@ StartupXLOG(void) /* set flag to delete it later */ haveBackupLabel = true; } + else if (ZenithRecoveryRequested) + { + /* + * Zenith hacks to spawn compute node without WAL. Pretend that we + * just finished reading the record that started at 'zenithLastRec' + * and ended at checkpoint.redo + */ + elog(LOG, "starting with zenith basebackup at LSN %X/%X, prev %X/%X", + LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo), + LSN_FORMAT_ARGS(zenithLastRec)); + + checkPointLoc = zenithLastRec; + RedoStartLSN = ControlFile->checkPointCopy.redo; + /* make basebackup LSN available for walproposer */ + XLogCtl->RedoStartLSN = RedoStartLSN; + EndRecPtr = ControlFile->checkPointCopy.redo; + + memcpy(&checkPoint, &ControlFile->checkPointCopy, sizeof(CheckPoint)); + wasShutdown = true; + + /* Initialize expectedTLEs, like ReadRecord() does */ + expectedTLEs = readTimeLineHistory(checkPoint.ThisTimeLineID); + + XLogBeginRead(xlogreader, EndRecPtr); + } else { /* @@ -6851,6 +7029,7 @@ StartupXLOG(void) /* Get the last valid checkpoint record. */ checkPointLoc = ControlFile->checkPoint; RedoStartLSN = ControlFile->checkPointCopy.redo; + XLogCtl->RedoStartLSN = RedoStartLSN; record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, true); if (record != NULL) { @@ -7049,7 +7228,7 @@ StartupXLOG(void) RedoRecPtr = XLogCtl->RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; doPageWrites = lastFullPageWrites; - if (RecPtr < checkPoint.redo) + if (RecPtr < checkPoint.redo && !ZenithRecoveryRequested) ereport(PANIC, (errmsg("invalid redo in checkpoint record"))); @@ -7716,9 +7895,54 @@ StartupXLOG(void) * that and continue after it. In all other cases, re-fetch the last * valid or last applied record, so we can identify the exact endpoint of * what we consider the valid portion of WAL. + * + * When starting from a zenith base backup, we don't have WAL. Initialize + * the WAL page where we will start writing new records from scratch, + * instead. */ - XLogBeginRead(xlogreader, LastRec); - record = ReadRecord(xlogreader, PANIC, false); + if (ZenithRecoveryRequested) + { + if (!zenithWriteOk) + { + /* + * We cannot start generating new WAL if we don't have a valid prev-LSN + * to use for the first new WAL record. (Shouldn't happen.) + */ + ereport(ERROR, + (errmsg("cannot start in read-write mode from this base backup"))); + } + else + { + int offs = (EndRecPtr % XLOG_BLCKSZ); + XLogRecPtr lastPage = EndRecPtr - offs; + int lastPageSize = ((lastPage % wal_segment_size) == 0) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD; + int idx = XLogRecPtrToBufIdx(lastPage); + XLogPageHeader xlogPageHdr = (XLogPageHeader) (XLogCtl->pages + idx * XLOG_BLCKSZ); + + xlogPageHdr->xlp_pageaddr = lastPage; + xlogPageHdr->xlp_magic = XLOG_PAGE_MAGIC; + xlogPageHdr->xlp_tli = ThisTimeLineID; + /* + * If we start writing with offset from page beginning, pretend in + * page header there is a record ending where actual data will + * start. 
+ */ + xlogPageHdr->xlp_rem_len = offs - lastPageSize; + xlogPageHdr->xlp_info = (xlogPageHdr->xlp_rem_len > 0) ? XLP_FIRST_IS_CONTRECORD : 0; + readOff = XLogSegmentOffset(lastPage, wal_segment_size); + + elog(LOG, "Continue writing WAL at %X/%X", LSN_FORMAT_ARGS(EndRecPtr)); + + // FIXME: should we unlink zenith.signal? + } + } + else + { + XLogBeginRead(xlogreader, LastRec); + record = ReadRecord(xlogreader, PANIC, false); + if (!record) + elog(PANIC, "could not re-read last record"); + } EndOfLog = EndRecPtr; /* @@ -7913,7 +8137,8 @@ StartupXLOG(void) /* Copy the valid part of the last block, and zero the rest */ page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ]; len = EndOfLog % XLOG_BLCKSZ; - memcpy(page, xlogreader->readBuf, len); + if (!ZenithRecoveryRequested) + memcpy(page, xlogreader->readBuf, len); memset(page + len, 0, XLOG_BLCKSZ - len); XLogCtl->xlblocks[firstIdx] = pageBeginPtr + XLOG_BLCKSZ; @@ -7935,6 +8160,8 @@ StartupXLOG(void) XLogCtl->LogwrtRqst.Write = EndOfLog; XLogCtl->LogwrtRqst.Flush = EndOfLog; + XLogCtl->maxLastWrittenLsn = EndOfLog; + dlist_init(&XLogCtl->lastWrittenLsnLRU); LocalSetXLogInsertAllowed(); @@ -8705,6 +8932,175 @@ GetInsertRecPtr(void) return recptr; } +/* + * GetLastWrittenLSN -- Returns maximal LSN of written page. + * It returns an upper bound for the last written LSN of a given page, + * either from a cached last written LSN or a global maximum last written LSN. + * If rnode is InvalidOid then we calculate maximum among all cached LSN and maxLastWrittenLsn. + * If cache is large enough, iterating through all hash items may be rather expensive. + * But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical. + */ +XLogRecPtr +GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno) +{ + XLogRecPtr lsn; + LastWrittenLsnCacheEntry* entry; + + LWLockAcquire(LastWrittenLsnLock, LW_SHARED); + + /* Maximal last written LSN among all non-cached pages */ + lsn = XLogCtl->maxLastWrittenLsn; + + if (rnode.relNode != InvalidOid) + { + BufferTag key; + key.rnode = rnode; + key.forkNum = forknum; + key.blockNum = blkno; + entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL); + if (entry != NULL) + lsn = entry->lsn; + } + else + { + HASH_SEQ_STATUS seq; + /* Find maximum of all cached LSNs */ + hash_seq_init(&seq, lastWrittenLsnCache); + while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL) + { + if (entry->lsn > lsn) + lsn = entry->lsn; + } + } + LWLockRelease(LastWrittenLsnLock); + + return lsn; +} + +/* + * SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range. + * We maintain cache of last written LSNs with limited size and LRU replacement + * policy. Keeping last written LSN for each page allows to use old LSN when + * requesting pages of unchanged or appended relations. Also it is critical for + * efficient work of prefetch in case massive update operations (like vacuum or remove). + * + * rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated. + * SetLastWrittenLsn with dummy rnode is used by createdb and dbase_redo functions. 
+ */ +void +SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks) +{ + if (lsn == InvalidXLogRecPtr || n_blocks == 0) + return; + + LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE); + if (rnode.relNode == InvalidOid) + { + if (lsn > XLogCtl->maxLastWrittenLsn) + XLogCtl->maxLastWrittenLsn = lsn; + } + else + { + LastWrittenLsnCacheEntry* entry; + BufferTag key; + bool found; + BlockNumber i; + + key.rnode = rnode; + key.forkNum = forknum; + for (i = 0; i < n_blocks; i++) + { + key.blockNum = from + i; + entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found); + if (found) + { + if (lsn > entry->lsn) + entry->lsn = lsn; + /* Unlink from LRU list */ + dlist_delete(&entry->lru_node); + } + else + { + entry->lsn = lsn; + if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize) + { + /* Replace least recently used entry */ + LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU)); + /* Adjust max LSN for not cached relations/chunks if needed */ + if (victim->lsn > XLogCtl->maxLastWrittenLsn) + XLogCtl->maxLastWrittenLsn = victim->lsn; + + hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL); + } + } + /* Link to the end of LRU list */ + dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node); + } + } + LWLockRelease(LastWrittenLsnLock); +} + +/* + * SetLastWrittenLSNForBlock -- Set maximal LSN for block + */ +void +SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno) +{ + SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, 1); +} + +/* + * SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata + */ +void +SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum) +{ + SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO); +} + +/* + * SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database + */ +void +SetLastWrittenLSNForDatabase(XLogRecPtr lsn) +{ + RelFileNode dummyNode = {InvalidOid, InvalidOid, InvalidOid}; + SetLastWrittenLSNForBlock(lsn, dummyNode, MAIN_FORKNUM, 0); +} + +/* + * RedoStartLsn is set only once by startup process, locking is not required + * after its exit. + */ +XLogRecPtr +GetRedoStartLsn(void) +{ + return XLogCtl->RedoStartLSN; +} + + +uint64 +GetZenithCurrentClusterSize(void) +{ + uint64 size; + SpinLockAcquire(&XLogCtl->info_lck); + size = XLogCtl->zenithCurrentClusterSize; + SpinLockRelease(&XLogCtl->info_lck); + + return size; +} + + +void +SetZenithCurrentClusterSize(uint64 size) +{ + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->zenithCurrentClusterSize = size; + SpinLockRelease(&XLogCtl->info_lck); +} + + + /* * GetFlushRecPtr -- Returns the current flush position, ie, the last WAL * position known to be fsync'd to disk. @@ -10566,10 +10962,20 @@ xlog_redo(XLogReaderState *record) for (uint8 block_id = 0; block_id <= record->max_block_id; block_id++) { Buffer buffer; + XLogRedoAction result; - if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED) + result = XLogReadBufferForRedo(record, block_id, &buffer); + if (result == BLK_DONE && !IsUnderPostmaster) + { + /* + * In the special WAL process, blocks that are being ignored + * return BLK_DONE. Accept that. 
+ */ + } + else if (result != BLK_RESTORED) elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block"); - UnlockReleaseBuffer(buffer); + if (buffer != InvalidBuffer) + UnlockReleaseBuffer(buffer); } } else if (info == XLOG_BACKUP_END) @@ -10752,7 +11158,7 @@ xlog_block_info(StringInfo buf, XLogReaderState *record) * Returns a string describing an XLogRecord, consisting of its identity * optionally followed by a colon, a space, and a further description. */ -static void +void xlog_outdesc(StringInfo buf, XLogReaderState *record) { RmgrId rmid = XLogRecGetRmid(record); diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index b153fad594d..596f6f3567c 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -29,9 +29,11 @@ #include "miscadmin.h" #include "pg_trace.h" #include "replication/origin.h" +#include "replication/walsender.h" #include "storage/bufmgr.h" #include "storage/proc.h" #include "utils/memutils.h" +#include "utils/wait_event.h" /* Buffer size required to store a compressed version of backup block image */ #define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ) @@ -61,6 +63,11 @@ typedef struct char compressed_page[PGLZ_MAX_BLCKSZ]; } registered_buffer; +/* GUCs */ +int max_replication_apply_lag; +int max_replication_flush_lag; +int max_replication_write_lag; + static registered_buffer *registered_buffers; static int max_registered_buffers; /* allocated size */ static int max_registered_block_id = 0; /* highest block_id + 1 currently @@ -451,6 +458,11 @@ XLogInsert(RmgrId rmid, uint8 info) return EndPos; } + if (delay_backend_us != NULL && delay_backend_us() > 0) + { + InterruptPending = true; + } + do { XLogRecPtr RedoRecPtr; diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index d797d9d5087..9f52c6fca18 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -239,7 +239,7 @@ WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt, void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr) { - Assert(!XLogRecPtrIsInvalid(RecPtr)); + Assert(!XLogRecPtrIsInvalid(RecPtr) || state->skip_lsn_checks); ResetDecoder(state); @@ -279,6 +279,14 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) bool gotheader; int readOff; +#define SKIP_INVALID_RECORD(rec_ptr) do { \ + rec_ptr = MAXALIGN(rec_ptr + 1); \ + if (rec_ptr % XLOG_BLCKSZ <= MAXALIGN(1)) \ + goto restart; \ + else \ + goto skip_invalid; \ + } while (0); + /* * randAccess indicates whether to verify the previous-record pointer of * the record we're reading. We only do this if we're reading @@ -315,7 +323,7 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) * In this case, EndRecPtr should already be pointing to a valid * record starting position. 
*/ - Assert(XRecOffIsValid(RecPtr)); + Assert(XRecOffIsValid(RecPtr) || state->skip_lsn_checks); randAccess = true; } @@ -351,17 +359,23 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) } else if (targetRecOff < pageHeaderSize) { - report_invalid_record(state, "invalid record offset at %X/%X", + if(!state->skip_page_validation) + { + report_invalid_record(state, "invalid record offset at %X/%X", LSN_FORMAT_ARGS(RecPtr)); - goto err; + goto err; + } } if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && targetRecOff == pageHeaderSize) { - report_invalid_record(state, "contrecord is requested by %X/%X", + if(!state->skip_page_validation) + { + report_invalid_record(state, "contrecord is requested by %X/%X", LSN_FORMAT_ARGS(RecPtr)); - goto err; + goto err; + } } /* ReadPageInternal has verified the page header */ @@ -376,6 +390,7 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) * cannot access any other fields until we've verified that we got the * whole header. */ +skip_invalid: record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ); total_len = record->xl_tot_len; @@ -391,7 +406,13 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) { if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr, record, randAccess)) - goto err; + { + if(!state->skip_invalid_records) + goto err; + + SKIP_INVALID_RECORD(RecPtr); + } + gotheader = true; } else @@ -399,12 +420,19 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) /* XXX: more validation should be done here */ if (total_len < SizeOfXLogRecord) { - report_invalid_record(state, - "invalid record length at %X/%X: wanted %u, got %u", - LSN_FORMAT_ARGS(RecPtr), - (uint32) SizeOfXLogRecord, total_len); - goto err; + if(!state->skip_invalid_records) + { + report_invalid_record(state, + "invalid record length at %X/%X: wanted %u, got %u", + LSN_FORMAT_ARGS(RecPtr), + (uint32) SizeOfXLogRecord, total_len); + + goto err; + } + + SKIP_INVALID_RECORD(RecPtr); } + gotheader = false; } @@ -425,10 +453,16 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) if (total_len > state->readRecordBufSize && !allocate_recordbuf(state, total_len)) { - /* We treat this as a "bogus data" condition */ - report_invalid_record(state, "record length %u at %X/%X too long", - total_len, LSN_FORMAT_ARGS(RecPtr)); - goto err; + + if(!state->skip_invalid_records) + { + /* We treat this as a "bogus data" condition */ + report_invalid_record(state, "record length %u at %X/%X too long", + total_len, LSN_FORMAT_ARGS(RecPtr)); + goto err; + } + + SKIP_INVALID_RECORD(RecPtr); } /* Copy the first fragment of the record from the first page. 
*/ @@ -473,10 +507,15 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) /* Check that the continuation on next page looks valid */ if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD)) { - report_invalid_record(state, + if(!state->skip_invalid_records) + { + report_invalid_record(state, "there is no contrecord flag at %X/%X", LSN_FORMAT_ARGS(RecPtr)); - goto err; + goto err; + } + + SKIP_INVALID_RECORD(RecPtr); } /* @@ -486,12 +525,17 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) if (pageHeader->xlp_rem_len == 0 || total_len != (pageHeader->xlp_rem_len + gotlen)) { - report_invalid_record(state, + if(!state->skip_invalid_records) + { + report_invalid_record(state, "invalid contrecord length %u (expected %lld) at %X/%X", pageHeader->xlp_rem_len, ((long long) total_len) - gotlen, LSN_FORMAT_ARGS(RecPtr)); - goto err; + goto err; + } + + SKIP_INVALID_RECORD(RecPtr); } /* Append the continuation from this page to the buffer */ @@ -522,7 +566,13 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) record = (XLogRecord *) state->readRecordBuf; if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr, record, randAccess)) - goto err; + { + if(!state->skip_invalid_records) + goto err; + + SKIP_INVALID_RECORD(RecPtr); + } + gotheader = true; } } while (gotlen < total_len); @@ -531,7 +581,12 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) record = (XLogRecord *) state->readRecordBuf; if (!ValidXLogRecord(state, record, RecPtr)) - goto err; + { + if(!state->skip_invalid_records) + goto err; + + SKIP_INVALID_RECORD(RecPtr); + } pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf); state->ReadRecPtr = RecPtr; @@ -548,7 +603,12 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) /* Record does not cross a page boundary */ if (!ValidXLogRecord(state, record, RecPtr)) - goto err; + { + if(!state->skip_invalid_records) + goto err; + + SKIP_INVALID_RECORD(RecPtr); + } state->EndRecPtr = RecPtr + MAXALIGN(total_len); @@ -652,8 +712,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) /* we can be sure to have enough WAL available, we scrolled back */ Assert(readLen == XLOG_BLCKSZ); - if (!XLogReaderValidatePageHeader(state, targetSegmentPtr, - state->readBuf)) + if (!XLogReaderValidatePageHeader(state, targetSegmentPtr, state->readBuf) && !state->skip_page_validation) goto err; } @@ -690,7 +749,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) /* * Now that we know we have the full header, validate it. */ - if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr)) + if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr) && !state->skip_page_validation) goto err; /* update read state information */ @@ -748,7 +807,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, * We can't exactly verify the prev-link, but surely it should be less * than the record's own address. */ - if (!(record->xl_prev < RecPtr)) + if (!(record->xl_prev < RecPtr) && !state->skip_lsn_checks) { report_invalid_record(state, "record with incorrect prev-link %X/%X at %X/%X", @@ -764,7 +823,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, * check guards against torn WAL pages where a stale but valid-looking * WAL record starts on a sector boundary. 
*/ - if (record->xl_prev != PrevRecPtr) + if (record->xl_prev != PrevRecPtr && !state->skip_lsn_checks) { report_invalid_record(state, "record with incorrect prev-link %X/%X at %X/%X", @@ -907,7 +966,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, * check typically fails when an old WAL segment is recycled, and hasn't * yet been overwritten with new data yet. */ - if (hdr->xlp_pageaddr != recaddr) + if (hdr->xlp_pageaddr != recaddr && !state->skip_lsn_checks) { char fname[MAXFNAMELEN]; diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index d17d660f460..c5d03cd4b83 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -31,6 +31,8 @@ #include "utils/rel.h" +bool (*redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id); + /* GUC variable */ bool ignore_invalid_pages = false; @@ -345,6 +347,21 @@ XLogReadBufferForRedoExtended(XLogReaderState *record, elog(PANIC, "failed to locate backup block with ID %d", block_id); } + if (redo_read_buffer_filter && redo_read_buffer_filter(record, block_id)) + { + if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) + { + *buf = ReadBufferWithoutRelcache(rnode, forknum, + blkno, mode, NULL); + return BLK_DONE; + } + else + { + *buf = InvalidBuffer; + return BLK_DONE; + } + } + /* * Make sure that if the block is marked with WILL_INIT, the caller is * going to initialize it. And vice versa. @@ -446,7 +463,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, Assert(blkno != P_NEW); /* Open the relation at smgr level */ - smgr = smgropen(rnode, InvalidBackendId); + smgr = smgropen(rnode, InvalidBackendId, RELPERSISTENCE_PERMANENT); /* * Create the target file if it doesn't already exist. This lets us cope diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 94ab5ca0954..97546f34e9a 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -53,6 +53,7 @@ uint32 bootstrap_data_checksum_version = 0; /* No checksum */ +extern uint64 predefined_sysidentifier; static void CheckerModeMain(void); static void BootstrapModeMain(void); @@ -225,7 +226,7 @@ AuxiliaryProcessMain(int argc, char *argv[]) /* If no -x argument, we are a CheckerProcess */ MyAuxProcType = CheckerProcess; - while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:x:X:-:")) != -1) + while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:s:x:X:-:")) != -1) { switch (flag) { @@ -272,6 +273,16 @@ AuxiliaryProcessMain(int argc, char *argv[]) PGC_S_OVERRIDE); } break; + case 's': + { + char* endptr; +#ifdef HAVE_STRTOULL + predefined_sysidentifier = strtoull(optarg, &endptr, 10); +#else + predefined_sysidentifier = strtoul(optarg, &endptr, 10); +#endif + break; + } case 'c': case '-': { diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index b7fc491a684..b5996db89b4 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -143,7 +143,7 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) return NULL; /* placate compiler */ } - srel = smgropen(rnode, backend); + srel = smgropen(rnode, backend, relpersistence); smgrcreate(srel, MAIN_FORKNUM, false); if (needs_wal) @@ -175,6 +175,7 @@ void log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum) { xl_smgr_create xlrec; + XLogRecPtr lsn; /* * Make an XLOG entry reporting the file creation. 
@@ -184,7 +185,8 @@ log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xlrec)); - XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE); + lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE); + SetLastWrittenLSNForRelation(lsn, *rnode, forkNum); } /* @@ -651,7 +653,7 @@ smgrDoPendingDeletes(bool isCommit) { SMgrRelation srel; - srel = smgropen(pending->relnode, pending->backend); + srel = smgropen(pending->relnode, pending->backend, 0); /* allocate the initial array, or extend it, if needed */ if (maxrels == 0) @@ -732,7 +734,7 @@ smgrDoPendingSyncs(bool isCommit, bool isParallelWorker) BlockNumber total_blocks = 0; SMgrRelation srel; - srel = smgropen(pendingsync->rnode, InvalidBackendId); + srel = smgropen(pendingsync->rnode, InvalidBackendId, 0); /* * We emit newpage WAL records for smaller relations. @@ -941,7 +943,7 @@ smgr_redo(XLogReaderState *record) xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record); SMgrRelation reln; - reln = smgropen(xlrec->rnode, InvalidBackendId); + reln = smgropen(xlrec->rnode, InvalidBackendId, RELPERSISTENCE_PERMANENT); smgrcreate(reln, xlrec->forkNum, true); } else if (info == XLOG_SMGR_TRUNCATE) @@ -954,7 +956,7 @@ smgr_redo(XLogReaderState *record) int nforks = 0; bool need_fsm_vacuum = false; - reln = smgropen(xlrec->rnode, InvalidBackendId); + reln = smgropen(xlrec->rnode, InvalidBackendId, RELPERSISTENCE_PERMANENT); /* * Forcibly create relation if it doesn't exist (which suggests that diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index d4ab736c690..680bed295fd 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -664,7 +664,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) /* Record the filesystem change in XLOG */ { xl_dbase_create_rec xlrec; - + XLogRecPtr lsn; xlrec.db_id = dboid; xlrec.tablespace_id = dsttablespace; xlrec.src_db_id = src_dboid; @@ -673,8 +673,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); - (void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); + lsn = XLogInsert(RM_DBASE_ID, + XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); + SetLastWrittenLSNForDatabase(lsn); } } table_endscan(scan); @@ -2285,6 +2286,15 @@ dbase_redo(XLogReaderState *record) * We don't need to copy subdirectories */ copydir(src_path, dst_path, false); + + /* + * Make sure any future requests to the page server see the new + * database. 
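+		 *
+		 * The files were copied by copydir() without WAL-logging each page,
+		 * so advance the database-wide last-written LSN to this record's LSN;
+		 * page requests for the new database are then made at an LSN no
+		 * older than this record.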
+ */ + { + XLogRecPtr lsn = record->EndRecPtr; + SetLastWrittenLSNForDatabase(lsn); + } } else if (info == XLOG_DBASE_DROP) { diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 70551522dac..7bc1da96226 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -47,7 +47,6 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 #define X_CLOSING 1 @@ -121,6 +120,7 @@ static void show_eval_params(Bitmapset *bms_params, ExplainState *es); static const char *explain_get_index_name(Oid indexId); static void show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning); +static void show_prefetch_info(ExplainState *es, const PrefetchInfo* prefetch_info); static void show_wal_usage(ExplainState *es, const WalUsage *usage); static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, ExplainState *es); @@ -186,6 +186,8 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt, es->costs = defGetBoolean(opt); else if (strcmp(opt->defname, "buffers") == 0) es->buffers = defGetBoolean(opt); + else if (strcmp(opt->defname, "prefetch") == 0) + es->prefetch = defGetBoolean(opt); else if (strcmp(opt->defname, "wal") == 0) es->wal = defGetBoolean(opt); else if (strcmp(opt->defname, "settings") == 0) @@ -534,7 +536,7 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, else if (es->analyze) instrument_option |= INSTRUMENT_ROWS; - if (es->buffers) + if (es->buffers || es->prefetch) instrument_option |= INSTRUMENT_BUFFERS; if (es->wal) instrument_option |= INSTRUMENT_WAL; @@ -2055,6 +2057,10 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->wal && planstate->instrument) show_wal_usage(es, &planstate->instrument->walusage); + /* Show prefetch usage */ + if (es->prefetch && planstate->instrument) + show_prefetch_info(es, &planstate->instrument->bufusage.prefetch); + /* Prepare per-worker buffer/WAL usage */ if (es->workers_state && (es->buffers || es->wal) && es->verbose) { @@ -3490,6 +3496,34 @@ explain_get_index_name(Oid indexId) return result; } +/* + * Show prefetch statistics + */ +static void +show_prefetch_info(ExplainState *es, const PrefetchInfo* prefetch_info) +{ + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfo(es->str, "Prefetch: hits=%lld misses=%lld expired=%lld duplicates=%lld\n", + (long long) prefetch_info->hits, + (long long) prefetch_info->misses, + (long long) prefetch_info->expired, + (long long) prefetch_info->duplicates); + } + else + { + ExplainPropertyInteger("Prefetch Hits", NULL, + prefetch_info->hits, es); + ExplainPropertyInteger("Prefetch Misses", NULL, + prefetch_info->misses, es); + ExplainPropertyInteger("Prefetch Expired Requests", NULL, + prefetch_info->expired, es); + ExplainPropertyInteger("Prefetch Duplicated Requests", NULL, + prefetch_info->duplicates, es); + } +} + /* * Show buffer usage details. */ diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 98649986e15..109dbd6a944 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -53,7 +53,9 @@ * so we pre-log a few fetches in advance. In the event of * crash we can lose (skip over) as many values as we pre-logged. 
 */
-#define SEQ_LOG_VALS	32
+/* Zenith XXX: to ensure the correct order of sequence values in Zenith we need to WAL-log each sequence update. */
+/* #define SEQ_LOG_VALS	32 */
+#define SEQ_LOG_VALS	0
 
 /*
  * The "special area" of a sequence's buffer page looks like this.
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 89fc297fe31..eb7d3b4dee7 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -14173,7 +14173,7 @@ index_copy_data(Relation rel, RelFileNode newrnode)
 {
 	SMgrRelation dstrel;
 
-	dstrel = smgropen(newrnode, rel->rd_backend);
+	dstrel = smgropen(newrnode, rel->rd_backend, rel->rd_rel->relpersistence);
 
 	RelationOpenSmgr(rel);
 
 	/*
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 2b106d8473c..55cb22250be 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -235,6 +235,10 @@ BufferUsageAdd(BufferUsage *dst, const BufferUsage *add)
 	dst->local_blks_written += add->local_blks_written;
 	dst->temp_blks_read += add->temp_blks_read;
 	dst->temp_blks_written += add->temp_blks_written;
+	dst->prefetch.hits += add->prefetch.hits;
+	dst->prefetch.misses += add->prefetch.misses;
+	dst->prefetch.expired += add->prefetch.expired;
+	dst->prefetch.duplicates += add->prefetch.duplicates;
 	INSTR_TIME_ADD(dst->blk_read_time, add->blk_read_time);
 	INSTR_TIME_ADD(dst->blk_write_time, add->blk_write_time);
 }
@@ -255,6 +259,10 @@ BufferUsageAccumDiff(BufferUsage *dst,
 	dst->local_blks_written += add->local_blks_written - sub->local_blks_written;
 	dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read;
 	dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written;
+	dst->prefetch.hits += add->prefetch.hits - sub->prefetch.hits;
+	dst->prefetch.misses += add->prefetch.misses - sub->prefetch.misses;
+	dst->prefetch.expired += add->prefetch.expired - sub->prefetch.expired;
+	dst->prefetch.duplicates += add->prefetch.duplicates - sub->prefetch.duplicates;
 	INSTR_TIME_ACCUM_DIFF(dst->blk_read_time,
 						  add->blk_read_time, sub->blk_read_time);
 	INSTR_TIME_ACCUM_DIFF(dst->blk_write_time,
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index 350ef5b7d95..1d93111bd03 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -33,6 +33,7 @@
 
 #include "bootstrap/bootstrap.h"
 #include "common/username.h"
+#include "miscadmin.h"
 #include "port/atomics.h"
 #include "postmaster/postmaster.h"
 #include "storage/spin.h"
@@ -51,6 +52,41 @@ static void init_locale(const char *categoryname, int category, const char *loca
 static void help(const char *progname);
 static void check_root(const char *progname);
 
+typedef int (*MainFunc) (int argc, char *argv[]);
+
+static int
+CallExtMain(char *library_name, char *main_func_name, int argc, char *argv[])
+{
+	MainFunc	main_func;
+
+	/*
+	 * Perform just enough initialization that we can load external libraries.
+	 */
+	InitStandaloneProcess(argv[0]);
+
+	SetProcessingMode(InitProcessing);
+
+	/*
+	 * Set default values for command-line options.
+	 */
+	InitializeGUCOptions();
+
+	/* Acquire configuration parameters */
+	if (!SelectConfigFiles(NULL, progname))
+		exit(1);
+
+	/*
+	 * Pretend we are early in bootstrap, loading shared_preload_libraries;
+	 * the neon extension sets PGC_POSTMASTER GUCs, which requires this. 
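+	 * (guc.c refuses to create custom PGC_POSTMASTER variables unless
+	 * process_shared_preload_libraries_in_progress is set.)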
+ */ + process_shared_preload_libraries_in_progress = true; + + main_func = load_external_function(library_name, main_func_name, true, NULL); + + process_shared_preload_libraries_in_progress = false; + + return main_func(argc, argv); +} /* * Any Postgres server process begins execution here. @@ -205,6 +241,10 @@ main(int argc, char *argv[]) PostgresMain(argc, argv, NULL, /* no dbname */ strdup(get_user_name_or_exit(progname))); /* does not return */ + else if (argc > 1 && strcmp(argv[1], "--wal-redo") == 0) + CallExtMain("neon_walredo", "WalRedoMain", argc, argv); + else if (argc > 1 && strcmp(argv[1], "--sync-safekeepers") == 0) + CallExtMain("neon", "WalProposerSync", argc, argv); else PostmasterMain(argc, argv); /* does not return */ abort(); /* should not get here */ diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 006f91f0a87..ba539cc15b5 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -150,6 +150,7 @@ bool enable_parallel_append = true; bool enable_parallel_hash = true; bool enable_partition_pruning = true; bool enable_async_append = true; +bool enable_seqscan_prefetch = true; typedef struct { diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 35cce89e9c9..2dc4ec3d26e 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -156,6 +156,21 @@ InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size) } #endif + /* + * NEON: do not create shared memory segments for single user wal redo + * postgres. Many spawned instances of wal redo may exhaust kernel.shmmni + */ + if (am_wal_redo_postgres) + { + void *ptr = malloc(size); + + if (ptr == NULL) + { + ereport(FATAL, + (errmsg("could not create shared memory segment with size %zu for WAL redo process", size))); + } + return ptr; + } shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection); if (shmid < 0) diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 4831a259c48..9b3f01207f8 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -494,6 +494,13 @@ WalReceiverMain(void) if (endofwal) break; + /* + * Update WAL statistics, which are produced inside + * issue_xlog_fsync function. This is useful for counting + * WAL flushes, by querying pg_stat_wal. + */ + pgstat_send_wal(true); + /* * Ideally we would reuse a WaitEventSet object repeatedly * here to avoid the overheads of WaitLatchOrSocket on epoll diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 28f0a294736..23572609144 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -129,6 +129,12 @@ bool log_replication_commands = false; */ bool wake_wal_senders = false; + +/* + * Backpressure hook, detecting how much we should delay. + */ +uint64 (*delay_backend_us)(void) = NULL; + /* * xlogreader used for replication. 
Note that a WAL sender doing physical * replication does not need xlogreader to read WAL, but it needs one to @@ -250,8 +256,6 @@ static void WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, Tran static void WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write); static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid); static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc); -static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time); -static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now); static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch); static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo, @@ -1880,7 +1884,7 @@ ProcessStandbyMessage(void) /* * Remember that a walreceiver just confirmed receipt of lsn `lsn`. */ -static void +void PhysicalConfirmReceivedLocation(XLogRecPtr lsn) { bool changed = false; @@ -1919,21 +1923,41 @@ ProcessStandbyReplyMessage(void) flushPtr, applyPtr; bool replyRequested; - TimeOffset writeLag, - flushLag, - applyLag; - bool clearLagTimes; - TimestampTz now; TimestampTz replyTime; - static bool fullyAppliedLastTime = false; - /* the caller already consumed the msgtype byte */ writePtr = pq_getmsgint64(&reply_message); flushPtr = pq_getmsgint64(&reply_message); applyPtr = pq_getmsgint64(&reply_message); replyTime = pq_getmsgint64(&reply_message); replyRequested = pq_getmsgbyte(&reply_message); + ProcessStandbyReply(writePtr, + flushPtr, + applyPtr, + replyTime, + replyRequested); + + elog(LOG, "ProcessStandbyReplyMessage: writelsn %X/%X", + LSN_FORMAT_ARGS(writePtr)); + elog(LOG, "ProcessStandbyReplyMessage: flushlsn %X/%X", + LSN_FORMAT_ARGS(flushPtr)); + elog(LOG, "ProcessStandbyReplyMessage: applylsn %X/%X", + LSN_FORMAT_ARGS(applyPtr)); +} + +void +ProcessStandbyReply(XLogRecPtr writePtr, + XLogRecPtr flushPtr, + XLogRecPtr applyPtr, + TimestampTz replyTime, + bool replyRequested) +{ + TimeOffset writeLag, + flushLag, + applyLag; + bool clearLagTimes; + TimestampTz now; + static bool fullyAppliedLastTime = false; if (message_level_is_interesting(DEBUG2)) { @@ -2116,7 +2140,16 @@ ProcessStandbyHSFeedbackMessage(void) feedbackEpoch = pq_getmsgint(&reply_message, 4); feedbackCatalogXmin = pq_getmsgint(&reply_message, 4); feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4); + ProcessStandbyHSFeedback(replyTime, feedbackXmin, feedbackEpoch, feedbackCatalogXmin, feedbackCatalogEpoch); +} +void +ProcessStandbyHSFeedback(TimestampTz replyTime, + TransactionId feedbackXmin, + uint32 feedbackEpoch, + TransactionId feedbackCatalogXmin, + uint32 feedbackCatalogEpoch) +{ if (message_level_is_interesting(DEBUG2)) { char *replyTimeStr; @@ -2783,30 +2816,33 @@ XLogSendPhysical(void) Assert(nbytes <= MAX_SEND_SIZE); /* - * OK to read and send the slice. - */ - resetStringInfo(&output_message); - pq_sendbyte(&output_message, 'w'); + * OK to read and send the slice. + */ + if (output_message.data) + resetStringInfo(&output_message); + else + initStringInfo(&output_message); + pq_sendbyte(&output_message, 'w'); pq_sendint64(&output_message, startptr); /* dataStart */ pq_sendint64(&output_message, SendRqstPtr); /* walEnd */ pq_sendint64(&output_message, 0); /* sendtime, filled in last */ /* - * Read the log directly into the output buffer to avoid extra memcpy - * calls. - */ + * Read the log directly into the output buffer to avoid extra memcpy + * calls. 
+ */ enlargeStringInfo(&output_message, nbytes); retry: if (!WALRead(xlogreader, - &output_message.data[output_message.len], - startptr, - nbytes, - xlogreader->seg.ws_tli, /* Pass the current TLI because - * only WalSndSegmentOpen controls - * whether new TLI is needed. */ - &errinfo)) + &output_message.data[output_message.len], + startptr, + nbytes, + xlogreader->seg.ws_tli, /* Pass the current TLI because + * only WalSndSegmentOpen controls + * whether new TLI is needed. */ + &errinfo)) WALReadRaiseError(&errinfo); /* See logical_read_xlog_page(). */ @@ -2814,11 +2850,11 @@ XLogSendPhysical(void) CheckXLogRemoved(segno, xlogreader->seg.ws_tli); /* - * During recovery, the currently-open WAL file might be replaced with the - * file of the same name retrieved from archive. So we always need to - * check what we read was valid after reading into the buffer. If it's - * invalid, we try to open and read the file again. - */ + * During recovery, the currently-open WAL file might be replaced with the + * file of the same name retrieved from archive. So we always need to + * check what we read was valid after reading into the buffer. If it's + * invalid, we try to open and read the file again. + */ if (am_cascading_walsender) { WalSnd *walsnd = MyWalSnd; @@ -2968,8 +3004,8 @@ WalSndDone(WalSndSendDataCallback send_data) * flush location if valid, write otherwise. Tools like pg_receivewal will * usually (unless in synchronous mode) return an invalid flush location. */ - replicatedPtr = XLogRecPtrIsInvalid(MyWalSnd->flush) ? - MyWalSnd->write : MyWalSnd->flush; + // XXX Zenith uses flush_lsn to pass extra payload, so use write_lsn here + replicatedPtr = MyWalSnd->write; if (WalSndCaughtUp && sentPtr == replicatedPtr && !pq_is_send_pending()) @@ -3565,7 +3601,7 @@ WalSndKeepaliveIfNecessary(void) * eventually reported to have been written, flushed and applied by the * standby in a reply message. */ -static void +void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time) { bool buffer_full; @@ -3630,7 +3666,7 @@ LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time) * Return -1 if no new sample data is available, and otherwise the elapsed * time in microseconds. */ -static TimeOffset +TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now) { TimestampTz time = 0; diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index a299be10430..c601970cd97 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -23,6 +23,14 @@ ConditionVariableMinimallyPadded *BufferIOCVArray; WritebackContext BackendWritebackContext; CkptSortItem *CkptBufferIds; +/* + * Buffer with target WAL redo page. + * We must not evict this page from the buffer pool, but we cannot just keep it pinned because + * some WAL redo functions expect the page to not be pinned. So we have a special check in + * localbuf.c to prevent this buffer from being evicted. + */ +Buffer wal_redo_buffer; +bool am_wal_redo_postgres = false; /* * Data Structures: diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index d2eb69b5b0c..d2c5ba639cf 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -54,6 +54,7 @@ #include "utils/rel.h" #include "utils/resowner_private.h" #include "utils/timestamp.h" +#include "replication/walsender.h" /* Note: these two macros only work on shared buffers, not local ones! 
*/ @@ -157,6 +158,9 @@ int checkpoint_flush_after = 0; int bgwriter_flush_after = 0; int backend_flush_after = 0; +/* Evict unpinned pages (for better test coverage) */ +bool zenith_test_evict = false; + /* local state for StartBufferIO and related functions */ static BufferDesc *InProgressBuf = NULL; static bool IsForInput; @@ -799,7 +803,7 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, { bool hit; - SMgrRelation smgr = smgropen(rnode, InvalidBackendId); + SMgrRelation smgr = smgropen(rnode, InvalidBackendId, RELPERSISTENCE_PERMANENT); Assert(InRecovery); @@ -807,7 +811,6 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, mode, strategy, &hit); } - /* * ReadBuffer_common -- common logic for all ReadBuffer variants * @@ -822,7 +825,11 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, Block bufBlock; bool found; bool isExtend; - bool isLocalBuf = SmgrIsTemp(smgr); + /* + * wal_redo postgres is working in single user mode, we do not need to synchronize access to shared buffer, + * so let's use local buffers instead + */ + bool isLocalBuf = SmgrIsTemp(smgr) || am_wal_redo_postgres; *hit = false; @@ -932,11 +939,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr); if (!PageIsNew((Page) bufBlock)) - ereport(ERROR, + { + // XXX-ZENITH + MemSet((char *) bufBlock, 0, BLCKSZ); + ereport(DEBUG1, (errmsg("unexpected data beyond EOF in block %u of relation %s", blockNum, relpath(smgr->smgr_rnode, forkNum)), errhint("This has been seen to occur with buggy kernels; consider updating your system."))); - + } /* * We *must* do smgrextend before succeeding, else the page will not * be reserved by the kernel, and the next P_NEW call will decide to @@ -1925,6 +1935,32 @@ UnpinBuffer(BufferDesc *buf, bool fixOwner) UnlockBufHdr(buf, buf_state); } ForgetPrivateRefCountEntry(ref); + + if (zenith_test_evict && !InRecovery) + { + buf_state = LockBufHdr(buf); + if (BUF_STATE_GET_REFCOUNT(buf_state) == 0) + { + if (buf_state & BM_DIRTY) + { + ReservePrivateRefCountEntry(); + PinBuffer_Locked(buf); + if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), + LW_SHARED)) + { + FlushOneBuffer(b); + LWLockRelease(BufferDescriptorGetContentLock(buf)); + } + UnpinBuffer(buf, true); + } + else + { + InvalidateBuffer(buf); + } + } + else + UnlockBufHdr(buf, buf_state); + } } } @@ -2860,7 +2896,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln) /* Find smgr relation for buffer */ if (reln == NULL) - reln = smgropen(buf->tag.rnode, InvalidBackendId); + reln = smgropen(buf->tag.rnode, InvalidBackendId, 0); TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum, buf->tag.blockNum, @@ -4867,7 +4903,7 @@ IssuePendingWritebacks(WritebackContext *context) i += ahead; /* and finally tell the kernel to write the data to storage */ - reln = smgropen(tag.rnode, InvalidBackendId); + reln = smgropen(tag.rnode, InvalidBackendId, 0); smgrwriteback(reln, tag.forkNum, tag.blockNum, nblocks); } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 04b3558ea33..f22ec0d82df 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -18,12 +18,15 @@ #include "access/parallel.h" #include "catalog/catalog.h" #include "executor/instrument.h" +#include "miscadmin.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "utils/guc.h" #include "utils/memutils.h" 
#include "utils/resowner_private.h" +/* ZENITH: prevent eviction of the buffer of target page */ +extern Buffer wal_redo_buffer; /*#define LBDEBUG*/ @@ -182,6 +185,12 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, if (LocalRefCount[b] == 0) { + if (-b - 1 == wal_redo_buffer) + { + /* ZENITH: Prevent eviction of the buffer with target wal redo page */ + continue; + } + buf_state = pg_atomic_read_u32(&bufHdr->state); if (BUF_STATE_GET_USAGECOUNT(buf_state) > 0) @@ -215,7 +224,10 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, Page localpage = (char *) LocalBufHdrGetBlock(bufHdr); /* Find smgr relation for buffer */ - oreln = smgropen(bufHdr->tag.rnode, MyBackendId); + if (am_wal_redo_postgres && MyBackendId == InvalidBackendId) + oreln = smgropen(bufHdr->tag.rnode, MyBackendId, RELPERSISTENCE_PERMANENT); + else + oreln = smgropen(bufHdr->tag.rnode, MyBackendId, RELPERSISTENCE_TEMP); PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index a4be5fe5135..7f27423b091 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -58,8 +58,13 @@ * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE. * The reason is that we'd like large BufFiles to be spread across multiple * tablespaces when available. - */ #define MAX_PHYSICAL_FILESIZE 0x40000000 + */ +/* + * NEON: use smaller temp file to allows swapping them to pageserver. + * 1Gb files cause palloc failure and requires sending file in multiple requests + */ +#define MAX_PHYSICAL_FILESIZE 0x10000000 #define BUFFILE_SEG_SIZE (MAX_PHYSICAL_FILESIZE / BLCKSZ) /* diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index e76daff4951..2227a681eb8 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -98,6 +98,7 @@ #include "portability/mem.h" #include "storage/fd.h" #include "storage/ipc.h" +#include "storage/smgr.h" #include "utils/guc.h" #include "utils/resowner_private.h" @@ -188,6 +189,8 @@ int recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC; #define FD_DELETE_AT_CLOSE (1 << 0) /* T = delete when closed */ #define FD_CLOSE_AT_EOXACT (1 << 1) /* T = close at eoXact */ #define FD_TEMP_FILE_LIMIT (1 << 2) /* T = respect temp_file_limit */ +#define FD_TEMP_FILE_PINNED (1 << 3) /* T = file pinned from been offloaded */ +#define FD_TEMP_FILE_OFFLOADED (1 << 4) /* T = offloaded to pageserver */ typedef struct vfd { @@ -211,6 +214,7 @@ typedef struct vfd */ static Vfd *VfdCache; static Size SizeVfdCache = 0; +static int LastOffloadedFile = 0; /* * Number of file descriptors known to be in use by VFD entries. @@ -293,6 +297,7 @@ static int nextTempTableSpace = 0; * ReleaseLruFiles - Release fd(s) until we're under the max_safe_fds limit * AllocateVfd - grab a free (or new) file record (from VfdCache) * FreeVfd - free a file record + * OffloadTempFiles- NEON: offload temp files to pageserver * * The Least Recently Used ring is a doubly linked list that begins and * ends on element zero. Element zero is special -- it doesn't represent @@ -1404,19 +1409,128 @@ FreeVfd(File file) VfdCache[0].nextFree = file; } +static char const* GetFileName(char const* path) +{ + char const* sep = strrchr(path, '/'); + Assert(sep != NULL); + return sep + 1; +} + +/* NEON: Offload largerst temp file to pageserver. + * In principle, it is not mandatory to locate largest one. 
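+/*
+ * For reference: the payload built below for SMGR_FCNTL_WRITE_TEMP_FILE is a
+ * length-prefixed file name followed by the raw file contents:
+ *
+ *		byte 0            file name length (len)
+ *		bytes 1..len      file name, not NUL-terminated
+ *		bytes len+1 ...   file data (fileSize bytes)
+ *
+ * SMGR_FCNTL_READ_TEMP_FILE and SMGR_FCNTL_UNLINK_TEMP_FILE requests carry
+ * the same length-prefixed name without the data part.
+ */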
+static void
+OffloadTempFiles(File pinned)
+{
+	VfdCache[pinned].fdstate |= FD_TEMP_FILE_PINNED;
+	Delete(pinned);				/* protect it from being closed */
+
+	while (temporary_files_size > (uint64) temp_file_limit * (uint64) 1024)
+	{
+		File victimFile = 0;
+		Vfd* victim;
+		char* buf;
+		char const* file_name;
+		size_t len;
+
+		for (File i = 0; i < SizeVfdCache; i++)
+		{
+			int j = LastOffloadedFile + i + 1;
+			if (j >= SizeVfdCache)
+				j -= SizeVfdCache;
+			if ((VfdCache[j].fdstate & (FD_TEMP_FILE_LIMIT | FD_TEMP_FILE_OFFLOADED | FD_TEMP_FILE_PINNED)) == FD_TEMP_FILE_LIMIT)
+			{
+				victimFile = j;
+				break;
+			}
+		}
+		if (victimFile == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+					 errmsg("temporary file size exceeds temp_file_limit (%dkB)",
+							temp_file_limit)));
+
+		/* Offload the file to the pageserver */
+		victim = &VfdCache[victimFile];
+		temporary_files_size -= victim->fileSize;
+		victim->fdstate |= FD_TEMP_FILE_PINNED;
+		file_name = GetFileName(victim->fileName);
+		len = strlen(file_name);
+		buf = palloc(len + 1 + victim->fileSize);
+		buf[0] = (char)len;
+		memcpy(buf+1, file_name, len);
+		if (FileRead(victimFile, buf + len + 1, victim->fileSize, 0, WAIT_EVENT_DATA_FILE_READ) != victim->fileSize)
+		{
+			pfree(buf);
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+					 errmsg("Failed to read temp file %s: %m",
+							victim->fileName)));
+		}
+		else
+		{
+			RelFileNode dummy_rnode = {0, 0, 0};
+			SMgrRelation rel = smgropen(dummy_rnode, InvalidBackendId, 'p');
+			smgr_fcntl(rel, SMGR_FCNTL_WRITE_TEMP_FILE, buf, len + 1 + victim->fileSize);
+			pfree(buf);
+		}
+		LruDelete(victimFile);	/* close this file descriptor */
+		victim->fdstate |= FD_TEMP_FILE_OFFLOADED;
+		victim->fdstate &= ~FD_TEMP_FILE_PINNED;
+		/* The offloaded file needs to be writable */
+		victim->fileMode |= O_RDWR;
+		LastOffloadedFile = victimFile;
+
+		if (truncate(victim->fileName, 0) < 0)
+			ereport(WARNING,
+					(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+					 errmsg("Failed to truncate temp file %s: %m",
+							victim->fileName)));
+
+	}
+	Insert(pinned);
+	VfdCache[pinned].fdstate &= ~FD_TEMP_FILE_PINNED;
+}
+
 /* returns 0 on success, -1 on re-open failure (with errno set) */
 static int
 FileAccess(File file)
 {
 	int			returnValue;
+	Vfd* vfdP = &VfdCache[file];
 
 	DO_DB(elog(LOG, "FileAccess %d (%s)",
-			   file, VfdCache[file].fileName));
-
-	/*
-	 * Is the file open? If not, open it and put it at the head of the LRU
-	 * ring (possibly closing the least recently used file to get an FD). 
- */
+			   file, vfdP->fileName));
+
+	/* NEON: download the offloaded file */
+	if (vfdP->fdstate & FD_TEMP_FILE_OFFLOADED)
+	{
+		RelFileNode dummy_rnode = {0, 0, 0};
+		SMgrRelation rel = smgropen(dummy_rnode, InvalidBackendId, 'p');
+		char const* file_name = GetFileName(vfdP->fileName);
+		size_t len = strlen(file_name);
+		char* buf = (char*)palloc(Max(vfdP->fileSize, len + 1));
+		buf[0] = (char)len;
+		memcpy(buf + 1, file_name, len);
+		returnValue = smgr_fcntl(rel, SMGR_FCNTL_READ_TEMP_FILE, buf, len + 1);
+		if (returnValue != vfdP->fileSize)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+					 errmsg("Received %d bytes of offloaded file %s instead of the expected %d: %m",
+							returnValue, vfdP->fileName, (int)vfdP->fileSize)));
+		vfdP->fdstate &= ~FD_TEMP_FILE_OFFLOADED;
+		if (FileWrite(file, buf, vfdP->fileSize, 0, WAIT_EVENT_DATA_FILE_WRITE) != vfdP->fileSize)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+					 errmsg("Failed to write offloaded file %s: %m",
+							vfdP->fileName)));
+		pfree(buf);
+		temporary_files_size += vfdP->fileSize;
+		OffloadTempFiles(file);
+	}
 
 	if (FileIsNotOpen(file))
 	{
@@ -1467,7 +1581,7 @@ RegisterTemporaryFile(File file)
 	VfdCache[file].resowner = CurrentResourceOwner;
 
 	/* Backup mechanism for closing at end of xact. */
-	VfdCache[file].fdstate |= FD_CLOSE_AT_EOXACT;
+	VfdCache[file].fdstate |= FD_CLOSE_AT_EOXACT | FD_TEMP_FILE_LIMIT;
 	have_xact_temporary_files = true;
 }
 
@@ -1671,6 +1785,7 @@ OpenTemporaryFile(bool interXact)
 
 	/* Mark it for deletion at close and temporary file size limit */
 	VfdCache[file].fdstate |= FD_DELETE_AT_CLOSE | FD_TEMP_FILE_LIMIT;
+	VfdCache[file].fileSize = 0;
 
 	/* Register it with the current resource owner */
 	if (!interXact)
@@ -1811,7 +1926,33 @@ PathNameOpenTemporaryFile(const char *path, int mode)
 
 	file = PathNameOpenFile(path, mode | PG_BINARY);
 
-	/* If no such file, then we don't raise an error. */
+	if (file <= 0 && errno == ENOENT)
+	{
+		char* offloaded_path = psprintf("%s.offloaded", path);
+		struct stat filestats;
+
+		if (stat(offloaded_path, &filestats) == 0)
+		{
+			unlink(offloaded_path);
+			file = PathNameOpenFile(path, (mode & ~O_RDONLY) | PG_BINARY | O_RDWR | O_CREAT | O_TRUNC);
+			if (file > 0)
+			{
+				VfdCache[file].fdstate |= FD_TEMP_FILE_OFFLOADED;
+				VfdCache[file].fileSize = filestats.st_size;
+			}
+		}
+	}
+	else if (file > 0)
+	{
+		VfdCache[file].fileSize = lseek(VfdCache[file].fd, 0, SEEK_END);
+		if (VfdCache[file].fileSize < 0)
+			elog(ERROR, "could not get temporary file size \"%s\": %m",
+				 VfdCache[file].fileName);
+		temporary_files_size += VfdCache[file].fileSize;
+		OffloadTempFiles(file);
+	}
+
+	/* If no such file, then we don't raise an error. */
 	if (file <= 0 && errno != ENOENT)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -1827,6 +1968,23 @@ PathNameOpenTemporaryFile(const char *path, int mode)
 	return file;
 }
 
+
+static void UnlinkOffloadedFile(char const* file_path)
+{
+	RelFileNode dummy_rnode = {0, 0, 0};
+	char const* file_name = GetFileName(file_path);
+	size_t len = strlen(file_name);
+	char* buf = palloc(len + 1);
+	SMgrRelation rel;
+
+	buf[0] = (char)len;
+	memcpy(buf + 1, file_name, len);
+	rel = smgropen(dummy_rnode, InvalidBackendId, 'p');
+	if (smgr_fcntl(rel, SMGR_FCNTL_UNLINK_TEMP_FILE, buf, len + 1) < 0)
+		elog(ERROR, "Failed to send unlink temp file request to pageserver");
+	pfree(buf);
+}
+
 /*
  * Delete a file by pathname.  Return true if the file existed, false if
  * didn't. 
@@ -1849,16 +2007,40 @@ PathNameDeleteTemporaryFile(const char *path, bool error_on_failure)
 	 * many segments it has to delete until it runs out.
 	 */
 	if (stat_errno == ENOENT)
-		return false;
-
-	if (unlink(path) < 0)
 	{
-		if (errno != ENOENT)
+		char* offloaded_path;
+		offloaded_path = psprintf("%s.offloaded", path);
+
+		if (stat(offloaded_path, &filestats) != 0)
+		{
+			pfree(offloaded_path);
+			return false;
+		}
+		if (unlink(offloaded_path) < 0)
+		{
+			pfree(offloaded_path);
 			ereport(error_on_failure ? ERROR : LOG,
 					(errcode_for_file_access(),
 					 errmsg("could not unlink temporary file \"%s\": %m",
 							path)));
-		return false;
+			return false;
+		}
+		UnlinkOffloadedFile(path);
+		pfree(offloaded_path);
+		stat_errno = 0;
+	}
+	else
+	{
+		if (unlink(path) < 0)
+		{
+			if (errno != ENOENT)
+			{
+				ereport(error_on_failure ? ERROR : LOG,
+						(errcode_for_file_access(),
+						 errmsg("could not unlink temporary file \"%s\": %m",
+								path)));
+			}
+		}
 	}
 
 	if (stat_errno == 0)
@@ -1911,8 +2093,30 @@ FileClose(File file)
 
 	if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
 	{
-		/* Subtract its size from current usage (do first in case of error) */
-		temporary_files_size -= vfdP->fileSize;
+		if ((vfdP->fdstate & (FD_TEMP_FILE_OFFLOADED | FD_DELETE_AT_CLOSE)) == FD_TEMP_FILE_OFFLOADED)
+		{
+			int fd;
+			char* offloaded_path = psprintf("%s.offloaded", vfdP->fileName);
+
+			unlink(vfdP->fileName);
+			fd = BasicOpenFile(offloaded_path, O_RDWR | O_CREAT | O_TRUNC);
+			if (fd < 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not open file \"%s\": %m",
+								offloaded_path)));
+			/* We need to remember the offloaded file's size: assume the FS supports sparse files and will not actually allocate all of the requested space */
+			if (ftruncate(fd, vfdP->fileSize) != 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not truncate file \"%s\": %m",
+								offloaded_path)));
+			close(fd);
+			pfree(offloaded_path);
+		}
+		/* Subtract its size from current usage (do first in case of error) */
+		if (!(vfdP->fdstate & FD_TEMP_FILE_OFFLOADED))
+			temporary_files_size -= vfdP->fileSize;
 		vfdP->fileSize = 0;
 	}
 
@@ -1940,6 +2144,10 @@ FileClose(File file)
 	else
 		stat_errno = 0;
 
+
+	if (vfdP->fdstate & FD_TEMP_FILE_OFFLOADED)
+		UnlinkOffloadedFile(vfdP->fileName);
+
 	/* in any case do the unlink */
 	if (unlink(vfdP->fileName))
 		ereport(LOG,
@@ -2104,31 +2312,6 @@ FileWrite(File file, char *buffer, int amount, off_t offset,
 
 	vfdP = &VfdCache[file];
 
-	/*
-	 * If enforcing temp_file_limit and it's a temp file, check to see if the
-	 * write would overrun temp_file_limit, and throw error if so.  Note: it's
-	 * really a modularity violation to throw error here; we should set errno
-	 * and return -1.  However, there's no way to report a suitable error
-	 * message if we do that.  All current callers would just throw error
-	 * immediately anyway, so this is safe at present. 
- */
-	if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
-	{
-		off_t		past_write = offset + amount;
-
-		if (past_write > vfdP->fileSize)
-		{
-			uint64		newTotal = temporary_files_size;
-
-			newTotal += past_write - vfdP->fileSize;
-			if (newTotal > (uint64) temp_file_limit * (uint64) 1024)
-				ereport(ERROR,
-						(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
-						 errmsg("temporary file size exceeds temp_file_limit (%dkB)",
-								temp_file_limit)));
-		}
-	}
-
 retry:
 	errno = 0;
 	pgstat_report_wait_start(wait_event_info);
@@ -2152,6 +2335,8 @@ FileWrite(File file, char *buffer, int amount, off_t offset,
 		{
 			temporary_files_size += past_write - vfdP->fileSize;
 			vfdP->fileSize = past_write;
+			/* NEON: instead of throwing an error, swap temp files out to the page server. */
+			OffloadTempFiles(file);
 		}
 	}
 }
@@ -2211,6 +2396,9 @@ FileSize(File file)
 	DO_DB(elog(LOG, "FileSize %d (%s)",
 			   file, VfdCache[file].fileName));
 
+	if (VfdCache[file].fdstate & FD_TEMP_FILE_LIMIT)
+		return VfdCache[file].fileSize;
+
 	if (FileIsNotOpen(file))
 	{
 		if (FileAccess(file) < 0)
@@ -2238,11 +2426,14 @@ FileTruncate(File file, off_t offset, uint32 wait_event_info)
 	returnCode = ftruncate(VfdCache[file].fd, offset);
 	pgstat_report_wait_end();
 
-	if (returnCode == 0 && VfdCache[file].fileSize > offset)
+	if (returnCode == 0)
 	{
-		/* adjust our state for truncation of a temp file */
-		Assert(VfdCache[file].fdstate & FD_TEMP_FILE_LIMIT);
-		temporary_files_size -= VfdCache[file].fileSize - offset;
+		if (VfdCache[file].fileSize > offset)
+		{
+			/* adjust our state for truncation of a temp file */
+			Assert(VfdCache[file].fdstate & FD_TEMP_FILE_LIMIT);
+			temporary_files_size -= VfdCache[file].fileSize - offset;
+		}
 		VfdCache[file].fileSize = offset;
 	}
 
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 8c12dda2380..abd5f3de0cf 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -637,10 +637,18 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
 
 	while (fsm_nblocks_now < fsm_nblocks)
 	{
-		PageSetChecksumInplace((Page) pg.data, fsm_nblocks_now);
+		/*
+		 * ZENITH: Initialize FSM pages through the buffer cache to prevent loading
+		 * them from the pageserver. 
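+		 *
+		 * Extending through ReadBufferExtended(P_NEW, RBM_ZERO_AND_LOCK)
+		 * creates the new page directly in shared buffers; PageInit() and
+		 * MarkBufferDirty() below ensure it is written out, so it never has
+		 * to be fetched from the pageserver.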
+		 */
+		Buffer buffer = ReadBufferExtended(rel, FSM_FORKNUM, P_NEW, RBM_ZERO_AND_LOCK, NULL);
+		Page page = BufferGetPage(buffer);
+
+		PageInit((Page) page, BLCKSZ, 0);
+		PageSetChecksumInplace(page, fsm_nblocks_now);
+		MarkBufferDirty(buffer);
+		UnlockReleaseBuffer(buffer);
 
-		smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now,
-				   pg.data, false);
 		fsm_nblocks_now++;
 	}
 
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 3e4ec53a97e..de498c21dba 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -150,6 +150,7 @@ CreateSharedMemoryAndSemaphores(void)
 		size = add_size(size, BTreeShmemSize());
 		size = add_size(size, SyncScanShmemSize());
 		size = add_size(size, AsyncShmemSize());
+
 #ifdef EXEC_BACKEND
 		size = add_size(size, ShmemBackendArraySize());
 #endif
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index 6c7cf6c2956..b4652c33ff6 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -53,3 +53,4 @@ XactTruncationLock 44
 # 45 was XactTruncationLock until removal of BackendRandomLock
 WrapLimitsVacuumLock				46
 NotifyQueueTailLock					47
+LastWrittenLsnLock					48
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index b4bca7eed6f..b20933b6713 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -574,6 +574,16 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 	return true;
 }
 
+
+/*
+ *	mdfcntl() -- Storage-manager-specific control operation (a no-op for md).
+ */
+int
+mdfcntl(SMgrRelation reln, int cmd, void* data, size_t size)
+{
+	return 0;
+}
+
 /*
  *	mdwriteback() -- Tell the kernel to write pages back to storage.
 *
@@ -766,7 +776,13 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 	BlockNumber nblocks;
 	BlockNumber segno;
 
-	mdopenfork(reln, forknum, EXTENSION_FAIL);
+	/* NEON: the md smgr is used in Neon for unlogged and temp relations.
+	 * After a compute node restart their data is deleted, but unlogged tables are still present in the system catalog.
+	 * This differs from vanilla Postgres, where unlogged relations are truncated only after abnormal termination.
+	 * To avoid "could not open file" errors we have to use EXTENSION_RETURN_NULL here instead of EXTENSION_FAIL
+	 */
+	if (!mdopenfork(reln, forknum, RelFileNodeBackendIsTemp(reln->smgr_rnode) ? 
EXTENSION_FAIL : EXTENSION_RETURN_NULL)) + return 0; /* mdopen has opened the first segment */ Assert(reln->md_num_open_segs[forknum] > 0); @@ -1055,7 +1071,7 @@ DropRelationFiles(RelFileNode *delrels, int ndelrels, bool isRedo) srels = palloc(sizeof(SMgrRelation) * ndelrels); for (i = 0; i < ndelrels; i++) { - SMgrRelation srel = smgropen(delrels[i], InvalidBackendId); + SMgrRelation srel = smgropen(delrels[i], InvalidBackendId, 0); if (isRedo) { @@ -1333,7 +1349,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) int mdsyncfiletag(const FileTag *ftag, char *path) { - SMgrRelation reln = smgropen(ftag->rnode, InvalidBackendId); + SMgrRelation reln = smgropen(ftag->rnode, InvalidBackendId, 0); File file; bool need_to_close; int result, diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 4dc24649df9..022eeb476b3 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -18,6 +18,7 @@ #include "postgres.h" #include "access/xlog.h" +#include "catalog/pg_tablespace.h" #include "lib/ilist.h" #include "storage/bufmgr.h" #include "storage/ipc.h" @@ -26,47 +27,8 @@ #include "utils/hsearch.h" #include "utils/inval.h" - -/* - * This struct of function pointers defines the API between smgr.c and - * any individual storage manager module. Note that smgr subfunctions are - * generally expected to report problems via elog(ERROR). An exception is - * that smgr_unlink should use elog(WARNING), rather than erroring out, - * because we normally unlink relations during post-commit/abort cleanup, - * and so it's too late to raise an error. Also, various conditions that - * would normally be errors should be allowed during bootstrap and/or WAL - * recovery --- see comments in md.c for details. - */ -typedef struct f_smgr -{ - void (*smgr_init) (void); /* may be NULL */ - void (*smgr_shutdown) (void); /* may be NULL */ - void (*smgr_open) (SMgrRelation reln); - void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, - bool isRedo); - bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum, - bool isRedo); - void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool skipFsync); - bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum); - void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer); - void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool skipFsync); - void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); - BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks); - void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); -} f_smgr; - -static const f_smgr smgrsw[] = { +static const f_smgr smgr_md = { /* magnetic disk */ - { .smgr_init = mdinit, .smgr_shutdown = NULL, .smgr_open = mdopen, @@ -82,11 +44,9 @@ static const f_smgr smgrsw[] = { .smgr_nblocks = mdnblocks, .smgr_truncate = mdtruncate, .smgr_immedsync = mdimmedsync, - } + .smgr_fcntl = mdfcntl, }; -static const int NSmgr = lengthof(smgrsw); - /* * Each backend has a hashtable that stores all extant SMgrRelation objects. * In addition, "unowned" SMgrRelation objects are chained together in a list. 
@@ -96,7 +56,7 @@ static HTAB *SMgrRelationHash = NULL; static dlist_head unowned_relns; /* local function prototypes */ -static void smgrshutdown(int code, Datum arg); +//static void smgrshutdown(int code, Datum arg); /* @@ -110,40 +70,80 @@ static void smgrshutdown(int code, Datum arg); void smgrinit(void) { - int i; + (*smgr_init_hook)(); - for (i = 0; i < NSmgr; i++) - { - if (smgrsw[i].smgr_init) - smgrsw[i].smgr_init(); - } + /* + * ZENITH XXX + * This doesn't work with inmem_smgr, so temporarily disable. + * Anyway, we don't have any real smgrshutdown function. + */ + // /* register the shutdown proc */ + // on_proc_exit(smgrshutdown, 0); +} - /* register the shutdown proc */ - on_proc_exit(smgrshutdown, 0); +//ZENITH XXX See comment above. Silence compiler warning. +// /* +// * on_proc_exit hook for smgr cleanup during backend shutdown +// */ +// static void +// smgrshutdown(int code, Datum arg) +// { +// if (smgr_shutdown_hook) +// (*smgr_shutdown_hook)(); + +// smgr_shutdown_standard(); +// } + +/* Hook for plugins to get control in smgr */ +smgr_hook_type smgr_hook = NULL; +smgr_init_hook_type smgr_init_hook = smgr_init_standard; +smgr_shutdown_hook_type smgr_shutdown_hook = NULL; + +const f_smgr * +smgr_standard(BackendId backend, RelFileNode rnode) +{ + return &smgr_md; } -/* - * on_proc_exit hook for smgr cleanup during backend shutdown - */ -static void -smgrshutdown(int code, Datum arg) +void +smgr_init_standard(void) { - int i; + mdinit(); +} + +void +smgr_shutdown_standard(void) +{ +} - for (i = 0; i < NSmgr; i++) +const f_smgr * +smgr(BackendId backend, RelFileNode rnode) +{ + const f_smgr *result; + + if (smgr_hook) { - if (smgrsw[i].smgr_shutdown) - smgrsw[i].smgr_shutdown(); + result = (*smgr_hook)(backend, rnode); } + else + result = smgr_standard(backend, rnode); + + return result; } + /* * smgropen() -- Return an SMgrRelation object, creating it if need be. * * This does not attempt to actually open the underlying file. + * + * The caller should pass the value of pg_class.relpersistence, if they know + * it, or 0 if unknown. Some operations, like smgrwrite() and smgrunlink() + * are allowed when relpersistence is not known, but others like smgrread() + * require it. */ SMgrRelation -smgropen(RelFileNode rnode, BackendId backend) +smgropen(RelFileNode rnode, BackendId backend, char relpersistence) { RelFileNodeBackend brnode; SMgrRelation reln; @@ -174,16 +174,33 @@ smgropen(RelFileNode rnode, BackendId backend) /* hash_search already filled in the lookup key */ reln->smgr_owner = NULL; reln->smgr_targblock = InvalidBlockNumber; + reln->smgr_relpersistence = relpersistence; for (int i = 0; i <= MAX_FORKNUM; ++i) reln->smgr_cached_nblocks[i] = InvalidBlockNumber; - reln->smgr_which = 0; /* we only have md.c at present */ + + reln->smgr = smgr(backend, rnode); /* implementation-specific initialization */ - smgrsw[reln->smgr_which].smgr_open(reln); + (*reln->smgr).smgr_open(reln); /* it has no owner yet */ dlist_push_tail(&unowned_relns, &reln->node); } + else + { + /* + * If the caller passed a valid 'relpersistence', and it was unknown + * before, update it. 
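+		 * Callers that do not know the persistence (e.g. smgrDoPendingDeletes()
+		 * in storage.c) pass 0; a conflicting non-zero value is reported with
+		 * elog(ERROR) below.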
+ */ + if (reln->smgr_relpersistence == 0) + reln->smgr_relpersistence = relpersistence; + else + { + if (!(relpersistence == 0 || reln->smgr_relpersistence == relpersistence)) + elog(ERROR, "relpersistence mismatch: smgropen %c vs SmgrRelation %c", + relpersistence, reln->smgr_relpersistence); + } + } return reln; } @@ -246,7 +263,7 @@ smgrclearowner(SMgrRelation *owner, SMgrRelation reln) bool smgrexists(SMgrRelation reln, ForkNumber forknum) { - return smgrsw[reln->smgr_which].smgr_exists(reln, forknum); + return (*reln->smgr).smgr_exists(reln, forknum); } /* @@ -259,7 +276,7 @@ smgrclose(SMgrRelation reln) ForkNumber forknum; for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[reln->smgr_which].smgr_close(reln, forknum); + (*reln->smgr).smgr_close(reln, forknum); owner = reln->smgr_owner; @@ -332,7 +349,7 @@ smgrclosenode(RelFileNodeBackend rnode) void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) { - smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo); + (*reln->smgr).smgr_create(reln, forknum, isRedo); } /* @@ -360,12 +377,10 @@ smgrdosyncall(SMgrRelation *rels, int nrels) */ for (i = 0; i < nrels; i++) { - int which = rels[i]->smgr_which; - for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) { - if (smgrsw[which].smgr_exists(rels[i], forknum)) - smgrsw[which].smgr_immedsync(rels[i], forknum); + if ((*rels[i]->smgr).smgr_exists(rels[i], forknum)) + (*rels[i]->smgr).smgr_immedsync(rels[i], forknum); } } } @@ -404,13 +419,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) for (i = 0; i < nrels; i++) { RelFileNodeBackend rnode = rels[i]->smgr_rnode; - int which = rels[i]->smgr_which; rnodes[i] = rnode; /* Close the forks at smgr level */ for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[which].smgr_close(rels[i], forknum); + (*rels[i]->smgr).smgr_close(rels[i], forknum); } /* @@ -439,10 +453,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) for (i = 0; i < nrels; i++) { - int which = rels[i]->smgr_which; - for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[which].smgr_unlink(rnodes[i], forknum, isRedo); + (*rels[i]->smgr).smgr_unlink(rnodes[i], forknum, isRedo); } pfree(rnodes); @@ -462,7 +474,7 @@ void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync) { - smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, + (*reln->smgr).smgr_extend(reln, forknum, blocknum, buffer, skipFsync); /* @@ -486,7 +498,16 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) { - return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum); + return (*reln->smgr).smgr_prefetch(reln, forknum, blocknum); +} + +/* + * smgr_fcntl() -- Perform storage manager specific call + */ +int +smgr_fcntl(SMgrRelation reln, int cmd, void* data, size_t size) +{ + return (*reln->smgr).smgr_fcntl(reln, cmd, data, size); } /* @@ -501,7 +522,7 @@ void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer) { - smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer); + (*reln->smgr).smgr_read(reln, forknum, blocknum, buffer); } /* @@ -523,7 +544,7 @@ void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync) { - smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum, + (*reln->smgr).smgr_write(reln, forknum, blocknum, buffer, skipFsync); } @@ -536,7 +557,7 @@ void 
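With the fixed smgrsw[] table replaced by per-relation dispatch, an extension can install a whole storage manager through smgr_hook. A hypothetical registration sketch, assuming the f_smgr layout this patch declares in smgr.h (all my_* names are placeholders, not part of the patch):

#include "postgres.h"
#include "fmgr.h"
#include "storage/smgr.h"

PG_MODULE_MAGIC;

static smgr_hook_type prev_smgr_hook = NULL;

/* Placeholder callbacks: a real module must supply every function
 * pointer that smgr.c dispatches through. */
static void my_open(SMgrRelation reln);
static void my_close(SMgrRelation reln, ForkNumber forknum);

static const f_smgr my_smgr = {
	.smgr_open = my_open,
	.smgr_close = my_close,
	/* ... remaining callbacks ... */
};

/* Pick an implementation per relation; chain to any earlier hook. */
static const f_smgr *
my_smgr_selector(BackendId backend, RelFileNode rnode)
{
	if (backend != InvalidBackendId)	/* e.g. keep temp rels on md */
		return prev_smgr_hook ? (*prev_smgr_hook) (backend, rnode)
			: smgr_standard(backend, rnode);
	return &my_smgr;
}

void
_PG_init(void)
{
	prev_smgr_hook = smgr_hook;
	smgr_hook = my_smgr_selector;
}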
smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks) { - smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum, + (*reln->smgr).smgr_writeback(reln, forknum, blocknum, nblocks); } @@ -554,7 +575,7 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) if (result != InvalidBlockNumber) return result; - result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); + result = (*reln->smgr).smgr_nblocks(reln, forknum); reln->smgr_cached_nblocks[forknum] = result; @@ -620,7 +641,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb /* Make the cached size is invalid if we encounter an error. */ reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber; - smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]); + (*reln->smgr).smgr_truncate(reln, forknum[i], nblocks[i]); /* * We might as well update the local smgr_cached_nblocks values. The @@ -659,7 +680,31 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb void smgrimmedsync(SMgrRelation reln, ForkNumber forknum) { - smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum); + (*reln->smgr).smgr_immedsync(reln, forknum); +} + +/* + * Zenith-added functions to mark the phases of an unlogged index build. + */ +void +smgr_start_unlogged_build(SMgrRelation reln) +{ + if ((*reln->smgr).smgr_start_unlogged_build) + (*reln->smgr).smgr_start_unlogged_build(reln); +} + +void +smgr_finish_unlogged_build_phase_1(SMgrRelation reln) +{ + if ((*reln->smgr).smgr_finish_unlogged_build_phase_1) + (*reln->smgr).smgr_finish_unlogged_build_phase_1(reln); +} + +void +smgr_end_unlogged_build(SMgrRelation reln) +{ + if ((*reln->smgr).smgr_end_unlogged_build) + (*reln->smgr).smgr_end_unlogged_build(reln); } /* diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 2278d8569ed..aa0cea03ff1 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -181,6 +181,8 @@ static ProcSignalReason RecoveryConflictReason; static MemoryContext row_description_context = NULL; static StringInfoData row_description_buf; +process_interrupts_callback_t ProcessInterruptsCallback; + /* ---------------------------------------------------------------- * decls for routines only used in this file * ---------------------------------------------------------------- @@ -3134,6 +3136,7 @@ ProcessInterrupts(void) return; InterruptPending = false; + Retry: if (ProcDiePending) { ProcDiePending = false; @@ -3367,6 +3370,13 @@ ProcessInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Call registered callback if any */ + if (ProcessInterruptsCallback) + { + if (ProcessInterruptsCallback()) + goto Retry; + } } diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index 1a30faf8ad4..1060ddf3037 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -491,6 +491,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w) case WAIT_EVENT_VACUUM_DELAY: event_name = "VacuumDelay"; break; + case WAIT_EVENT_BACK_PRESSURE: + event_name = "BackPressure"; + break; /* no default case, so that compiler will warn */ } diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 9de2ed09d99..9f4edbc60d8 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -23,6 +23,7 @@ #include "commands/tablespace.h" #include "miscadmin.h" #include "storage/fd.h" +#include "storage/smgr.h" #include 
"utils/acl.h" #include "utils/builtins.h" #include "utils/numeric.h" @@ -76,6 +77,8 @@ db_dir_size(const char *path) return dirsize; } +dbsize_hook_type dbsize_hook = NULL; + /* * calculate size of database in all tablespaces */ @@ -105,6 +108,13 @@ calculate_database_size(Oid dbOid) /* Include pg_default storage */ snprintf(pathname, sizeof(pathname), "base/%u", dbOid); + + if (dbsize_hook) + { + totalsize = (*dbsize_hook)(dbOid); + return totalsize; + } + totalsize = db_dir_size(pathname); /* Scan the non-default tablespaces */ @@ -270,41 +280,17 @@ pg_tablespace_size_name(PG_FUNCTION_ARGS) * is no check here or at the call sites for that. */ static int64 -calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum) +calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum, char relpersistence) { - int64 totalsize = 0; - char *relationpath; - char pathname[MAXPGPATH]; - unsigned int segcount = 0; + SMgrRelation srel = smgropen(*rfn, backend, relpersistence); - relationpath = relpathbackend(*rfn, backend, forknum); - - for (segcount = 0;; segcount++) + if (smgrexists(srel, forknum)) { - struct stat fst; - - CHECK_FOR_INTERRUPTS(); - - if (segcount == 0) - snprintf(pathname, MAXPGPATH, "%s", - relationpath); - else - snprintf(pathname, MAXPGPATH, "%s.%u", - relationpath, segcount); + BlockNumber n = smgrnblocks(srel, forknum); - if (stat(pathname, &fst) < 0) - { - if (errno == ENOENT) - break; - else - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", pathname))); - } - totalsize += fst.st_size; + return (int64) n * BLCKSZ; } - - return totalsize; + return 0; } Datum @@ -328,7 +314,8 @@ pg_relation_size(PG_FUNCTION_ARGS) PG_RETURN_NULL(); size = calculate_relation_size(&(rel->rd_node), rel->rd_backend, - forkname_to_number(text_to_cstring(forkName))); + forkname_to_number(text_to_cstring(forkName)), + rel->rd_rel->relpersistence); relation_close(rel, AccessShareLock); @@ -353,7 +340,8 @@ calculate_toast_table_size(Oid toastrelid) /* toast heap size, including FSM and VM size */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(toastRel->rd_node), - toastRel->rd_backend, forkNum); + toastRel->rd_backend, forkNum, + toastRel->rd_rel->relpersistence); /* toast index size, including FSM and VM size */ indexlist = RelationGetIndexList(toastRel); @@ -367,7 +355,8 @@ calculate_toast_table_size(Oid toastrelid) AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(toastIdxRel->rd_node), - toastIdxRel->rd_backend, forkNum); + toastIdxRel->rd_backend, forkNum, + toastIdxRel->rd_rel->relpersistence); relation_close(toastIdxRel, AccessShareLock); } @@ -396,7 +385,8 @@ calculate_table_size(Relation rel) */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(rel->rd_node), rel->rd_backend, - forkNum); + forkNum, + rel->rd_rel->relpersistence); /* * Size of toast relation @@ -436,7 +426,8 @@ calculate_indexes_size(Relation rel) for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(idxRel->rd_node), idxRel->rd_backend, - forkNum); + forkNum, + idxRel->rd_rel->relpersistence); relation_close(idxRel, AccessShareLock); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index ef62bb81885..a2718706129 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -87,6 +87,7 @@ #include "storage/pg_shmem.h" #include "storage/predicate.h" 
#include "storage/proc.h" +#include "storage/smgr.h" #include "storage/standby.h" #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" @@ -954,6 +955,16 @@ static const unit_conversion time_unit_conversion_table[] = static struct config_bool ConfigureNamesBool[] = { + { + {"enable_seqscan_prefetch", PGC_USERSET, RESOURCES_ASYNCHRONOUS, + gettext_noop("Enables prefetching of next pages in sequential scans."), + NULL, + GUC_EXPLAIN + }, + &enable_seqscan_prefetch, + true, + NULL, NULL, NULL + }, { {"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of sequential-scan plans."), @@ -2123,6 +2134,16 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"neon_test_evict", PGC_POSTMASTER, UNGROUPED, + gettext_noop("Evict unpinned pages (for better test coverage)"), + }, + &zenith_test_evict, + false, + NULL, NULL, NULL + }, + + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL @@ -2344,6 +2365,16 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"lsn_cache_size", PGC_POSTMASTER, UNGROUPED, + gettext_noop("Size of last written LSN cache used by Neon."), + NULL + }, + &lastWrittenLsnCacheSize, + 128*1024, 1024, INT_MAX, + NULL, NULL, NULL + }, + { {"temp_buffers", PGC_USERSET, RESOURCES_MEM, gettext_noop("Sets the maximum number of temporary buffers used by each session."), @@ -2888,6 +2919,42 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"max_replication_apply_lag", PGC_POSTMASTER, REPLICATION_SENDING, + gettext_noop("Maximal write lag between master and replicas."), + gettext_noop("When lag between minimal apply position of replica and current LSN exceeds this value," + "backends are blocked."), + GUC_UNIT_MB, + }, + &max_replication_apply_lag, + -1, -1, INT_MAX, /* it should not be smaller than maximal size of WAL record */ + NULL, NULL, NULL + }, + + { + {"max_replication_flush_lag", PGC_POSTMASTER, REPLICATION_SENDING, + gettext_noop("Maximal flush lag between master and replicas."), + gettext_noop("When lag between minimal flush position of replica and current LSN exceeds this value," + "backends are blocked"), + GUC_UNIT_MB, + }, + &max_replication_flush_lag, + -1, -1, INT_MAX, /* it should not be smaller than maximal size of WAL record */ + NULL, NULL, NULL + }, + + { + {"max_replication_write_lag", PGC_POSTMASTER, REPLICATION_SENDING, + gettext_noop("Maximal write lag between master and replicas."), + gettext_noop("When lag between minimal write position of replica and current LSN exceeds this value," + "backends are blocked"), + GUC_UNIT_MB, + }, + &max_replication_write_lag, + -1, -1, INT_MAX, /* it should not be smaller than maximal size of WAL record */ + NULL, NULL, NULL + }, + { {"max_slot_wal_keep_size", PGC_SIGHUP, REPLICATION_SENDING, gettext_noop("Sets the maximum WAL size that can be reserved by replication slots."), diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 77e621a7679..6e09e22062c 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2948,6 +2948,7 @@ main(int argc, char *argv[]) {"data-checksums", no_argument, NULL, 'k'}, {"allow-group-access", no_argument, NULL, 'g'}, {"discard-caches", no_argument, NULL, 14}, + {"sysid", required_argument, NULL, 15}, {NULL, 0, NULL, 0} }; @@ -3094,6 +3095,9 @@ main(int argc, char *argv[]) extra_options, "-c debug_discard_caches=1"); break; + case 15: + boot_options = psprintf("%s -s %s", boot_options, optarg); + break; default: /* 
getopt_long already emitted a complaint */ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 2daed328e7d..bb4062e17ea 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -13,9 +13,11 @@ #include "postgres.h" #include <dirent.h> +#include <limits.h> #include <sys/stat.h> #include <unistd.h> + #include "access/transam.h" #include "access/xlog_internal.h" #include "access/xlogreader.h" @@ -23,8 +25,11 @@ #include "common/fe_memutils.h" #include "common/logging.h" #include "getopt_long.h" +#include "port/pg_bitutils.h" #include "rmgrdesc.h" +#define OFFSET_INVALID ((size_t)-1) + static const char *progname; static int WalSegSz; @@ -35,6 +40,7 @@ typedef struct XLogDumpPrivate XLogRecPtr startptr; XLogRecPtr endptr; bool endptr_reached; + char* input_filename; } XLogDumpPrivate; typedef struct XLogDumpConfig @@ -52,6 +58,7 @@ typedef struct XLogDumpConfig int filter_by_rmgr; TransactionId filter_by_xid; bool filter_by_xid_enabled; + bool ignore_format_errors; } XLogDumpConfig; typedef struct Stats @@ -70,8 +77,36 @@ typedef struct XLogDumpStats Stats record_stats[RM_NEXT_ID][MAX_XLINFO_TYPES]; } XLogDumpStats; + #define fatal_error(...) do { pg_log_fatal(__VA_ARGS__); exit(EXIT_FAILURE); } while(0) +/* calculate ceil(log base 2) of num */ +static int +my_log2(long num) +{ + /* + * guard against too-large input, which would be invalid for + * pg_ceil_log2_*() + */ + if (num > LONG_MAX / 2) + num = LONG_MAX / 2; + +#if SIZEOF_LONG < 8 + return pg_ceil_log2_32(num); +#else + return pg_ceil_log2_64(num); +#endif +} + +/* calculate first power of 2 >= num, bounded to what will fit in an int */ +static int +next_pow2_int(long num) +{ + if (num > INT_MAX / 2) + num = INT_MAX / 2; + return 1 << my_log2(num); +} + static void print_rmgr_list(void) { @@ -284,6 +319,18 @@ WALDumpOpenSegment(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID tli = *tli_p; char fname[MAXPGPATH]; int tries; + XLogDumpPrivate *private = state->private_data; + + if(private->input_filename) + { + Assert(nextSegNo == 0); + + state->seg.ws_file = open_file_in_directory(state->segcxt.ws_dir, private->input_filename); + if (state->seg.ws_file >= 0) + return; + + fatal_error("could not open file \"%s\": %m", private->input_filename); + } XLogFileName(fname, tli, nextSegNo, state->segcxt.ws_segsize); @@ -354,6 +401,7 @@ WALDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, { WALOpenSegment *seg = &errinfo.wre_seg; char fname[MAXPGPATH]; + char *actual_fname = private->input_filename ? 
private->input_filename : fname; XLogFileName(fname, seg->ws_tli, seg->ws_segno, state->segcxt.ws_segsize); @@ -362,11 +410,11 @@ WALDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, { errno = errinfo.wre_errno; fatal_error("could not read from file %s, offset %u: %m", - fname, errinfo.wre_off); + actual_fname, errinfo.wre_off); } else fatal_error("could not read from file %s, offset %u: read %d of %zu", - fname, errinfo.wre_off, errinfo.wre_read, + actual_fname, errinfo.wre_off, errinfo.wre_read, (Size) errinfo.wre_req); } @@ -465,16 +513,25 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record) int block_id; uint8 info = XLogRecGetInfo(record); XLogRecPtr xl_prev = XLogRecGetPrev(record); + XLogDumpPrivate *private = record->private_data; StringInfoData s; XLogDumpRecordLen(record, &rec_len, &fpi_len); - printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ", + if(private->input_filename) + printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, offset: 0x%llX, prev %X/%08X, ", desc->rm_name, rec_len, XLogRecGetTotalLen(record), XLogRecGetXid(record), - LSN_FORMAT_ARGS(record->ReadRecPtr), + (unsigned long long) record->ReadRecPtr, LSN_FORMAT_ARGS(xl_prev)); + else + printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ", + desc->rm_name, + rec_len, XLogRecGetTotalLen(record), + XLogRecGetXid(record), + LSN_FORMAT_ARGS(record->ReadRecPtr), + LSN_FORMAT_ARGS(xl_prev)); id = desc->rm_identify(info); if (id == NULL) @@ -726,7 +783,10 @@ usage(void) printf(_(" -b, --bkp-details output detailed information about backup blocks\n")); printf(_(" -e, --end=RECPTR stop reading at WAL location RECPTR\n")); printf(_(" -f, --follow keep retrying after reaching end of WAL\n")); + printf(_(" -F, --file=FNAME dump log records from a single file\n")); + printf(_(" -i, --ignore ignore format errors, skip invalid structures\n")); printf(_(" -n, --limit=N number of records to display\n")); + printf(_(" -o, --offset=OFFSET offset of the first record in the file to dump\n")); printf(_(" -p, --path=PATH directory in which to find log segment files or a\n" " directory with a ./pg_wal that contains such files\n" " (default: current directory, ./pg_wal, $PGDATA/pg_wal)\n")); @@ -757,14 +817,20 @@ main(int argc, char **argv) XLogRecord *record; XLogRecPtr first_record; char *waldir = NULL; + char *fname = NULL; char *errormsg; + bool single_file = false; + size_t start_offset = OFFSET_INVALID; static struct option long_options[] = { {"bkp-details", no_argument, NULL, 'b'}, {"end", required_argument, NULL, 'e'}, {"follow", no_argument, NULL, 'f'}, + {"file", required_argument, NULL, 'F'}, {"help", no_argument, NULL, '?'}, + {"ignore", no_argument, NULL, 'i'}, {"limit", required_argument, NULL, 'n'}, + {"offset", required_argument, NULL, 'o'}, {"path", required_argument, NULL, 'p'}, {"quiet", no_argument, NULL, 'q'}, {"rmgr", required_argument, NULL, 'r'}, @@ -805,6 +871,7 @@ main(int argc, char **argv) private.startptr = InvalidXLogRecPtr; private.endptr = InvalidXLogRecPtr; private.endptr_reached = false; + private.input_filename = NULL; config.quiet = false; config.bkp_details = false; @@ -816,6 +883,7 @@ main(int argc, char **argv) config.filter_by_xid_enabled = false; config.stats = false; config.stats_per_record = false; + config.ignore_format_errors = false; if (argc <= 1) { @@ -823,7 +891,7 @@ main(int argc, char **argv) goto bad_argument; } - while ((option = getopt_long(argc, argv, "be:fn:p:qr:s:t:x:z", + while ((option = 
getopt_long(argc, argv, "be:fF:in:o:p:qr:s:t:x:z", long_options, &optindex)) != -1) { switch (option) @@ -843,6 +911,13 @@ main(int argc, char **argv) case 'f': config.follow = true; break; + case 'F': + fname = pg_strdup(optarg); + single_file = true; + break; + case 'i': + config.ignore_format_errors = true; + break; case 'n': if (sscanf(optarg, "%d", &config.stop_after_records) != 1) { @@ -850,6 +925,13 @@ main(int argc, char **argv) goto bad_argument; } break; + case 'o': + if (sscanf(optarg, "%zu", &start_offset) != 1) + { + pg_log_error("could not parse offset \"%s\"", optarg); + goto bad_argument; + } + break; case 'p': waldir = pg_strdup(optarg); break; @@ -936,6 +1018,73 @@ main(int argc, char **argv) goto bad_argument; } + if (start_offset != OFFSET_INVALID) + { + if(!XLogRecPtrIsInvalid(private.startptr) || !XLogRecPtrIsInvalid(private.endptr)) + { + pg_log_error("cannot specify both an offset and start/end WAL locations"); + goto bad_argument; + } + + if(!single_file) + { + pg_log_error("the offset option can only be used together with the file option"); + goto bad_argument; + } + + /* Log records are MAXALIGNed; start at the next aligned position */ + private.startptr = MAXALIGN(start_offset); + } + + if(single_file) + { + char *directory = NULL; + int fd; + struct stat stat; + + if(config.follow) + { + pg_log_error("--follow cannot be used in single-file mode"); + goto bad_argument; + } + + if (waldir != NULL) + { + pg_log_error("cannot specify both a single file and a WAL directory"); + goto bad_argument; + } + + split_path(fname, &directory, &private.input_filename); + waldir = directory; + + if(waldir == NULL) + { + char *cwd = pg_malloc(MAXPGPATH); + + if (!getcwd(cwd, MAXPGPATH)) + fatal_error("could not identify current directory: %m"); + + waldir = cwd; + } + + if (!verify_directory(waldir)) + fatal_error("could not open directory \"%s\": %m", waldir); + + fd = open_file_in_directory(waldir, private.input_filename); + if (fd < 0) + fatal_error("could not open file \"%s\"", private.input_filename); + + if(fstat(fd, &stat) != 0) + fatal_error("could not stat file \"%s\"", private.input_filename); + + private.endptr = stat.st_size; + + /* Round the segment size up to the next power of 2, with a 1MB minimum */ + WalSegSz = Max(next_pow2_int(private.endptr), 1024 * 1024); + + close(fd); + } + if (waldir != NULL) { /* validate path points to directory */ @@ -954,6 +1103,12 @@ main(int argc, char **argv) int fd; XLogSegNo segno; + if(single_file) + { + pg_log_error("cannot specify both a single file and start/end segment boundaries"); + goto bad_argument; + } + split_path(argv[optind], &directory, &fname); if (waldir == NULL && directory != NULL) @@ -1026,10 +1181,11 @@ main(int argc, char **argv) } } else - waldir = identify_target_directory(waldir, NULL); + if (!single_file) + waldir = identify_target_directory(waldir, NULL); /* we don't know what to print */ - if (XLogRecPtrIsInvalid(private.startptr)) + if (XLogRecPtrIsInvalid(private.startptr) && !single_file) { pg_log_error("no start WAL location given"); goto bad_argument; @@ -1047,12 +1203,28 @@ main(int argc, char **argv) if (!xlogreader_state) fatal_error("out of memory"); - /* first find a valid recptr to start from */ - first_record = XLogFindNextRecord(xlogreader_state, private.startptr); + if(single_file) + { + if(config.ignore_format_errors) + { + xlogreader_state->skip_page_validation = true; + xlogreader_state->skip_invalid_records = true; + } + + xlogreader_state->skip_lsn_checks = true; - if (first_record == InvalidXLogRecPtr) - 
fatal_error("could not find a valid record after %X/%X", + first_record = private.startptr; + XLogBeginRead(xlogreader_state, first_record); + } + else + { + /* first find a valid recptr to start from */ + first_record = XLogFindNextRecord(xlogreader_state, private.startptr); + + if (first_record == InvalidXLogRecPtr) + fatal_error("could not find a valid record after %X/%X", LSN_FORMAT_ARGS(private.startptr)); + } /* * Display a message that we're skipping data if `from` wasn't a pointer diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 4f1dff9ca1b..fd67d500dc6 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -71,6 +71,10 @@ typedef struct HeapScanDescData */ ParallelBlockTableScanWorkerData *rs_parallelworkerdata; + /* prefetch info. rs_prefetch_maximum = -1 when disabled. */ + int rs_prefetch_maximum; /* io_concurrency of tablespace */ + int rs_prefetch_target; /* current readahead target */ + /* these fields only used in page-at-a-time mode and for bitmap scans */ int rs_cindex; /* current tuple's index in vistuples */ int rs_ntuples; /* number of visible tuples on page */ diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 27db48184e6..e6d31be5222 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -108,6 +108,7 @@ typedef struct xl_heap_delete { TransactionId xmax; /* xmax of the deleted tuple */ OffsetNumber offnum; /* deleted tuple's offset */ + uint32 t_cid; uint8 infobits_set; /* infomask bits */ uint8 flags; } xl_heap_delete; @@ -145,6 +146,7 @@ typedef struct xl_heap_header { uint16 t_infomask2; uint16 t_infomask; + uint32 t_cid; uint8 t_hoff; } xl_heap_header; @@ -186,6 +188,7 @@ typedef struct xl_multi_insert_tuple uint16 datalen; /* size of tuple data that follows */ uint16 t_infomask2; uint16 t_infomask; + uint32 t_cid; uint8 t_hoff; /* TUPLE DATA FOLLOWS AT END OF STRUCT */ } xl_multi_insert_tuple; @@ -215,9 +218,9 @@ typedef struct xl_heap_update OffsetNumber old_offnum; /* old tuple's offset */ uint8 old_infobits_set; /* infomask bits to set on old tuple */ uint8 flags; + uint32 t_cid; TransactionId new_xmax; /* xmax of the new tuple */ OffsetNumber new_offnum; /* new tuple's offset */ - /* * If XLH_UPDATE_CONTAINS_OLD_TUPLE or XLH_UPDATE_CONTAINS_OLD_KEY flags * are set, xl_heap_header and tuple data for the old tuple follow. @@ -279,6 +282,7 @@ typedef struct xl_heap_lock { TransactionId locking_xid; /* might be a MultiXactId not xid */ OffsetNumber offnum; /* locked tuple's offset on page */ + uint32 t_cid; int8 infobits_set; /* infomask and infomask2 bits to set */ uint8 flags; /* XLH_LOCK_* flag bits */ } xl_heap_lock; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ee3e369b79f..b784cbf0493 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -31,6 +31,11 @@ extern int sync_method; extern PGDLLIMPORT TimeLineID ThisTimeLineID; /* current TLI */ +/* + * Pseudo block number used to associate LSN with relation metadata (relation size) + */ +#define REL_METADATA_PSEUDO_BLOCKNO InvalidBlockNumber + /* * Prior to 8.4, all activity during recovery was carried out by the startup * process. 
This local variable continues to be used in many parts of the @@ -132,6 +137,7 @@ extern char *PrimaryConnInfo; extern char *PrimarySlotName; extern bool wal_receiver_create_temp_slot; extern bool track_wal_io_timing; +extern int lastWrittenLsnCacheSize; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; @@ -307,6 +313,7 @@ extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn); extern void xlog_redo(XLogReaderState *record); extern void xlog_desc(StringInfo buf, XLogReaderState *record); extern const char *xlog_identify(uint8 info); +extern void xlog_outdesc(StringInfo buf, XLogReaderState *record); extern void issue_xlog_fsync(int fd, XLogSegNo segno); @@ -350,6 +357,17 @@ extern XLogRecPtr GetFlushRecPtr(void); extern XLogRecPtr GetLastImportantRecPtr(void); extern void RemovePromoteSignalFiles(void); +extern void SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode relfilenode, ForkNumber forknum, BlockNumber blkno); +extern void SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode relfilenode, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks); +extern void SetLastWrittenLSNForDatabase(XLogRecPtr lsn); +extern void SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode relfilenode, ForkNumber forknum); +extern XLogRecPtr GetLastWrittenLSN(RelFileNode relfilenode, ForkNumber forknum, BlockNumber blkno); + +extern XLogRecPtr GetRedoStartLsn(void); + +extern void SetZenithCurrentClusterSize(uint64 size); +extern uint64 GetZenithCurrentClusterSize(void); + extern bool PromoteIsTriggered(void); extern bool CheckPromoteSignal(void); extern void WakeupRecovery(void); @@ -400,6 +418,8 @@ extern SessionBackupState get_backup_status(void); #define TABLESPACE_MAP "tablespace_map" #define TABLESPACE_MAP_OLD "tablespace_map.old" +#define ZENITH_SIGNAL_FILE "zenith.signal" + /* files to signal promotion to primary */ #define PROMOTE_SIGNAL_FILE "promote" diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index f1d8c39edf1..391c1a2716a 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -38,6 +38,10 @@ #define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image * is taken */ +extern int max_replication_apply_lag; +extern int max_replication_flush_lag; +extern int max_replication_write_lag; + /* prototypes for public functions in xloginsert.c: */ extern void XLogBeginInsert(void); extern void XLogSetRecordFlags(uint8 flags); diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 10458c23eda..3dd4df7bafa 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -262,6 +262,11 @@ struct XLogReaderState XLogRecPtr missingContrecPtr; /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */ XLogRecPtr overwrittenRecPtr; + + /* Disable validation to allow dumping corrupt WAL */ + bool skip_page_validation; + bool skip_invalid_records; + bool skip_lsn_checks; }; /* Get a new XLogReader */ diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index 9ac602b674d..7cebdf3af6d 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -33,6 +33,8 @@ typedef enum * need to be replayed) */ } XLogRedoAction; +extern bool (*redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id); + extern XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 buffer_id, Buffer *buf); extern Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id); diff --git 
a/src/include/commands/explain.h b/src/include/commands/explain.h index e94d9e49cf6..960dbf33dd8 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -46,6 +46,7 @@ typedef struct ExplainState bool timing; /* print detailed node timing */ bool summary; /* print total planning and execution timing */ bool settings; /* print modified settings */ + bool prefetch; /* print prefetch statistics */ ExplainFormat format; /* output format */ /* state for output formatting --- not reset for each new plan tree */ int indent; /* current indentation level */ diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 2f9905b7c8e..602c7347a6c 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -15,6 +15,14 @@ #include "portability/instr_time.h" +/* Prefetch statistics */ +typedef struct +{ + int64 hits; + int64 misses; + int64 expired; + int64 duplicates; +} PrefetchInfo; /* * BufferUsage and WalUsage counters keep being incremented infinitely, @@ -35,6 +43,7 @@ typedef struct BufferUsage int64 temp_blks_written; /* # of temp blocks written */ instr_time blk_read_time; /* time spent reading */ instr_time blk_write_time; /* time spent writing */ + PrefetchInfo prefetch; /* prefetch statistics */ } BufferUsage; /* diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 3f155ce4f84..fd5e542d95c 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -106,6 +106,10 @@ extern PGDLLIMPORT volatile uint32 CritSectionCount; /* in tcop/postgres.c */ extern void ProcessInterrupts(void); +/* Callback invoked by ProcessInterrupts; it is called again, in a loop, for as long as it returns true. */ +typedef bool (*process_interrupts_callback_t)(void); +extern process_interrupts_callback_t ProcessInterruptsCallback; + /* Test whether an interrupt is pending */ #ifndef WIN32 #define INTERRUPTS_PENDING_CONDITION() \ @@ -489,4 +493,7 @@ extern void CancelBackup(void); extern size_t get_hash_memory_limit(void); extern int get_hash_mem(void); +/* in storage/buffer/buf_init.c */ +extern bool am_wal_redo_postgres; + #endif /* MISCADMIN_H */ diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 2113bc82de0..8639b59687c 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -67,6 +67,7 @@ extern PGDLLIMPORT bool enable_parallel_append; extern PGDLLIMPORT bool enable_parallel_hash; extern PGDLLIMPORT bool enable_partition_pruning; extern PGDLLIMPORT bool enable_async_append; +extern PGDLLIMPORT bool enable_seqscan_prefetch; extern PGDLLIMPORT int constraint_exclusion; extern double index_pages_fetched(double tuples_fetched, BlockNumber pages, diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 5d12b21ccda..e2d94f4664d 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -342,6 +342,9 @@ /* Define if you have a function readline library */ #undef HAVE_LIBREADLINE +/* Define to 1 if you have the `seccomp' library (-lseccomp). */ +#undef HAVE_LIBSECCOMP + /* Define to 1 if you have the `selinux' library (-lselinux). 
*/ #undef HAVE_LIBSELINUX diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h index 828106933ca..c740b938494 100644 --- a/src/include/replication/walsender.h +++ b/src/include/replication/walsender.h @@ -12,6 +12,7 @@ #ifndef _WALSENDER_H #define _WALSENDER_H +#include "access/xlog.h" #include <signal.h> /* @@ -48,6 +49,25 @@ extern void WalSndWaitStopping(void); extern void HandleWalSndInitStopping(void); extern void WalSndRqstFileReload(void); +/* + * Hook to check for WAL receive back pressure. + * The return value is a delay in microseconds. + */ +extern uint64 (*delay_backend_us)(void); + +/* expose these so that they can be reused by the neon walproposer extension */ +extern void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time); +extern TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now); +extern void ProcessStandbyReply(XLogRecPtr writePtr, XLogRecPtr flushPtr, + XLogRecPtr applyPtr, TimestampTz replyTime, + bool replyRequested); +extern void PhysicalConfirmReceivedLocation(XLogRecPtr lsn); +extern void ProcessStandbyHSFeedback(TimestampTz replyTime, + TransactionId feedbackXmin, + uint32 feedbackEpoch, + TransactionId feedbackCatalogXmin, + uint32 feedbackCatalogEpoch); + /* * Remember that we want to wakeup walsenders later * diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 33fcaf5c9a8..753ff907452 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -278,6 +278,8 @@ typedef struct WritebackContext extern PGDLLIMPORT BufferDescPadded *BufferDescriptors; extern PGDLLIMPORT WritebackContext BackendWritebackContext; +extern Buffer wal_redo_buffer; + /* in localbuf.c */ extern BufferDesc *LocalBufferDescriptors; diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index aa64fb42ec4..6d140786c74 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -76,6 +76,8 @@ extern int checkpoint_flush_after; extern int backend_flush_after; extern int bgwriter_flush_after; +extern bool zenith_test_evict; + /* in buf_init.c */ extern PGDLLIMPORT char *BufferBlocks; diff --git a/src/include/storage/md.h b/src/include/storage/md.h index 752b440864d..da53e504aee 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -30,6 +30,7 @@ extern void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); +extern int mdfcntl(SMgrRelation reln, int cmd, void* data, size_t size); extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); extern void mdwrite(SMgrRelation reln, ForkNumber forknum, diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index a6fbf7b6a6c..b96ccd396eb 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -18,6 +18,8 @@ #include "storage/block.h" #include "storage/relfilenode.h" +struct f_smgr; + /* * smgr.c maintains a table of SMgrRelation objects, which are essentially * cached file handles. An SMgrRelation is created (if not already present) @@ -41,6 +43,9 @@ typedef struct SMgrRelationData /* rnode is the hashtable lookup key, so it must be first! 
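delay_backend_us is the back-pressure hook that the max_replication_*_lag GUCs (added to guc.c above, in MB) are meant to feed: a non-zero return asks the backend to sleep that many microseconds, which is what the BackPressure wait event accounts for. A hypothetical provider sketch, where my_replica_lag_bytes() is an assumed helper, not part of the patch:

#include "postgres.h"
#include "access/xloginsert.h"
#include "replication/walsender.h"

/* Assumption: the extension tracks the slowest replica's lag in bytes. */
extern uint64 my_replica_lag_bytes(void);

static uint64
my_backpressure_delay(void)
{
	/* GUC is in MB (GUC_UNIT_MB); convert the cap to bytes */
	if (max_replication_write_lag > 0 &&
		my_replica_lag_bytes() >
		(uint64) max_replication_write_lag * 1024 * 1024)
		return 10000;			/* sleep 10ms, then re-check */
	return 0;					/* no throttling needed */
}

void
my_walproposer_init(void)	/* e.g. called from _PG_init() */
{
	delay_backend_us = my_backpressure_delay;
}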
*/ RelFileNodeBackend smgr_rnode; /* relation physical identifier */ + /* copy of pg_class.relpersistence, or 0 if not known */ + char smgr_relpersistence; + /* pointer to owning pointer, or NULL if none */ struct SMgrRelationData **smgr_owner; @@ -59,7 +64,7 @@ typedef struct SMgrRelationData * Fields below here are intended to be private to smgr.c and its * submodules. Do not touch them from elsewhere. */ - int smgr_which; /* storage manager selector */ + const struct f_smgr *smgr; /* * for md.c; per-fork arrays of the number of open segments @@ -77,8 +82,68 @@ typedef SMgrRelationData *SMgrRelation; #define SmgrIsTemp(smgr) \ RelFileNodeBackendIsTemp((smgr)->smgr_rnode) + +/* + * This struct of function pointers defines the API between smgr.c and + * any individual storage manager module. Note that smgr subfunctions are + * generally expected to report problems via elog(ERROR). An exception is + * that smgr_unlink should use elog(WARNING), rather than erroring out, + * because we normally unlink relations during post-commit/abort cleanup, + * and so it's too late to raise an error. Also, various conditions that + * would normally be errors should be allowed during bootstrap and/or WAL + * recovery --- see comments in md.c for details. + */ +typedef struct f_smgr +{ + void (*smgr_init) (void); /* may be NULL */ + void (*smgr_shutdown) (void); /* may be NULL */ + void (*smgr_open) (SMgrRelation reln); + void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, + bool isRedo); + bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum, + bool isRedo); + void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); + bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); + void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer); + void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); + void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); + BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, + BlockNumber nblocks); + void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); + + void (*smgr_start_unlogged_build) (SMgrRelation reln); + void (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln); + void (*smgr_end_unlogged_build) (SMgrRelation reln); + int (*smgr_fcntl)(SMgrRelation reln, int cmd, void* data, size_t size); +} f_smgr; + +typedef void (*smgr_init_hook_type) (void); +typedef void (*smgr_shutdown_hook_type) (void); +extern PGDLLIMPORT smgr_init_hook_type smgr_init_hook; +extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook; +extern void smgr_init_standard(void); +extern void smgr_shutdown_standard(void); + +/* Alternative implementation of calculate_database_size() */ +typedef int64 (*dbsize_hook_type) (Oid dbOid); +extern PGDLLIMPORT dbsize_hook_type dbsize_hook; + +typedef const f_smgr *(*smgr_hook_type) (BackendId backend, RelFileNode rnode); +extern PGDLLIMPORT smgr_hook_type smgr_hook; +extern const f_smgr *smgr_standard(BackendId backend, RelFileNode rnode); + +extern const f_smgr *smgr(BackendId backend, RelFileNode rnode); + extern void smgrinit(void); -extern SMgrRelation smgropen(RelFileNode rnode, BackendId 
backend); +extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend, char relpersistence); extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln); extern void smgrclearowner(SMgrRelation *owner, SMgrRelation reln); @@ -105,4 +170,13 @@ extern void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum); extern void AtEOXact_SMgr(void); +extern void smgr_start_unlogged_build(SMgrRelation reln); +extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln); +extern void smgr_end_unlogged_build(SMgrRelation reln); +extern int smgr_fcntl(SMgrRelation reln, int cmd, void* data, size_t size); + +#define SMGR_FCNTL_READ_TEMP_FILE 2 +#define SMGR_FCNTL_WRITE_TEMP_FILE 3 +#define SMGR_FCNTL_UNLINK_TEMP_FILE 4 + #endif /* SMGR_H */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index d62ac86c520..83e1602d0c7 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -533,7 +533,8 @@ typedef struct ViewOptions #define RelationOpenSmgr(relation) \ do { \ if ((relation)->rd_smgr == NULL) \ - smgrsetowner(&((relation)->rd_smgr), smgropen((relation)->rd_node, (relation)->rd_backend)); \ + smgrsetowner(&((relation)->rd_smgr), \ + smgropen((relation)->rd_node, (relation)->rd_backend, (relation)->rd_rel->relpersistence)); \ } while (0) /* diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index 4b1cea65938..c28cf949f9e 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -142,7 +142,8 @@ typedef enum WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL, WAIT_EVENT_VACUUM_DELAY, WAIT_EVENT_CHECKPOINT_WRITE_DELAY, - WAIT_EVENT_REGISTER_SYNC_REQUEST + WAIT_EVENT_REGISTER_SYNC_REQUEST, + WAIT_EVENT_BACK_PRESSURE } WaitEventTimeout; /* ---------- diff --git a/src/test/regress/expected/alter_table_1.out b/src/test/regress/expected/alter_table_1.out new file mode 100644 index 00000000000..0f116d8750d --- /dev/null +++ b/src/test/regress/expected/alter_table_1.out @@ -0,0 +1,4487 @@ +-- +-- ALTER_TABLE +-- +-- Clean up in case a prior regression run failed +SET client_min_messages TO 'warning'; +DROP ROLE IF EXISTS regress_alter_table_user1; +RESET client_min_messages; +CREATE USER regress_alter_table_user1; +-- +-- add attribute +-- +CREATE TABLE attmp (initial int4); +COMMENT ON TABLE attmp_wrong IS 'table comment'; +ERROR: relation "attmp_wrong" does not exist +COMMENT ON TABLE attmp IS 'table comment'; +COMMENT ON TABLE attmp IS NULL; +ALTER TABLE attmp ADD COLUMN xmin integer; -- fails +ERROR: column name "xmin" conflicts with a system column name +ALTER TABLE attmp ADD COLUMN a int4 default 3; +ALTER TABLE attmp ADD COLUMN b name; +ALTER TABLE attmp ADD COLUMN c text; +ALTER TABLE attmp ADD COLUMN d float8; +ALTER TABLE attmp ADD COLUMN e float4; +ALTER TABLE attmp ADD COLUMN f int2; +ALTER TABLE attmp ADD COLUMN g polygon; +ALTER TABLE attmp ADD COLUMN i char; +ALTER TABLE attmp ADD COLUMN k int4; +ALTER TABLE attmp ADD COLUMN l tid; +ALTER TABLE attmp ADD COLUMN m xid; +ALTER TABLE attmp ADD COLUMN n oidvector; +--ALTER TABLE attmp ADD COLUMN o lock; +ALTER TABLE attmp ADD COLUMN p boolean; +ALTER TABLE attmp ADD COLUMN q point; +ALTER TABLE attmp ADD COLUMN r lseg; +ALTER TABLE attmp ADD COLUMN s path; +ALTER TABLE attmp ADD COLUMN t box; +ALTER TABLE attmp ADD COLUMN v timestamp; +ALTER TABLE attmp ADD COLUMN w interval; +ALTER TABLE attmp ADD COLUMN x float8[]; +ALTER TABLE attmp ADD 
COLUMN y float4[]; +ALTER TABLE attmp ADD COLUMN z int2[]; +INSERT INTO attmp (a, b, c, d, e, f, g, i, k, l, m, n, p, q, r, s, t, + v, w, x, y, z) + VALUES (4, 'name', 'text', 4.1, 4.1, 2, '(4.1,4.1,3.1,3.1)', + 'c', + 314159, '(1,1)', '512', + '1 2 3 4 5 6 7 8', true, '(1.1,1.1)', '(4.1,4.1,3.1,3.1)', + '(0,2,4.1,4.1,3.1,3.1)', '(4.1,4.1,3.1,3.1)', + 'epoch', '01:00:10', '{1.0,2.0,3.0,4.0}', '{1.0,2.0,3.0,4.0}', '{1,2,3,4}'); +SELECT * FROM attmp; + initial | a | b | c | d | e | f | g | i | k | l | m | n | p | q | r | s | t | v | w | x | y | z +---------+---+------+------+-----+-----+---+-----------------------+---+--------+-------+-----+-----------------+---+-----------+-----------------------+-----------------------------+---------------------+--------------------------+------------------+-----------+-----------+----------- + | 4 | name | text | 4.1 | 4.1 | 2 | ((4.1,4.1),(3.1,3.1)) | c | 314159 | (1,1) | 512 | 1 2 3 4 5 6 7 8 | t | (1.1,1.1) | [(4.1,4.1),(3.1,3.1)] | ((0,2),(4.1,4.1),(3.1,3.1)) | (4.1,4.1),(3.1,3.1) | Thu Jan 01 00:00:00 1970 | @ 1 hour 10 secs | {1,2,3,4} | {1,2,3,4} | {1,2,3,4} +(1 row) + +DROP TABLE attmp; +-- the wolf bug - schema mods caused inconsistent row descriptors +CREATE TABLE attmp ( + initial int4 +); +ALTER TABLE attmp ADD COLUMN a int4; +ALTER TABLE attmp ADD COLUMN b name; +ALTER TABLE attmp ADD COLUMN c text; +ALTER TABLE attmp ADD COLUMN d float8; +ALTER TABLE attmp ADD COLUMN e float4; +ALTER TABLE attmp ADD COLUMN f int2; +ALTER TABLE attmp ADD COLUMN g polygon; +ALTER TABLE attmp ADD COLUMN i char; +ALTER TABLE attmp ADD COLUMN k int4; +ALTER TABLE attmp ADD COLUMN l tid; +ALTER TABLE attmp ADD COLUMN m xid; +ALTER TABLE attmp ADD COLUMN n oidvector; +--ALTER TABLE attmp ADD COLUMN o lock; +ALTER TABLE attmp ADD COLUMN p boolean; +ALTER TABLE attmp ADD COLUMN q point; +ALTER TABLE attmp ADD COLUMN r lseg; +ALTER TABLE attmp ADD COLUMN s path; +ALTER TABLE attmp ADD COLUMN t box; +ALTER TABLE attmp ADD COLUMN v timestamp; +ALTER TABLE attmp ADD COLUMN w interval; +ALTER TABLE attmp ADD COLUMN x float8[]; +ALTER TABLE attmp ADD COLUMN y float4[]; +ALTER TABLE attmp ADD COLUMN z int2[]; +INSERT INTO attmp (a, b, c, d, e, f, g, i, k, l, m, n, p, q, r, s, t, + v, w, x, y, z) + VALUES (4, 'name', 'text', 4.1, 4.1, 2, '(4.1,4.1,3.1,3.1)', + 'c', + 314159, '(1,1)', '512', + '1 2 3 4 5 6 7 8', true, '(1.1,1.1)', '(4.1,4.1,3.1,3.1)', + '(0,2,4.1,4.1,3.1,3.1)', '(4.1,4.1,3.1,3.1)', + 'epoch', '01:00:10', '{1.0,2.0,3.0,4.0}', '{1.0,2.0,3.0,4.0}', '{1,2,3,4}'); +SELECT * FROM attmp; + initial | a | b | c | d | e | f | g | i | k | l | m | n | p | q | r | s | t | v | w | x | y | z +---------+---+------+------+-----+-----+---+-----------------------+---+--------+-------+-----+-----------------+---+-----------+-----------------------+-----------------------------+---------------------+--------------------------+------------------+-----------+-----------+----------- + | 4 | name | text | 4.1 | 4.1 | 2 | ((4.1,4.1),(3.1,3.1)) | c | 314159 | (1,1) | 512 | 1 2 3 4 5 6 7 8 | t | (1.1,1.1) | [(4.1,4.1),(3.1,3.1)] | ((0,2),(4.1,4.1),(3.1,3.1)) | (4.1,4.1),(3.1,3.1) | Thu Jan 01 00:00:00 1970 | @ 1 hour 10 secs | {1,2,3,4} | {1,2,3,4} | {1,2,3,4} +(1 row) + +CREATE INDEX attmp_idx ON attmp (a, (d + e), b); +ALTER INDEX attmp_idx ALTER COLUMN 0 SET STATISTICS 1000; +ERROR: column number must be in range from 1 to 32767 +LINE 1: ALTER INDEX attmp_idx ALTER COLUMN 0 SET STATISTICS 1000; + ^ +ALTER INDEX attmp_idx ALTER COLUMN 1 SET STATISTICS 1000; +ERROR: cannot alter 
statistics on non-expression column "a" of index "attmp_idx" +HINT: Alter statistics on table column instead. +ALTER INDEX attmp_idx ALTER COLUMN 2 SET STATISTICS 1000; +\d+ attmp_idx + Index "public.attmp_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+------------------+------+------------+---------+-------------- + a | integer | yes | a | plain | + expr | double precision | yes | (d + e) | plain | 1000 + b | cstring | yes | b | plain | +btree, for table "public.attmp" + +ALTER INDEX attmp_idx ALTER COLUMN 3 SET STATISTICS 1000; +ERROR: cannot alter statistics on non-expression column "b" of index "attmp_idx" +HINT: Alter statistics on table column instead. +ALTER INDEX attmp_idx ALTER COLUMN 4 SET STATISTICS 1000; +ERROR: column number 4 of relation "attmp_idx" does not exist +ALTER INDEX attmp_idx ALTER COLUMN 2 SET STATISTICS -1; +DROP TABLE attmp; +-- +-- rename - check on both non-temp and temp tables +-- +CREATE TABLE attmp (regtable int); +CREATE TEMP TABLE attmp (attmptable int); +ALTER TABLE attmp RENAME TO attmp_new; +SELECT * FROM attmp; + regtable +---------- +(0 rows) + +SELECT * FROM attmp_new; + attmptable +------------ +(0 rows) + +ALTER TABLE attmp RENAME TO attmp_new2; +SELECT * FROM attmp; -- should fail +ERROR: relation "attmp" does not exist +LINE 1: SELECT * FROM attmp; + ^ +SELECT * FROM attmp_new; + attmptable +------------ +(0 rows) + +SELECT * FROM attmp_new2; + regtable +---------- +(0 rows) + +DROP TABLE attmp_new; +DROP TABLE attmp_new2; +-- check rename of partitioned tables and indexes also +CREATE TABLE part_attmp (a int primary key) partition by range (a); +CREATE TABLE part_attmp1 PARTITION OF part_attmp FOR VALUES FROM (0) TO (100); +ALTER INDEX part_attmp_pkey RENAME TO part_attmp_index; +ALTER INDEX part_attmp1_pkey RENAME TO part_attmp1_index; +ALTER TABLE part_attmp RENAME TO part_at2tmp; +ALTER TABLE part_attmp1 RENAME TO part_at2tmp1; +SET ROLE regress_alter_table_user1; +ALTER INDEX part_attmp_index RENAME TO fail; +ERROR: must be owner of index part_attmp_index +ALTER INDEX part_attmp1_index RENAME TO fail; +ERROR: must be owner of index part_attmp1_index +ALTER TABLE part_at2tmp RENAME TO fail; +ERROR: must be owner of table part_at2tmp +ALTER TABLE part_at2tmp1 RENAME TO fail; +ERROR: must be owner of table part_at2tmp1 +RESET ROLE; +DROP TABLE part_at2tmp; +-- +-- check renaming to a table's array type's autogenerated name +-- (the array type's name should get out of the way) +-- +CREATE TABLE attmp_array (id int); +CREATE TABLE attmp_array2 (id int); +SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; + typname +-------------- + _attmp_array +(1 row) + +SELECT typname FROM pg_type WHERE oid = 'attmp_array2[]'::regtype; + typname +--------------- + _attmp_array2 +(1 row) + +ALTER TABLE attmp_array2 RENAME TO _attmp_array; +SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; + typname +--------------- + __attmp_array +(1 row) + +SELECT typname FROM pg_type WHERE oid = '_attmp_array[]'::regtype; + typname +---------------- + ___attmp_array +(1 row) + +DROP TABLE _attmp_array; +DROP TABLE attmp_array; +-- renaming to table's own array type's name is an interesting corner case +CREATE TABLE attmp_array (id int); +SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; + typname +-------------- + _attmp_array +(1 row) + +ALTER TABLE attmp_array RENAME TO _attmp_array; +SELECT typname FROM pg_type WHERE oid = '_attmp_array[]'::regtype; + typname +--------------- + __attmp_array +(1 
row) + +DROP TABLE _attmp_array; +-- ALTER TABLE ... RENAME on non-table relations +-- renaming indexes (FIXME: this should probably test the index's functionality) +ALTER INDEX IF EXISTS __onek_unique1 RENAME TO attmp_onek_unique1; +NOTICE: relation "__onek_unique1" does not exist, skipping +ALTER INDEX IF EXISTS __attmp_onek_unique1 RENAME TO onek_unique1; +NOTICE: relation "__attmp_onek_unique1" does not exist, skipping +ALTER INDEX onek_unique1 RENAME TO attmp_onek_unique1; +ALTER INDEX attmp_onek_unique1 RENAME TO onek_unique1; +SET ROLE regress_alter_table_user1; +ALTER INDEX onek_unique1 RENAME TO fail; -- permission denied +ERROR: must be owner of index onek_unique1 +RESET ROLE; +-- renaming views +CREATE VIEW attmp_view (unique1) AS SELECT unique1 FROM tenk1; +ALTER TABLE attmp_view RENAME TO attmp_view_new; +SET ROLE regress_alter_table_user1; +ALTER VIEW attmp_view_new RENAME TO fail; -- permission denied +ERROR: must be owner of view attmp_view_new +RESET ROLE; +-- hack to ensure we get an indexscan here +set enable_seqscan to off; +set enable_bitmapscan to off; +-- 5 values, sorted +SELECT unique1 FROM tenk1 WHERE unique1 < 5; + unique1 +--------- + 0 + 1 + 2 + 3 + 4 +(5 rows) + +reset enable_seqscan; +reset enable_bitmapscan; +DROP VIEW attmp_view_new; +-- toast-like relation name +alter table stud_emp rename to pg_toast_stud_emp; +alter table pg_toast_stud_emp rename to stud_emp; +-- renaming index should rename constraint as well +ALTER TABLE onek ADD CONSTRAINT onek_unique1_constraint UNIQUE (unique1); +ALTER INDEX onek_unique1_constraint RENAME TO onek_unique1_constraint_foo; +ALTER TABLE onek DROP CONSTRAINT onek_unique1_constraint_foo; +-- renaming constraint +ALTER TABLE onek ADD CONSTRAINT onek_check_constraint CHECK (unique1 >= 0); +ALTER TABLE onek RENAME CONSTRAINT onek_check_constraint TO onek_check_constraint_foo; +ALTER TABLE onek DROP CONSTRAINT onek_check_constraint_foo; +-- renaming constraint should rename index as well +ALTER TABLE onek ADD CONSTRAINT onek_unique1_constraint UNIQUE (unique1); +DROP INDEX onek_unique1_constraint; -- to see whether it's there +ERROR: cannot drop index onek_unique1_constraint because constraint onek_unique1_constraint on table onek requires it +HINT: You can drop constraint onek_unique1_constraint on table onek instead. +ALTER TABLE onek RENAME CONSTRAINT onek_unique1_constraint TO onek_unique1_constraint_foo; +DROP INDEX onek_unique1_constraint_foo; -- to see whether it's there +ERROR: cannot drop index onek_unique1_constraint_foo because constraint onek_unique1_constraint_foo on table onek requires it +HINT: You can drop constraint onek_unique1_constraint_foo on table onek instead. +ALTER TABLE onek DROP CONSTRAINT onek_unique1_constraint_foo; +-- renaming constraints vs. 
inheritance +CREATE TABLE constraint_rename_test (a int CONSTRAINT con1 CHECK (a > 0), b int, c int); +\d constraint_rename_test + Table "public.constraint_rename_test" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | +Check constraints: + "con1" CHECK (a > 0) + +CREATE TABLE constraint_rename_test2 (a int CONSTRAINT con1 CHECK (a > 0), d int) INHERITS (constraint_rename_test); +NOTICE: merging column "a" with inherited definition +NOTICE: merging constraint "con1" with inherited definition +\d constraint_rename_test2 + Table "public.constraint_rename_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | + d | integer | | | +Check constraints: + "con1" CHECK (a > 0) +Inherits: constraint_rename_test + +ALTER TABLE constraint_rename_test2 RENAME CONSTRAINT con1 TO con1foo; -- fail +ERROR: cannot rename inherited constraint "con1" +ALTER TABLE ONLY constraint_rename_test RENAME CONSTRAINT con1 TO con1foo; -- fail +ERROR: inherited constraint "con1" must be renamed in child tables too +ALTER TABLE constraint_rename_test RENAME CONSTRAINT con1 TO con1foo; -- ok +\d constraint_rename_test + Table "public.constraint_rename_test" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) +Number of child tables: 1 (Use \d+ to list them.) + +\d constraint_rename_test2 + Table "public.constraint_rename_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | + d | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) +Inherits: constraint_rename_test + +ALTER TABLE constraint_rename_test ADD CONSTRAINT con2 CHECK (b > 0) NO INHERIT; +ALTER TABLE ONLY constraint_rename_test RENAME CONSTRAINT con2 TO con2foo; -- ok +ALTER TABLE constraint_rename_test RENAME CONSTRAINT con2foo TO con2bar; -- ok +\d constraint_rename_test + Table "public.constraint_rename_test" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) + "con2bar" CHECK (b > 0) NO INHERIT +Number of child tables: 1 (Use \d+ to list them.) + +\d constraint_rename_test2 + Table "public.constraint_rename_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | + d | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) +Inherits: constraint_rename_test + +ALTER TABLE constraint_rename_test ADD CONSTRAINT con3 PRIMARY KEY (a); +ALTER TABLE constraint_rename_test RENAME CONSTRAINT con3 TO con3foo; -- ok +\d constraint_rename_test + Table "public.constraint_rename_test" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | + b | integer | | | + c | integer | | | +Indexes: + "con3foo" PRIMARY KEY, btree (a) +Check constraints: + "con1foo" CHECK (a > 0) + "con2bar" CHECK (b > 0) NO INHERIT +Number of child tables: 1 (Use \d+ to list them.) 
+ +\d constraint_rename_test2 + Table "public.constraint_rename_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | + b | integer | | | + c | integer | | | + d | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) +Inherits: constraint_rename_test + +DROP TABLE constraint_rename_test2; +DROP TABLE constraint_rename_test; +ALTER TABLE IF EXISTS constraint_not_exist RENAME CONSTRAINT con3 TO con3foo; -- ok +NOTICE: relation "constraint_not_exist" does not exist, skipping +ALTER TABLE IF EXISTS constraint_rename_test ADD CONSTRAINT con4 UNIQUE (a); +NOTICE: relation "constraint_rename_test" does not exist, skipping +-- renaming constraints with cache reset of target relation +CREATE TABLE constraint_rename_cache (a int, + CONSTRAINT chk_a CHECK (a > 0), + PRIMARY KEY (a)); +ALTER TABLE constraint_rename_cache + RENAME CONSTRAINT chk_a TO chk_a_new; +ALTER TABLE constraint_rename_cache + RENAME CONSTRAINT constraint_rename_cache_pkey TO constraint_rename_pkey_new; +CREATE TABLE like_constraint_rename_cache + (LIKE constraint_rename_cache INCLUDING ALL); +\d like_constraint_rename_cache + Table "public.like_constraint_rename_cache" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | +Indexes: + "like_constraint_rename_cache_pkey" PRIMARY KEY, btree (a) +Check constraints: + "chk_a_new" CHECK (a > 0) + +DROP TABLE constraint_rename_cache; +DROP TABLE like_constraint_rename_cache; +-- FOREIGN KEY CONSTRAINT adding TEST +CREATE TABLE attmp2 (a int primary key); +CREATE TABLE attmp3 (a int, b int); +CREATE TABLE attmp4 (a int, b int, unique(a,b)); +CREATE TABLE attmp5 (a int, b int); +-- Insert rows into attmp2 (pktable) +INSERT INTO attmp2 values (1); +INSERT INTO attmp2 values (2); +INSERT INTO attmp2 values (3); +INSERT INTO attmp2 values (4); +-- Insert rows into attmp3 +INSERT INTO attmp3 values (1,10); +INSERT INTO attmp3 values (1,20); +INSERT INTO attmp3 values (5,50); +-- Try (and fail) to add constraint due to invalid source columns +ALTER TABLE attmp3 add constraint attmpconstr foreign key(c) references attmp2 match full; +ERROR: column "c" referenced in foreign key constraint does not exist +-- Try (and fail) to add constraint due to invalid destination columns explicitly given +ALTER TABLE attmp3 add constraint attmpconstr foreign key(a) references attmp2(b) match full; +ERROR: column "b" referenced in foreign key constraint does not exist +-- Try (and fail) to add constraint due to invalid data +ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full; +ERROR: insert or update on table "attmp3" violates foreign key constraint "attmpconstr" +DETAIL: Key (a)=(5) is not present in table "attmp2". +-- Delete failing row +DELETE FROM attmp3 where a=5; +-- Try (and succeed) +ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full; +ALTER TABLE attmp3 drop constraint attmpconstr; +INSERT INTO attmp3 values (5,50); +-- Try NOT VALID and then VALIDATE CONSTRAINT, but fails. Delete failure then re-validate +ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full NOT VALID; +ALTER TABLE attmp3 validate constraint attmpconstr; +ERROR: insert or update on table "attmp3" violates foreign key constraint "attmpconstr" +DETAIL: Key (a)=(5) is not present in table "attmp2". 
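The NOT VALID / VALIDATE CONSTRAINT sequence exercised above is the standard technique for adding a foreign key to a table that may already hold violating rows: ADD ... NOT VALID records the constraint and checks only new writes, and the later VALIDATE CONSTRAINT scans the existing data as a separate, less disruptive step. A minimal sketch of that workflow, using hypothetical tables customers and orders that are not part of this suite:

CREATE TABLE customers (id int PRIMARY KEY);
CREATE TABLE orders (customer_id int);
-- recorded immediately; existing rows are not scanned yet
ALTER TABLE orders ADD CONSTRAINT orders_customer_fk
  FOREIGN KEY (customer_id) REFERENCES customers (id) NOT VALID;
-- scans the existing rows; new writes were already being checked
ALTER TABLE orders VALIDATE CONSTRAINT orders_customer_fk;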
+-- Delete failing row +DELETE FROM attmp3 where a=5; +-- Try (and succeed) and repeat to show it works on already valid constraint +ALTER TABLE attmp3 validate constraint attmpconstr; +ALTER TABLE attmp3 validate constraint attmpconstr; +-- Try a non-verified CHECK constraint +ALTER TABLE attmp3 ADD CONSTRAINT b_greater_than_ten CHECK (b > 10); -- fail +ERROR: check constraint "b_greater_than_ten" of relation "attmp3" is violated by some row +ALTER TABLE attmp3 ADD CONSTRAINT b_greater_than_ten CHECK (b > 10) NOT VALID; -- succeeds +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- fails +ERROR: check constraint "b_greater_than_ten" of relation "attmp3" is violated by some row +DELETE FROM attmp3 WHERE NOT b > 10; +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- succeeds +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- succeeds +-- Test inherited NOT VALID CHECK constraints +select * from attmp3; + a | b +---+---- + 1 | 20 +(1 row) + +CREATE TABLE attmp6 () INHERITS (attmp3); +CREATE TABLE attmp7 () INHERITS (attmp3); +INSERT INTO attmp6 VALUES (6, 30), (7, 16); +ALTER TABLE attmp3 ADD CONSTRAINT b_le_20 CHECK (b <= 20) NOT VALID; +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_le_20; -- fails +ERROR: check constraint "b_le_20" of relation "attmp6" is violated by some row +DELETE FROM attmp6 WHERE b > 20; +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_le_20; -- succeeds +-- An already validated constraint must not be revalidated +CREATE FUNCTION boo(int) RETURNS int IMMUTABLE STRICT LANGUAGE plpgsql AS $$ BEGIN RAISE NOTICE 'boo: %', $1; RETURN $1; END; $$; +INSERT INTO attmp7 VALUES (8, 18); +ALTER TABLE attmp7 ADD CONSTRAINT identity CHECK (b = boo(b)); +NOTICE: boo: 18 +ALTER TABLE attmp3 ADD CONSTRAINT IDENTITY check (b = boo(b)) NOT VALID; +NOTICE: merging constraint "identity" with inherited definition +ALTER TABLE attmp3 VALIDATE CONSTRAINT identity; +NOTICE: boo: 20 +NOTICE: boo: 16 +-- A NO INHERIT constraint should not be looked for in children during VALIDATE CONSTRAINT +create table parent_noinh_convalid (a int); +create table child_noinh_convalid () inherits (parent_noinh_convalid); +insert into parent_noinh_convalid values (1); +insert into child_noinh_convalid values (1); +alter table parent_noinh_convalid add constraint check_a_is_2 check (a = 2) no inherit not valid; +-- fail, because of the row in parent +alter table parent_noinh_convalid validate constraint check_a_is_2; +ERROR: check constraint "check_a_is_2" of relation "parent_noinh_convalid" is violated by some row +delete from only parent_noinh_convalid; +-- ok (parent itself contains no violating rows) +alter table parent_noinh_convalid validate constraint check_a_is_2; +select convalidated from pg_constraint where conrelid = 'parent_noinh_convalid'::regclass and conname = 'check_a_is_2'; + convalidated +-------------- + t +(1 row) + +-- cleanup +drop table parent_noinh_convalid, child_noinh_convalid; +-- Try (and fail) to create constraint from attmp5(a) to attmp4(a) - unique constraint on +-- attmp4 is a,b +ALTER TABLE attmp5 add constraint attmpconstr foreign key(a) references attmp4(a) match full; +ERROR: there is no unique constraint matching given keys for referenced table "attmp4" +DROP TABLE attmp7; +DROP TABLE attmp6; +DROP TABLE attmp5; +DROP TABLE attmp4; +DROP TABLE attmp3; +DROP TABLE attmp2; +-- NOT VALID with plan invalidation -- ensure we don't use a constraint for +-- exclusion until validated +set constraint_exclusion TO 'partition'; +create table nv_parent (d date, 
check (false) no inherit not valid); +-- not valid constraint added at creation time should automatically become valid +\d nv_parent + Table "public.nv_parent" + Column | Type | Collation | Nullable | Default +--------+------+-----------+----------+--------- + d | date | | | +Check constraints: + "nv_parent_check" CHECK (false) NO INHERIT + +create table nv_child_2010 () inherits (nv_parent); +create table nv_child_2011 () inherits (nv_parent); +alter table nv_child_2010 add check (d between '2010-01-01'::date and '2010-12-31'::date) not valid; +alter table nv_child_2011 add check (d between '2011-01-01'::date and '2011-12-31'::date) not valid; +explain (costs off) select * from nv_parent where d between '2011-08-01' and '2011-08-31'; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Seq Scan on nv_parent nv_parent_1 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) + -> Seq Scan on nv_child_2010 nv_parent_2 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) + -> Seq Scan on nv_child_2011 nv_parent_3 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) +(7 rows) + +create table nv_child_2009 (check (d between '2009-01-01'::date and '2009-12-31'::date)) inherits (nv_parent); +explain (costs off) select * from nv_parent where d between '2011-08-01'::date and '2011-08-31'::date; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Seq Scan on nv_parent nv_parent_1 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) + -> Seq Scan on nv_child_2010 nv_parent_2 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) + -> Seq Scan on nv_child_2011 nv_parent_3 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) +(7 rows) + +explain (costs off) select * from nv_parent where d between '2009-08-01'::date and '2009-08-31'::date; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Seq Scan on nv_parent nv_parent_1 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2010 nv_parent_2 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2011 nv_parent_3 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2009 nv_parent_4 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) +(9 rows) + +-- after validation, the constraint should be used +alter table nv_child_2011 VALIDATE CONSTRAINT nv_child_2011_d_check; +explain (costs off) select * from nv_parent where d between '2009-08-01'::date and '2009-08-31'::date; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Seq Scan on nv_parent nv_parent_1 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2010 nv_parent_2 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2009 nv_parent_3 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) +(7 rows) + +-- add an inherited NOT VALID constraint +alter table nv_parent add check (d between '2001-01-01'::date and '2099-12-31'::date) not valid; +\d nv_child_2009 + Table "public.nv_child_2009" + Column | Type | Collation | Nullable | Default +--------+------+-----------+----------+--------- + d | date | | | +Check constraints: + "nv_child_2009_d_check" CHECK (d >= '01-01-2009'::date AND d <= 
'12-31-2009'::date) + "nv_parent_d_check" CHECK (d >= '01-01-2001'::date AND d <= '12-31-2099'::date) NOT VALID +Inherits: nv_parent + +-- we leave nv_parent and children around to help test pg_dump logic +-- Foreign key adding test with mixed types +-- Note: these tables are TEMP to avoid name conflicts when this test +-- is run in parallel with foreign_key.sql. +CREATE TEMP TABLE PKTABLE (ptest1 int PRIMARY KEY); +INSERT INTO PKTABLE VALUES(42); +CREATE TEMP TABLE FKTABLE (ftest1 inet); +-- This next should fail, because int=inet does not exist +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; +ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: inet and integer. +-- This should also fail for the same reason, but here we +-- give the column name +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable(ptest1); +ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: inet and integer. +DROP TABLE FKTABLE; +-- This should succeed, even though they are different types, +-- because int=int8 exists and is a member of the integer opfamily +CREATE TEMP TABLE FKTABLE (ftest1 int8); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; +-- Check it actually works +INSERT INTO FKTABLE VALUES(42); -- should succeed +INSERT INTO FKTABLE VALUES(43); -- should fail +ERROR: insert or update on table "fktable" violates foreign key constraint "fktable_ftest1_fkey" +DETAIL: Key (ftest1)=(43) is not present in table "pktable". +DROP TABLE FKTABLE; +-- This should fail, because we'd have to cast numeric to int which is +-- not an implicit coercion (or use numeric=numeric, but that's not part +-- of the integer opfamily) +CREATE TEMP TABLE FKTABLE (ftest1 numeric); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; +ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: numeric and integer. +DROP TABLE FKTABLE; +DROP TABLE PKTABLE; +-- On the other hand, this should work because int implicitly promotes to +-- numeric, and we allow promotion on the FK side +CREATE TEMP TABLE PKTABLE (ptest1 numeric PRIMARY KEY); +INSERT INTO PKTABLE VALUES(42); +CREATE TEMP TABLE FKTABLE (ftest1 int); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; +-- Check it actually works +INSERT INTO FKTABLE VALUES(42); -- should succeed +INSERT INTO FKTABLE VALUES(43); -- should fail +ERROR: insert or update on table "fktable" violates foreign key constraint "fktable_ftest1_fkey" +DETAIL: Key (ftest1)=(43) is not present in table "pktable". +DROP TABLE FKTABLE; +DROP TABLE PKTABLE; +CREATE TEMP TABLE PKTABLE (ptest1 int, ptest2 inet, + PRIMARY KEY(ptest1, ptest2)); +-- This should fail, because we just chose really odd types +CREATE TEMP TABLE FKTABLE (ftest1 cidr, ftest2 timestamp); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) references pktable; +ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: cidr and integer. +DROP TABLE FKTABLE; +-- Again, so should this... 
+CREATE TEMP TABLE FKTABLE (ftest1 cidr, ftest2 timestamp); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) + references pktable(ptest1, ptest2); +ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: cidr and integer. +DROP TABLE FKTABLE; +-- This fails because we mixed up the column ordering +CREATE TEMP TABLE FKTABLE (ftest1 int, ftest2 inet); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) + references pktable(ptest2, ptest1); +ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest2" are of incompatible types: integer and inet. +-- As does this... +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest2, ftest1) + references pktable(ptest1, ptest2); +ERROR: foreign key constraint "fktable_ftest2_ftest1_fkey" cannot be implemented +DETAIL: Key columns "ftest2" and "ptest1" are of incompatible types: inet and integer. +DROP TABLE FKTABLE; +DROP TABLE PKTABLE; +-- Test that ALTER CONSTRAINT updates trigger deferrability properly +CREATE TEMP TABLE PKTABLE (ptest1 int primary key); +CREATE TEMP TABLE FKTABLE (ftest1 int); +ALTER TABLE FKTABLE ADD CONSTRAINT fknd FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; +ALTER TABLE FKTABLE ADD CONSTRAINT fkdd FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY DEFERRED; +ALTER TABLE FKTABLE ADD CONSTRAINT fkdi FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY IMMEDIATE; +ALTER TABLE FKTABLE ADD CONSTRAINT fknd2 FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY DEFERRED; +ALTER TABLE FKTABLE ALTER CONSTRAINT fknd2 NOT DEFERRABLE; +ALTER TABLE FKTABLE ADD CONSTRAINT fkdd2 FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; +ALTER TABLE FKTABLE ALTER CONSTRAINT fkdd2 DEFERRABLE INITIALLY DEFERRED; +ALTER TABLE FKTABLE ADD CONSTRAINT fkdi2 FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; +ALTER TABLE FKTABLE ALTER CONSTRAINT fkdi2 DEFERRABLE INITIALLY IMMEDIATE; +SELECT conname, tgfoid::regproc, tgtype, tgdeferrable, tginitdeferred +FROM pg_trigger JOIN pg_constraint con ON con.oid = tgconstraint +WHERE tgrelid = 'pktable'::regclass +ORDER BY 1,2,3; + conname | tgfoid | tgtype | tgdeferrable | tginitdeferred +---------+------------------------+--------+--------------+---------------- + fkdd | "RI_FKey_cascade_del" | 9 | f | f + fkdd | "RI_FKey_noaction_upd" | 17 | t | t + fkdd2 | "RI_FKey_cascade_del" | 9 | f | f + fkdd2 | "RI_FKey_noaction_upd" | 17 | t | t + fkdi | "RI_FKey_cascade_del" | 9 | f | f + fkdi | "RI_FKey_noaction_upd" | 17 | t | f + fkdi2 | "RI_FKey_cascade_del" | 9 | f | f + fkdi2 | "RI_FKey_noaction_upd" | 17 | t | f + fknd | "RI_FKey_cascade_del" | 9 | f | f + fknd | "RI_FKey_noaction_upd" | 17 | f | f + fknd2 | "RI_FKey_cascade_del" | 9 | f | f + fknd2 | "RI_FKey_noaction_upd" | 17 | f | f +(12 rows) + +SELECT conname, tgfoid::regproc, tgtype, tgdeferrable, tginitdeferred +FROM pg_trigger JOIN pg_constraint con ON con.oid = tgconstraint +WHERE tgrelid = 'fktable'::regclass +ORDER BY 1,2,3; + conname | tgfoid | tgtype | tgdeferrable | tginitdeferred +---------+---------------------+--------+--------------+---------------- + fkdd | "RI_FKey_check_ins" | 5 | t | t + fkdd | "RI_FKey_check_upd" | 17 | t 
| t
+ fkdd2 | "RI_FKey_check_ins" | 5 | t | t
+ fkdd2 | "RI_FKey_check_upd" | 17 | t | t
+ fkdi | "RI_FKey_check_ins" | 5 | t | f
+ fkdi | "RI_FKey_check_upd" | 17 | t | f
+ fkdi2 | "RI_FKey_check_ins" | 5 | t | f
+ fkdi2 | "RI_FKey_check_upd" | 17 | t | f
+ fknd | "RI_FKey_check_ins" | 5 | f | f
+ fknd | "RI_FKey_check_upd" | 17 | f | f
+ fknd2 | "RI_FKey_check_ins" | 5 | f | f
+ fknd2 | "RI_FKey_check_upd" | 17 | f | f
+(12 rows)
+
+-- temp tables should go away by themselves, need not drop them.
+-- test check constraint adding
+create table atacc1 ( test int );
+-- add a check constraint
+alter table atacc1 add constraint atacc_test1 check (test>3);
+-- should fail
+insert into atacc1 (test) values (2);
+ERROR: new row for relation "atacc1" violates check constraint "atacc_test1"
+DETAIL: Failing row contains (2).
+-- should succeed
+insert into atacc1 (test) values (4);
+drop table atacc1;
+-- let's do one where the check fails when added
+create table atacc1 ( test int );
+-- insert a soon to be failing row
+insert into atacc1 (test) values (2);
+-- add a check constraint (fails)
+alter table atacc1 add constraint atacc_test1 check (test>3);
+ERROR: check constraint "atacc_test1" of relation "atacc1" is violated by some row
+insert into atacc1 (test) values (4);
+drop table atacc1;
+-- let's do one where the check fails because the column doesn't exist
+create table atacc1 ( test int );
+-- add a check constraint (fails)
+alter table atacc1 add constraint atacc_test1 check (test1>3);
+ERROR: column "test1" does not exist
+HINT: Perhaps you meant to reference the column "atacc1.test".
+drop table atacc1;
+-- something a little more complicated
+create table atacc1 ( test int, test2 int, test3 int);
+-- add a check constraint (fails)
+alter table atacc1 add constraint atacc_test1 check (test+test2<test3*4);
+-- should fail
+insert into atacc1 (test,test2,test3) values (4,4,2);
+ERROR: new row for relation "atacc1" violates check constraint "atacc_test1"
+DETAIL: Failing row contains (4, 4, 2).
+-- should succeed
+insert into atacc1 (test,test2,test3) values (4,4,5);
+drop table atacc1;
+-- lets do some naming tests
+create table atacc1 (test int check (test>3), test2 int);
+alter table atacc1 add check (test2>test);
+-- should fail for $2
+insert into atacc1 (test2, test) values (3, 4);
+ERROR: new row for relation "atacc1" violates check constraint "atacc1_check"
+DETAIL: Failing row contains (4, 3).
+drop table atacc1;
+-- inheritance related tests
+create table atacc1 (test int);
+create table atacc2 (test2 int);
+create table atacc3 (test3 int) inherits (atacc1, atacc2);
+alter table atacc2 add constraint foo check (test2>0);
+-- fail and then succeed on atacc2
+insert into atacc2 (test2) values (-3);
+ERROR: new row for relation "atacc2" violates check constraint "foo"
+DETAIL: Failing row contains (-3).
+insert into atacc2 (test2) values (3);
+-- fail and then succeed on atacc3
+insert into atacc3 (test2) values (-3);
+ERROR: new row for relation "atacc3" violates check constraint "foo"
+DETAIL: Failing row contains (null, -3, null).
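The atacc1/atacc2/atacc3 sequence above exercises the rule that a CHECK constraint added to a parent table is enforced in every child, while NO INHERIT (tested a bit further down) stops the propagation. A condensed sketch of both cases, with hypothetical table names:

create table measurements (v int);
create table measurements_child () inherits (measurements);
alter table measurements add constraint v_positive check (v > 0);
insert into measurements_child values (-1);   -- rejected: constraint is inherited
alter table measurements add constraint v_small check (v < 1000) no inherit;
insert into measurements_child values (2000); -- accepted: NO INHERIT stops at the parent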
+insert into atacc3 (test2) values (3); +drop table atacc3; +drop table atacc2; +drop table atacc1; +-- same things with one created with INHERIT +create table atacc1 (test int); +create table atacc2 (test2 int); +create table atacc3 (test3 int) inherits (atacc1, atacc2); +alter table atacc3 no inherit atacc2; +-- fail +alter table atacc3 no inherit atacc2; +ERROR: relation "atacc2" is not a parent of relation "atacc3" +-- make sure it really isn't a child +insert into atacc3 (test2) values (3); +select test2 from atacc2; + test2 +------- +(0 rows) + +-- fail due to missing constraint +alter table atacc2 add constraint foo check (test2>0); +alter table atacc3 inherit atacc2; +ERROR: child table is missing constraint "foo" +-- fail due to missing column +alter table atacc3 rename test2 to testx; +alter table atacc3 inherit atacc2; +ERROR: child table is missing column "test2" +-- fail due to mismatched data type +alter table atacc3 add test2 bool; +alter table atacc3 inherit atacc2; +ERROR: child table "atacc3" has different type for column "test2" +alter table atacc3 drop test2; +-- succeed +alter table atacc3 add test2 int; +update atacc3 set test2 = 4 where test2 is null; +alter table atacc3 add constraint foo check (test2>0); +alter table atacc3 inherit atacc2; +-- fail due to duplicates and circular inheritance +alter table atacc3 inherit atacc2; +ERROR: relation "atacc2" would be inherited from more than once +alter table atacc2 inherit atacc3; +ERROR: circular inheritance not allowed +DETAIL: "atacc3" is already a child of "atacc2". +alter table atacc2 inherit atacc2; +ERROR: circular inheritance not allowed +DETAIL: "atacc2" is already a child of "atacc2". +-- test that we really are a child now (should see 4 not 3 and cascade should go through) +select test2 from atacc2; + test2 +------- + 4 +(1 row) + +drop table atacc2 cascade; +NOTICE: drop cascades to table atacc3 +drop table atacc1; +-- adding only to a parent is allowed as of 9.2 +create table atacc1 (test int); +create table atacc2 (test2 int) inherits (atacc1); +-- ok: +alter table atacc1 add constraint foo check (test>0) no inherit; +-- check constraint is not there on child +insert into atacc2 (test) values (-3); +-- check constraint is there on parent +insert into atacc1 (test) values (-3); +ERROR: new row for relation "atacc1" violates check constraint "foo" +DETAIL: Failing row contains (-3). +insert into atacc1 (test) values (3); +-- fail, violating row: +alter table atacc2 add constraint foo check (test>0) no inherit; +ERROR: check constraint "foo" of relation "atacc2" is violated by some row +drop table atacc2; +drop table atacc1; +-- test unique constraint adding +create table atacc1 ( test int ) ; +-- add a unique constraint +alter table atacc1 add constraint atacc_test1 unique (test); +-- insert first value +insert into atacc1 (test) values (2); +-- should fail +insert into atacc1 (test) values (2); +ERROR: duplicate key value violates unique constraint "atacc_test1" +DETAIL: Key (test)=(2) already exists. +-- should succeed +insert into atacc1 (test) values (4); +-- try to create duplicates via alter table using - should fail +alter table atacc1 alter column test type integer using 0; +ERROR: could not create unique index "atacc_test1" +DETAIL: Key (test)=(0) is duplicated. 
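ADD UNIQUE is implemented by building a unique index, which is why the failure above reads "could not create unique index": ALTER COLUMN ... TYPE integer USING 0 rewrites every value to 0, and the index rebuild is what detects the duplicates. A small sketch (table name hypothetical) showing the index backing the constraint:

create table dedup_demo (v int);
alter table dedup_demo add constraint dedup_demo_v_key unique (v);
select indexname from pg_indexes where tablename = 'dedup_demo';
-- returns dedup_demo_v_key, the index created for the constraint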
+drop table atacc1; +-- let's do one where the unique constraint fails when added +create table atacc1 ( test int ); +-- insert soon to be failing rows +insert into atacc1 (test) values (2); +insert into atacc1 (test) values (2); +-- add a unique constraint (fails) +alter table atacc1 add constraint atacc_test1 unique (test); +ERROR: could not create unique index "atacc_test1" +DETAIL: Key (test)=(2) is duplicated. +insert into atacc1 (test) values (3); +drop table atacc1; +-- let's do one where the unique constraint fails +-- because the column doesn't exist +create table atacc1 ( test int ); +-- add a unique constraint (fails) +alter table atacc1 add constraint atacc_test1 unique (test1); +ERROR: column "test1" named in key does not exist +drop table atacc1; +-- something a little more complicated +create table atacc1 ( test int, test2 int); +-- add a unique constraint +alter table atacc1 add constraint atacc_test1 unique (test, test2); +-- insert initial value +insert into atacc1 (test,test2) values (4,4); +-- should fail +insert into atacc1 (test,test2) values (4,4); +ERROR: duplicate key value violates unique constraint "atacc_test1" +DETAIL: Key (test, test2)=(4, 4) already exists. +-- should all succeed +insert into atacc1 (test,test2) values (4,5); +insert into atacc1 (test,test2) values (5,4); +insert into atacc1 (test,test2) values (5,5); +drop table atacc1; +-- lets do some naming tests +create table atacc1 (test int, test2 int, unique(test)); +alter table atacc1 add unique (test2); +-- should fail for @@ second one @@ +insert into atacc1 (test2, test) values (3, 3); +insert into atacc1 (test2, test) values (2, 3); +ERROR: duplicate key value violates unique constraint "atacc1_test_key" +DETAIL: Key (test)=(3) already exists. +drop table atacc1; +-- test primary key constraint adding +create table atacc1 ( id serial, test int) ; +-- add a primary key constraint +alter table atacc1 add constraint atacc_test1 primary key (test); +-- insert first value +insert into atacc1 (test) values (2); +-- should fail +insert into atacc1 (test) values (2); +ERROR: duplicate key value violates unique constraint "atacc_test1" +DETAIL: Key (test)=(2) already exists. +-- should succeed +insert into atacc1 (test) values (4); +-- inserting NULL should fail +insert into atacc1 (test) values(NULL); +ERROR: null value in column "test" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (4, null). +-- try adding a second primary key (should fail) +alter table atacc1 add constraint atacc_oid1 primary key(id); +ERROR: multiple primary keys for table "atacc1" are not allowed +-- drop first primary key constraint +alter table atacc1 drop constraint atacc_test1 restrict; +-- try adding a primary key on oid (should succeed) +alter table atacc1 add constraint atacc_oid1 primary key(id); +drop table atacc1; +-- let's do one where the primary key constraint fails when added +create table atacc1 ( test int ); +-- insert soon to be failing rows +insert into atacc1 (test) values (2); +insert into atacc1 (test) values (2); +-- add a primary key (fails) +alter table atacc1 add constraint atacc_test1 primary key (test); +ERROR: could not create unique index "atacc_test1" +DETAIL: Key (test)=(2) is duplicated. 
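The constraint name atacc1_test_key in the error above is system-generated: unnamed UNIQUE constraints default to &lt;table&gt;_&lt;column&gt;_key and primary keys to &lt;table&gt;_pkey. A sketch on a hypothetical table:

create table naming_demo (a int unique, b int primary key);
select conname from pg_constraint
 where conrelid = 'naming_demo'::regclass order by conname;
-- naming_demo_a_key
-- naming_demo_pkey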
+insert into atacc1 (test) values (3); +drop table atacc1; +-- let's do another one where the primary key constraint fails when added +create table atacc1 ( test int ); +-- insert soon to be failing row +insert into atacc1 (test) values (NULL); +-- add a primary key (fails) +alter table atacc1 add constraint atacc_test1 primary key (test); +ERROR: column "test" of relation "atacc1" contains null values +insert into atacc1 (test) values (3); +drop table atacc1; +-- let's do one where the primary key constraint fails +-- because the column doesn't exist +create table atacc1 ( test int ); +-- add a primary key constraint (fails) +alter table atacc1 add constraint atacc_test1 primary key (test1); +ERROR: column "test1" of relation "atacc1" does not exist +drop table atacc1; +-- adding a new column as primary key to a non-empty table. +-- should fail unless the column has a non-null default value. +create table atacc1 ( test int ); +insert into atacc1 (test) values (0); +-- add a primary key column without a default (fails). +alter table atacc1 add column test2 int primary key; +ERROR: column "test2" of relation "atacc1" contains null values +-- now add a primary key column with a default (succeeds). +alter table atacc1 add column test2 int default 0 primary key; +drop table atacc1; +-- this combination used to have order-of-execution problems (bug #15580) +create table atacc1 (a int); +insert into atacc1 values(1); +alter table atacc1 + add column b float8 not null default random(), + add primary key(a); +drop table atacc1; +-- additionally, we've seen issues with foreign key validation not being +-- properly delayed until after a table rewrite. Check that works ok. +create table atacc1 (a int primary key); +alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid; +alter table atacc1 validate constraint atacc1_fkey, alter a type bigint; +drop table atacc1; +-- we've also seen issues with check constraints being validated at the wrong +-- time when there's a pending table rewrite. +create table atacc1 (a bigint, b int); +insert into atacc1 values(1,1); +alter table atacc1 add constraint atacc1_chk check(b = 1) not valid; +alter table atacc1 validate constraint atacc1_chk, alter a type int; +drop table atacc1; +-- same as above, but ensure the constraint violation is detected +create table atacc1 (a bigint, b int); +insert into atacc1 values(1,2); +alter table atacc1 add constraint atacc1_chk check(b = 1) not valid; +alter table atacc1 validate constraint atacc1_chk, alter a type int; +ERROR: check constraint "atacc1_chk" of relation "atacc1" is violated by some row +drop table atacc1; +-- something a little more complicated +create table atacc1 ( test int, test2 int); +-- add a primary key constraint +alter table atacc1 add constraint atacc_test1 primary key (test, test2); +-- try adding a second primary key - should fail +alter table atacc1 add constraint atacc_test2 primary key (test); +ERROR: multiple primary keys for table "atacc1" are not allowed +-- insert initial value +insert into atacc1 (test,test2) values (4,4); +-- should fail +insert into atacc1 (test,test2) values (4,4); +ERROR: duplicate key value violates unique constraint "atacc_test1" +DETAIL: Key (test, test2)=(4, 4) already exists. +insert into atacc1 (test,test2) values (NULL,3); +ERROR: null value in column "test" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (null, 3). 
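The bug #15580 case and the VALIDATE-plus-ALTER TYPE cases above all stem from several subcommands sharing a single ALTER TABLE, and therefore a single scan or rewrite whose phases must be sequenced correctly. A sketch of the combined form on a hypothetical table (the added column is illustrative):

create table combo_demo (a int);
insert into combo_demo values (1);
-- one statement: fill the new column for existing rows, then build the PK
alter table combo_demo
  add column created_at timestamptz not null default now(),
  add primary key (a);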
+insert into atacc1 (test,test2) values (3, NULL); +ERROR: null value in column "test2" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (3, null). +insert into atacc1 (test,test2) values (NULL,NULL); +ERROR: null value in column "test" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (null, null). +-- should all succeed +insert into atacc1 (test,test2) values (4,5); +insert into atacc1 (test,test2) values (5,4); +insert into atacc1 (test,test2) values (5,5); +drop table atacc1; +-- lets do some naming tests +create table atacc1 (test int, test2 int, primary key(test)); +-- only first should succeed +insert into atacc1 (test2, test) values (3, 3); +insert into atacc1 (test2, test) values (2, 3); +ERROR: duplicate key value violates unique constraint "atacc1_pkey" +DETAIL: Key (test)=(3) already exists. +insert into atacc1 (test2, test) values (1, NULL); +ERROR: null value in column "test" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (null, 1). +drop table atacc1; +-- alter table / alter column [set/drop] not null tests +-- try altering system catalogs, should fail +alter table pg_class alter column relname drop not null; +ERROR: permission denied: "pg_class" is a system catalog +alter table pg_class alter relname set not null; +ERROR: permission denied: "pg_class" is a system catalog +-- try altering non-existent table, should fail +alter table non_existent alter column bar set not null; +ERROR: relation "non_existent" does not exist +alter table non_existent alter column bar drop not null; +ERROR: relation "non_existent" does not exist +-- test setting columns to null and not null and vice versa +-- test checking for null values and primary key +create table atacc1 (test int not null); +alter table atacc1 add constraint "atacc1_pkey" primary key (test); +alter table atacc1 alter column test drop not null; +ERROR: column "test" is in a primary key +alter table atacc1 drop constraint "atacc1_pkey"; +alter table atacc1 alter column test drop not null; +insert into atacc1 values (null); +alter table atacc1 alter test set not null; +ERROR: column "test" of relation "atacc1" contains null values +delete from atacc1; +alter table atacc1 alter test set not null; +-- try altering a non-existent column, should fail +alter table atacc1 alter bar set not null; +ERROR: column "bar" of relation "atacc1" does not exist +alter table atacc1 alter bar drop not null; +ERROR: column "bar" of relation "atacc1" does not exist +-- try creating a view and altering that, should fail +create view myview as select * from atacc1; +alter table myview alter column test drop not null; +ERROR: "myview" is not a table or foreign table +alter table myview alter column test set not null; +ERROR: "myview" is not a table or foreign table +drop view myview; +drop table atacc1; +-- set not null verified by constraints +create table atacc1 (test_a int, test_b int); +insert into atacc1 values (null, 1); +-- constraint not cover all values, should fail +alter table atacc1 add constraint atacc1_constr_or check(test_a is not null or test_b < 10); +alter table atacc1 alter test_a set not null; +ERROR: column "test_a" of relation "atacc1" contains null values +alter table atacc1 drop constraint atacc1_constr_or; +-- not valid constraint, should fail +alter table atacc1 add constraint atacc1_constr_invalid check(test_a is not null) not valid; +alter table atacc1 alter test_a set not null; +ERROR: column "test_a" of relation "atacc1" 
contains null values +alter table atacc1 drop constraint atacc1_constr_invalid; +-- with valid constraint +update atacc1 set test_a = 1; +alter table atacc1 add constraint atacc1_constr_a_valid check(test_a is not null); +alter table atacc1 alter test_a set not null; +delete from atacc1; +insert into atacc1 values (2, null); +alter table atacc1 alter test_a drop not null; +-- test multiple set not null at same time +-- test_a checked by atacc1_constr_a_valid, test_b should fail by table scan +alter table atacc1 alter test_a set not null, alter test_b set not null; +ERROR: column "test_b" of relation "atacc1" contains null values +-- commands order has no importance +alter table atacc1 alter test_b set not null, alter test_a set not null; +ERROR: column "test_b" of relation "atacc1" contains null values +-- valid one by table scan, one by check constraints +update atacc1 set test_b = 1; +alter table atacc1 alter test_b set not null, alter test_a set not null; +alter table atacc1 alter test_a drop not null, alter test_b drop not null; +-- both column has check constraints +alter table atacc1 add constraint atacc1_constr_b_valid check(test_b is not null); +alter table atacc1 alter test_b set not null, alter test_a set not null; +drop table atacc1; +-- test inheritance +create table parent (a int); +create table child (b varchar(255)) inherits (parent); +alter table parent alter a set not null; +insert into parent values (NULL); +ERROR: null value in column "a" of relation "parent" violates not-null constraint +DETAIL: Failing row contains (null). +insert into child (a, b) values (NULL, 'foo'); +ERROR: null value in column "a" of relation "child" violates not-null constraint +DETAIL: Failing row contains (null, foo). +alter table parent alter a drop not null; +insert into parent values (NULL); +insert into child (a, b) values (NULL, 'foo'); +alter table only parent alter a set not null; +ERROR: column "a" of relation "parent" contains null values +alter table child alter a set not null; +ERROR: column "a" of relation "child" contains null values +delete from parent; +alter table only parent alter a set not null; +insert into parent values (NULL); +ERROR: null value in column "a" of relation "parent" violates not-null constraint +DETAIL: Failing row contains (null). +alter table child alter a set not null; +insert into child (a, b) values (NULL, 'foo'); +ERROR: null value in column "a" of relation "child" violates not-null constraint +DETAIL: Failing row contains (null, foo). +delete from child; +alter table child alter a set not null; +insert into child (a, b) values (NULL, 'foo'); +ERROR: null value in column "a" of relation "child" violates not-null constraint +DETAIL: Failing row contains (null, foo). 
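The atacc1_constr_a_valid case suggests the optimization being tested: when a validated CHECK (col IS NOT NULL) constraint already exists, SET NOT NULL can be accepted without examining the data, since no row can hold a NULL; a NOT VALID constraint, or one that only partially implies non-nullness (the OR variant), does not qualify, so the table is scanned and the NULL is reported. A sketch with a hypothetical table name:

create table scan_demo (v int);
insert into scan_demo values (1);
alter table scan_demo add constraint scan_demo_v_nn check (v is not null);
alter table scan_demo alter column v set not null;  -- provable from the constraint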
+drop table child; +drop table parent; +-- test setting and removing default values +create table def_test ( + c1 int4 default 5, + c2 text default 'initial_default' +); +insert into def_test default values; +alter table def_test alter column c1 drop default; +insert into def_test default values; +alter table def_test alter column c2 drop default; +insert into def_test default values; +alter table def_test alter column c1 set default 10; +alter table def_test alter column c2 set default 'new_default'; +insert into def_test default values; +select * from def_test; + c1 | c2 +----+----------------- + 5 | initial_default + | initial_default + | + 10 | new_default +(4 rows) + +-- set defaults to an incorrect type: this should fail +alter table def_test alter column c1 set default 'wrong_datatype'; +ERROR: invalid input syntax for type integer: "wrong_datatype" +alter table def_test alter column c2 set default 20; +-- set defaults on a non-existent column: this should fail +alter table def_test alter column c3 set default 30; +ERROR: column "c3" of relation "def_test" does not exist +-- set defaults on views: we need to create a view, add a rule +-- to allow insertions into it, and then alter the view to add +-- a default +create view def_view_test as select * from def_test; +create rule def_view_test_ins as + on insert to def_view_test + do instead insert into def_test select new.*; +insert into def_view_test default values; +alter table def_view_test alter column c1 set default 45; +insert into def_view_test default values; +alter table def_view_test alter column c2 set default 'view_default'; +insert into def_view_test default values; +select * from def_view_test; + c1 | c2 +----+----------------- + 5 | initial_default + | initial_default + | + 10 | new_default + | + 45 | + 45 | view_default +(7 rows) + +drop rule def_view_test_ins on def_view_test; +drop view def_view_test; +drop table def_test; +-- alter table / drop column tests +-- try altering system catalogs, should fail +alter table pg_class drop column relname; +ERROR: permission denied: "pg_class" is a system catalog +-- try altering non-existent table, should fail +alter table nosuchtable drop column bar; +ERROR: relation "nosuchtable" does not exist +-- test dropping columns +create table atacc1 (a int4 not null, b int4, c int4 not null, d int4); +insert into atacc1 values (1, 2, 3, 4); +alter table atacc1 drop a; +alter table atacc1 drop a; +ERROR: column "a" of relation "atacc1" does not exist +-- SELECTs +select * from atacc1; + b | c | d +---+---+--- + 2 | 3 | 4 +(1 row) + +select * from atacc1 order by a; +ERROR: column "a" does not exist +LINE 1: select * from atacc1 order by a; + ^ +select * from atacc1 order by "........pg.dropped.1........"; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select * from atacc1 order by "........pg.dropped.1........"... + ^ +select * from atacc1 group by a; +ERROR: column "a" does not exist +LINE 1: select * from atacc1 group by a; + ^ +select * from atacc1 group by "........pg.dropped.1........"; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select * from atacc1 group by "........pg.dropped.1........"... 
+ ^ +select atacc1.* from atacc1; + b | c | d +---+---+--- + 2 | 3 | 4 +(1 row) + +select a from atacc1; +ERROR: column "a" does not exist +LINE 1: select a from atacc1; + ^ +select atacc1.a from atacc1; +ERROR: column atacc1.a does not exist +LINE 1: select atacc1.a from atacc1; + ^ +select b,c,d from atacc1; + b | c | d +---+---+--- + 2 | 3 | 4 +(1 row) + +select a,b,c,d from atacc1; +ERROR: column "a" does not exist +LINE 1: select a,b,c,d from atacc1; + ^ +select * from atacc1 where a = 1; +ERROR: column "a" does not exist +LINE 1: select * from atacc1 where a = 1; + ^ +select "........pg.dropped.1........" from atacc1; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select "........pg.dropped.1........" from atacc1; + ^ +select atacc1."........pg.dropped.1........" from atacc1; +ERROR: column atacc1.........pg.dropped.1........ does not exist +LINE 1: select atacc1."........pg.dropped.1........" from atacc1; + ^ +select "........pg.dropped.1........",b,c,d from atacc1; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select "........pg.dropped.1........",b,c,d from atacc1; + ^ +select * from atacc1 where "........pg.dropped.1........" = 1; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select * from atacc1 where "........pg.dropped.1........" = ... + ^ +-- UPDATEs +update atacc1 set a = 3; +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: update atacc1 set a = 3; + ^ +update atacc1 set b = 2 where a = 3; +ERROR: column "a" does not exist +LINE 1: update atacc1 set b = 2 where a = 3; + ^ +update atacc1 set "........pg.dropped.1........" = 3; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: update atacc1 set "........pg.dropped.1........" = 3; + ^ +update atacc1 set b = 2 where "........pg.dropped.1........" = 3; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: update atacc1 set b = 2 where "........pg.dropped.1........"... + ^ +-- INSERTs +insert into atacc1 values (10, 11, 12, 13); +ERROR: INSERT has more expressions than target columns +LINE 1: insert into atacc1 values (10, 11, 12, 13); + ^ +insert into atacc1 values (default, 11, 12, 13); +ERROR: INSERT has more expressions than target columns +LINE 1: insert into atacc1 values (default, 11, 12, 13); + ^ +insert into atacc1 values (11, 12, 13); +insert into atacc1 (a) values (10); +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: insert into atacc1 (a) values (10); + ^ +insert into atacc1 (a) values (default); +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: insert into atacc1 (a) values (default); + ^ +insert into atacc1 (a,b,c,d) values (10,11,12,13); +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: insert into atacc1 (a,b,c,d) values (10,11,12,13); + ^ +insert into atacc1 (a,b,c,d) values (default,11,12,13); +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: insert into atacc1 (a,b,c,d) values (default,11,12,13); + ^ +insert into atacc1 (b,c,d) values (11,12,13); +insert into atacc1 ("........pg.dropped.1........") values (10); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: insert into atacc1 ("........pg.dropped.1........") values (... + ^ +insert into atacc1 ("........pg.dropped.1........") values (default); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: insert into atacc1 ("........pg.dropped.1........") values (... 
+ ^ +insert into atacc1 ("........pg.dropped.1........",b,c,d) values (10,11,12,13); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: insert into atacc1 ("........pg.dropped.1........",b,c,d) va... + ^ +insert into atacc1 ("........pg.dropped.1........",b,c,d) values (default,11,12,13); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: insert into atacc1 ("........pg.dropped.1........",b,c,d) va... + ^ +-- DELETEs +delete from atacc1 where a = 3; +ERROR: column "a" does not exist +LINE 1: delete from atacc1 where a = 3; + ^ +delete from atacc1 where "........pg.dropped.1........" = 3; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: delete from atacc1 where "........pg.dropped.1........" = 3; + ^ +delete from atacc1; +-- try dropping a non-existent column, should fail +alter table atacc1 drop bar; +ERROR: column "bar" of relation "atacc1" does not exist +-- try removing an oid column, should succeed (as it's nonexistent) +alter table atacc1 SET WITHOUT OIDS; +-- try adding an oid column, should fail (not supported) +alter table atacc1 SET WITH OIDS; +ERROR: syntax error at or near "WITH" +LINE 1: alter table atacc1 SET WITH OIDS; + ^ +-- try dropping the xmin column, should fail +alter table atacc1 drop xmin; +ERROR: cannot drop system column "xmin" +-- try creating a view and altering that, should fail +create view myview as select * from atacc1; +select * from myview; + b | c | d +---+---+--- +(0 rows) + +alter table myview drop d; +ERROR: "myview" is not a table, composite type, or foreign table +drop view myview; +-- test some commands to make sure they fail on the dropped column +analyze atacc1(a); +ERROR: column "a" of relation "atacc1" does not exist +analyze atacc1("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +vacuum analyze atacc1(a); +ERROR: column "a" of relation "atacc1" does not exist +vacuum analyze atacc1("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +comment on column atacc1.a is 'testing'; +ERROR: column "a" of relation "atacc1" does not exist +comment on column atacc1."........pg.dropped.1........" is 'testing'; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a set storage plain; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" set storage plain; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a set statistics 0; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" set statistics 0; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a set default 3; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" set default 3; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a drop default; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" drop default; +ERROR: column "........pg.dropped.1........" 
of relation "atacc1" does not exist +alter table atacc1 alter a set not null; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" set not null; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a drop not null; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" drop not null; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 rename a to x; +ERROR: column "a" does not exist +alter table atacc1 rename "........pg.dropped.1........" to x; +ERROR: column "........pg.dropped.1........" does not exist +alter table atacc1 add primary key(a); +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 add primary key("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 add unique(a); +ERROR: column "a" named in key does not exist +alter table atacc1 add unique("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" named in key does not exist +alter table atacc1 add check (a > 3); +ERROR: column "a" does not exist +alter table atacc1 add check ("........pg.dropped.1........" > 3); +ERROR: column "........pg.dropped.1........" does not exist +create table atacc2 (id int4 unique); +alter table atacc1 add foreign key (a) references atacc2(id); +ERROR: column "a" referenced in foreign key constraint does not exist +alter table atacc1 add foreign key ("........pg.dropped.1........") references atacc2(id); +ERROR: column "........pg.dropped.1........" referenced in foreign key constraint does not exist +alter table atacc2 add foreign key (id) references atacc1(a); +ERROR: column "a" referenced in foreign key constraint does not exist +alter table atacc2 add foreign key (id) references atacc1("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" referenced in foreign key constraint does not exist +drop table atacc2; +create index "testing_idx" on atacc1(a); +ERROR: column "a" does not exist +create index "testing_idx" on atacc1("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" does not exist +-- test create as and select into +insert into atacc1 values (21, 22, 23); +create table attest1 as select * from atacc1; +select * from attest1; + b | c | d +----+----+---- + 21 | 22 | 23 +(1 row) + +drop table attest1; +select * into attest2 from atacc1; +select * from attest2; + b | c | d +----+----+---- + 21 | 22 | 23 +(1 row) + +drop table attest2; +-- try dropping all columns +alter table atacc1 drop c; +alter table atacc1 drop d; +alter table atacc1 drop b; +select * from atacc1; +-- +(1 row) + +drop table atacc1; +-- test constraint error reporting in presence of dropped columns +create table atacc1 (id serial primary key, value int check (value < 10)); +insert into atacc1(value) values (100); +ERROR: new row for relation "atacc1" violates check constraint "atacc1_value_check" +DETAIL: Failing row contains (1, 100). +alter table atacc1 drop column value; +alter table atacc1 add column value int check (value < 10); +insert into atacc1(value) values (100); +ERROR: new row for relation "atacc1" violates check constraint "atacc1_value_check" +DETAIL: Failing row contains (2, 100). 
+insert into atacc1(id, value) values (null, 0); +ERROR: null value in column "id" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (null, 0). +drop table atacc1; +-- test inheritance +create table parent (a int, b int, c int); +insert into parent values (1, 2, 3); +alter table parent drop a; +create table child (d varchar(255)) inherits (parent); +insert into child values (12, 13, 'testing'); +select * from parent; + b | c +----+---- + 2 | 3 + 12 | 13 +(2 rows) + +select * from child; + b | c | d +----+----+--------- + 12 | 13 | testing +(1 row) + +alter table parent drop c; +select * from parent; + b +---- + 2 + 12 +(2 rows) + +select * from child; + b | d +----+--------- + 12 | testing +(1 row) + +drop table child; +drop table parent; +-- check error cases for inheritance column merging +create table parent (a float8, b numeric(10,4), c text collate "C"); +create table child (a float4) inherits (parent); -- fail +NOTICE: merging column "a" with inherited definition +ERROR: column "a" has a type conflict +DETAIL: double precision versus real +create table child (b decimal(10,7)) inherits (parent); -- fail +NOTICE: moving and merging column "b" with inherited definition +DETAIL: User-specified column moved to the position of the inherited column. +ERROR: column "b" has a type conflict +DETAIL: numeric(10,4) versus numeric(10,7) +create table child (c text collate "POSIX") inherits (parent); -- fail +NOTICE: moving and merging column "c" with inherited definition +DETAIL: User-specified column moved to the position of the inherited column. +ERROR: column "c" has a collation conflict +DETAIL: "C" versus "POSIX" +create table child (a double precision, b decimal(10,4)) inherits (parent); +NOTICE: merging column "a" with inherited definition +NOTICE: merging column "b" with inherited definition +drop table child; +drop table parent; +-- test copy in/out +create table attest (a int4, b int4, c int4); +insert into attest values (1,2,3); +alter table attest drop a; +copy attest to stdout; +2 3 +copy attest(a) to stdout; +ERROR: column "a" of relation "attest" does not exist +copy attest("........pg.dropped.1........") to stdout; +ERROR: column "........pg.dropped.1........" of relation "attest" does not exist +copy attest from stdin; +ERROR: extra data after last expected column +CONTEXT: COPY attest, line 1: "10 11 12" +select * from attest; + b | c +---+--- + 2 | 3 +(1 row) + +copy attest from stdin; +select * from attest; + b | c +----+---- + 2 | 3 + 21 | 22 +(2 rows) + +copy attest(a) from stdin; +ERROR: column "a" of relation "attest" does not exist +copy attest("........pg.dropped.1........") from stdin; +ERROR: column "........pg.dropped.1........" 
of relation "attest" does not exist +copy attest(b,c) from stdin; +select * from attest; + b | c +----+---- + 2 | 3 + 21 | 22 + 31 | 32 +(3 rows) + +drop table attest; +-- test inheritance +create table dropColumn (a int, b int, e int); +create table dropColumnChild (c int) inherits (dropColumn); +create table dropColumnAnother (d int) inherits (dropColumnChild); +-- these two should fail +alter table dropColumnchild drop column a; +ERROR: cannot drop inherited column "a" +alter table only dropColumnChild drop column b; +ERROR: cannot drop inherited column "b" +-- these three should work +alter table only dropColumn drop column e; +alter table dropColumnChild drop column c; +alter table dropColumn drop column a; +create table renameColumn (a int); +create table renameColumnChild (b int) inherits (renameColumn); +create table renameColumnAnother (c int) inherits (renameColumnChild); +-- these three should fail +alter table renameColumnChild rename column a to d; +ERROR: cannot rename inherited column "a" +alter table only renameColumnChild rename column a to d; +ERROR: inherited column "a" must be renamed in child tables too +alter table only renameColumn rename column a to d; +ERROR: inherited column "a" must be renamed in child tables too +-- these should work +alter table renameColumn rename column a to d; +alter table renameColumnChild rename column b to a; +-- these should work +alter table if exists doesnt_exist_tab rename column a to d; +NOTICE: relation "doesnt_exist_tab" does not exist, skipping +alter table if exists doesnt_exist_tab rename column b to a; +NOTICE: relation "doesnt_exist_tab" does not exist, skipping +-- this should work +alter table renameColumn add column w int; +-- this should fail +alter table only renameColumn add column x int; +ERROR: column must be added to child tables too +-- Test corner cases in dropping of inherited columns +create table p1 (f1 int, f2 int); +create table c1 (f1 int not null) inherits(p1); +NOTICE: merging column "f1" with inherited definition +-- should be rejected since c1.f1 is inherited +alter table c1 drop column f1; +ERROR: cannot drop inherited column "f1" +-- should work +alter table p1 drop column f1; +-- c1.f1 is still there, but no longer inherited +select f1 from c1; + f1 +---- +(0 rows) + +alter table c1 drop column f1; +select f1 from c1; +ERROR: column "f1" does not exist +LINE 1: select f1 from c1; + ^ +HINT: Perhaps you meant to reference the column "c1.f2". +drop table p1 cascade; +NOTICE: drop cascades to table c1 +create table p1 (f1 int, f2 int); +create table c1 () inherits(p1); +-- should be rejected since c1.f1 is inherited +alter table c1 drop column f1; +ERROR: cannot drop inherited column "f1" +alter table p1 drop column f1; +-- c1.f1 is dropped now, since there is no local definition for it +select f1 from c1; +ERROR: column "f1" does not exist +LINE 1: select f1 from c1; + ^ +HINT: Perhaps you meant to reference the column "c1.f2". 
+drop table p1 cascade; +NOTICE: drop cascades to table c1 +create table p1 (f1 int, f2 int); +create table c1 () inherits(p1); +-- should be rejected since c1.f1 is inherited +alter table c1 drop column f1; +ERROR: cannot drop inherited column "f1" +alter table only p1 drop column f1; +-- c1.f1 is NOT dropped, but must now be considered non-inherited +alter table c1 drop column f1; +drop table p1 cascade; +NOTICE: drop cascades to table c1 +create table p1 (f1 int, f2 int); +create table c1 (f1 int not null) inherits(p1); +NOTICE: merging column "f1" with inherited definition +-- should be rejected since c1.f1 is inherited +alter table c1 drop column f1; +ERROR: cannot drop inherited column "f1" +alter table only p1 drop column f1; +-- c1.f1 is still there, but no longer inherited +alter table c1 drop column f1; +drop table p1 cascade; +NOTICE: drop cascades to table c1 +create table p1(id int, name text); +create table p2(id2 int, name text, height int); +create table c1(age int) inherits(p1,p2); +NOTICE: merging multiple inherited definitions of column "name" +create table gc1() inherits (c1); +select relname, attname, attinhcount, attislocal +from pg_class join pg_attribute on (pg_class.oid = pg_attribute.attrelid) +where relname in ('p1','p2','c1','gc1') and attnum > 0 and not attisdropped +order by relname, attnum; + relname | attname | attinhcount | attislocal +---------+---------+-------------+------------ + c1 | id | 1 | f + c1 | name | 2 | f + c1 | id2 | 1 | f + c1 | height | 1 | f + c1 | age | 0 | t + gc1 | id | 1 | f + gc1 | name | 1 | f + gc1 | id2 | 1 | f + gc1 | height | 1 | f + gc1 | age | 1 | f + p1 | id | 0 | t + p1 | name | 0 | t + p2 | id2 | 0 | t + p2 | name | 0 | t + p2 | height | 0 | t +(15 rows) + +-- should work +alter table only p1 drop column name; +-- should work. Now c1.name is local and inhcount is 0. 
+alter table p2 drop column name;
+-- should be rejected since it's inherited
+alter table gc1 drop column name;
+ERROR: cannot drop inherited column "name"
+-- should work, and drop gc1.name along with it
+alter table c1 drop column name;
+-- should fail: column does not exist
+alter table gc1 drop column name;
+ERROR: column "name" of relation "gc1" does not exist
+-- should work and drop the attribute in all tables
+alter table p2 drop column height;
+-- IF EXISTS test
+create table dropColumnExists ();
+alter table dropColumnExists drop column non_existing; --fail
+ERROR: column "non_existing" of relation "dropcolumnexists" does not exist
+alter table dropColumnExists drop column if exists non_existing; --succeed
+NOTICE: column "non_existing" of relation "dropcolumnexists" does not exist, skipping
+select relname, attname, attinhcount, attislocal
+from pg_class join pg_attribute on (pg_class.oid = pg_attribute.attrelid)
+where relname in ('p1','p2','c1','gc1') and attnum > 0 and not attisdropped
+order by relname, attnum;
+ relname | attname | attinhcount | attislocal
+---------+---------+-------------+------------
+ c1 | id | 1 | f
+ c1 | id2 | 1 | f
+ c1 | age | 0 | t
+ gc1 | id | 1 | f
+ gc1 | id2 | 1 | f
+ gc1 | age | 1 | f
+ p1 | id | 0 | t
+ p2 | id2 | 0 | t
+(8 rows)
+
+drop table p1, p2 cascade;
+NOTICE: drop cascades to 2 other objects
+DETAIL: drop cascades to table c1
+drop cascades to table gc1
+-- test attinhcount tracking with merged columns
+create table depth0();
+create table depth1(c text) inherits (depth0);
+create table depth2() inherits (depth1);
+alter table depth0 add c text;
+NOTICE: merging definition of column "c" for child "depth1"
+select attrelid::regclass, attname, attinhcount, attislocal
+from pg_attribute
+where attnum > 0 and attrelid::regclass in ('depth0', 'depth1', 'depth2')
+order by attrelid::regclass::text, attnum;
+ attrelid | attname | attinhcount | attislocal
+----------+---------+-------------+------------
+ depth0 | c | 0 | t
+ depth1 | c | 1 | t
+ depth2 | c | 1 | f
+(3 rows)
+
+-- test renumbering of child-table columns in inherited operations
+create table p1 (f1 int);
+create table c1 (f2 text, f3 int) inherits (p1);
+alter table p1 add column a1 int check (a1 > 0);
+alter table p1 add column f2 text;
+NOTICE: merging definition of column "f2" for child "c1"
+insert into p1 values (1,2,'abc');
+insert into c1 values(11,'xyz',33,0); -- should fail
+ERROR: new row for relation "c1" violates check constraint "p1_a1_check"
+DETAIL: Failing row contains (11, xyz, 33, 0).
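+-- the failure above shows the parent's check constraint reached c1 even
+-- though the child's physical column order differs from the parent's;
+-- a row satisfying a1 > 0 goes through: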
+insert into c1 values(11,'xyz',33,22); +select * from p1; + f1 | a1 | f2 +----+----+----- + 1 | 2 | abc + 11 | 22 | xyz +(2 rows) + +update p1 set a1 = a1 + 1, f2 = upper(f2); +select * from p1; + f1 | a1 | f2 +----+----+----- + 1 | 3 | ABC + 11 | 23 | XYZ +(2 rows) + +drop table p1 cascade; +NOTICE: drop cascades to table c1 +-- test that operations with a dropped column do not try to reference +-- its datatype +create domain mytype as text; +create temp table foo (f1 text, f2 mytype, f3 text); +insert into foo values('bb','cc','dd'); +select * from foo; + f1 | f2 | f3 +----+----+---- + bb | cc | dd +(1 row) + +drop domain mytype cascade; +NOTICE: drop cascades to column f2 of table foo +select * from foo; + f1 | f3 +----+---- + bb | dd +(1 row) + +insert into foo values('qq','rr'); +select * from foo; + f1 | f3 +----+---- + bb | dd + qq | rr +(2 rows) + +update foo set f3 = 'zz'; +select * from foo; + f1 | f3 +----+---- + bb | zz + qq | zz +(2 rows) + +select f3,max(f1) from foo group by f3; + f3 | max +----+----- + zz | qq +(1 row) + +-- Simple tests for alter table column type +alter table foo alter f1 TYPE integer; -- fails +ERROR: column "f1" cannot be cast automatically to type integer +HINT: You might need to specify "USING f1::integer". +alter table foo alter f1 TYPE varchar(10); +create table anothertab (atcol1 serial8, atcol2 boolean, + constraint anothertab_chk check (atcol1 <= 3)); +insert into anothertab (atcol1, atcol2) values (default, true); +insert into anothertab (atcol1, atcol2) values (default, false); +select * from anothertab; + atcol1 | atcol2 +--------+-------- + 1 | t + 2 | f +(2 rows) + +alter table anothertab alter column atcol1 type boolean; -- fails +ERROR: column "atcol1" cannot be cast automatically to type boolean +HINT: You might need to specify "USING atcol1::boolean". +alter table anothertab alter column atcol1 type boolean using atcol1::int; -- fails +ERROR: result of USING clause for column "atcol1" cannot be cast automatically to type boolean +HINT: You might need to add an explicit cast. +alter table anothertab alter column atcol1 type integer; +select * from anothertab; + atcol1 | atcol2 +--------+-------- + 1 | t + 2 | f +(2 rows) + +insert into anothertab (atcol1, atcol2) values (45, null); -- fails +ERROR: new row for relation "anothertab" violates check constraint "anothertab_chk" +DETAIL: Failing row contains (45, null). +insert into anothertab (atcol1, atcol2) values (default, null); +select * from anothertab; + atcol1 | atcol2 +--------+-------- + 1 | t + 2 | f + 3 | +(3 rows) + +alter table anothertab alter column atcol2 type text + using case when atcol2 is true then 'IT WAS TRUE' + when atcol2 is false then 'IT WAS FALSE' + else 'IT WAS NULL!' end; +select * from anothertab; + atcol1 | atcol2 +--------+-------------- + 1 | IT WAS TRUE + 2 | IT WAS FALSE + 3 | IT WAS NULL! +(3 rows) + +alter table anothertab alter column atcol1 type boolean + using case when atcol1 % 2 = 0 then true else false end; -- fails +ERROR: default for column "atcol1" cannot be cast automatically to type boolean +alter table anothertab alter column atcol1 drop default; +alter table anothertab alter column atcol1 type boolean + using case when atcol1 % 2 = 0 then true else false end; -- fails +ERROR: operator does not exist: boolean <= integer +HINT: No operator matches the given name and argument types. You might need to add explicit type casts. 
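+-- the CASE conversion itself is fine by now; what breaks is re-evaluating
+-- the surviving check constraint anothertab_chk (atcol1 <= 3) against a
+-- boolean column, so dropping that constraint below lets the ALTER proceed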
+alter table anothertab drop constraint anothertab_chk; +alter table anothertab drop constraint anothertab_chk; -- fails +ERROR: constraint "anothertab_chk" of relation "anothertab" does not exist +alter table anothertab drop constraint IF EXISTS anothertab_chk; -- succeeds +NOTICE: constraint "anothertab_chk" of relation "anothertab" does not exist, skipping +alter table anothertab alter column atcol1 type boolean + using case when atcol1 % 2 = 0 then true else false end; +select * from anothertab; + atcol1 | atcol2 +--------+-------------- + f | IT WAS TRUE + t | IT WAS FALSE + f | IT WAS NULL! +(3 rows) + +drop table anothertab; +-- Test index handling in alter table column type (cf. bugs #15835, #15865) +create table anothertab(f1 int primary key, f2 int unique, + f3 int, f4 int, f5 int); +alter table anothertab + add exclude using btree (f3 with =); +alter table anothertab + add exclude using btree (f4 with =) where (f4 is not null); +alter table anothertab + add exclude using btree (f4 with =) where (f5 > 0); +alter table anothertab + add unique(f1,f4); +create index on anothertab(f2,f3); +create unique index on anothertab(f4); +\d anothertab + Table "public.anothertab" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + f1 | integer | | not null | + f2 | integer | | | + f3 | integer | | | + f4 | integer | | | + f5 | integer | | | +Indexes: + "anothertab_pkey" PRIMARY KEY, btree (f1) + "anothertab_f1_f4_key" UNIQUE CONSTRAINT, btree (f1, f4) + "anothertab_f2_f3_idx" btree (f2, f3) + "anothertab_f2_key" UNIQUE CONSTRAINT, btree (f2) + "anothertab_f3_excl" EXCLUDE USING btree (f3 WITH =) + "anothertab_f4_excl" EXCLUDE USING btree (f4 WITH =) WHERE (f4 IS NOT NULL) + "anothertab_f4_excl1" EXCLUDE USING btree (f4 WITH =) WHERE (f5 > 0) + "anothertab_f4_idx" UNIQUE, btree (f4) + +alter table anothertab alter column f1 type bigint; +alter table anothertab + alter column f2 type bigint, + alter column f3 type bigint, + alter column f4 type bigint; +alter table anothertab alter column f5 type bigint; +\d anothertab + Table "public.anothertab" + Column | Type | Collation | Nullable | Default +--------+--------+-----------+----------+--------- + f1 | bigint | | not null | + f2 | bigint | | | + f3 | bigint | | | + f4 | bigint | | | + f5 | bigint | | | +Indexes: + "anothertab_pkey" PRIMARY KEY, btree (f1) + "anothertab_f1_f4_key" UNIQUE CONSTRAINT, btree (f1, f4) + "anothertab_f2_f3_idx" btree (f2, f3) + "anothertab_f2_key" UNIQUE CONSTRAINT, btree (f2) + "anothertab_f3_excl" EXCLUDE USING btree (f3 WITH =) + "anothertab_f4_excl" EXCLUDE USING btree (f4 WITH =) WHERE (f4 IS NOT NULL) + "anothertab_f4_excl1" EXCLUDE USING btree (f4 WITH =) WHERE (f5 > 0) + "anothertab_f4_idx" UNIQUE, btree (f4) + +drop table anothertab; +-- test that USING expressions are parsed before column alter type / drop steps +create table another (f1 int, f2 text, f3 text); +insert into another values(1, 'one', 'uno'); +insert into another values(2, 'two', 'due'); +insert into another values(3, 'three', 'tre'); +select * from another; + f1 | f2 | f3 +----+-------+----- + 1 | one | uno + 2 | two | due + 3 | three | tre +(3 rows) + +alter table another + alter f1 type text using f2 || ' and ' || f3 || ' more', + alter f2 type bigint using f1 * 10, + drop column f3; +select * from another; + f1 | f2 +--------------------+---- + one and uno more | 10 + two and due more | 20 + three and tre more | 30 +(3 rows) + +drop table another; +-- Create an index that skips WAL, then 
perform a SET DATA TYPE that skips +-- rewriting the index. +begin; +create table skip_wal_skip_rewrite_index (c varchar(10) primary key); +alter table skip_wal_skip_rewrite_index alter c type varchar(20); +commit; +-- table's row type +create table tab1 (a int, b text); +create table tab2 (x int, y tab1); +alter table tab1 alter column b type varchar; -- fails +ERROR: cannot alter table "tab1" because column "tab2.y" uses its row type +-- Alter column type that's part of a partitioned index +create table at_partitioned (a int, b text) partition by range (a); +create table at_part_1 partition of at_partitioned for values from (0) to (1000); +insert into at_partitioned values (512, '0.123'); +create table at_part_2 (b text, a int); +insert into at_part_2 values ('1.234', 1024); +create index on at_partitioned (b); +create index on at_partitioned (a); +\d at_part_1 + Table "public.at_part_1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | +Partition of: at_partitioned FOR VALUES FROM (0) TO (1000) +Indexes: + "at_part_1_a_idx" btree (a) + "at_part_1_b_idx" btree (b) + +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | text | | | + a | integer | | | + +alter table at_partitioned attach partition at_part_2 for values from (1000) to (2000); +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | text | | | + a | integer | | | +Partition of: at_partitioned FOR VALUES FROM (1000) TO (2000) +Indexes: + "at_part_2_a_idx" btree (a) + "at_part_2_b_idx" btree (b) + +alter table at_partitioned alter column b type numeric using b::numeric; +\d at_part_1 + Table "public.at_part_1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | numeric | | | +Partition of: at_partitioned FOR VALUES FROM (0) TO (1000) +Indexes: + "at_part_1_a_idx" btree (a) + "at_part_1_b_idx" btree (b) + +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | numeric | | | + a | integer | | | +Partition of: at_partitioned FOR VALUES FROM (1000) TO (2000) +Indexes: + "at_part_2_a_idx" btree (a) + "at_part_2_b_idx" btree (b) + +drop table at_partitioned; +-- Alter column type when no table rewrite is required +-- Also check that comments are preserved +create table at_partitioned(id int, name varchar(64), unique (id, name)) + partition by hash(id); +comment on constraint at_partitioned_id_name_key on at_partitioned is 'parent constraint'; +comment on index at_partitioned_id_name_key is 'parent index'; +create table at_partitioned_0 partition of at_partitioned + for values with (modulus 2, remainder 0); +comment on constraint at_partitioned_0_id_name_key on at_partitioned_0 is 'child 0 constraint'; +comment on index at_partitioned_0_id_name_key is 'child 0 index'; +create table at_partitioned_1 partition of at_partitioned + for values with (modulus 2, remainder 1); +comment on constraint at_partitioned_1_id_name_key on at_partitioned_1 is 'child 1 constraint'; +comment on index at_partitioned_1_id_name_key is 'child 1 index'; +insert into at_partitioned values(1, 'foo'); +insert into at_partitioned values(3, 'bar'); +create temp table old_oids as + select relname, oid as oldoid, relfilenode as 
oldfilenode + from pg_class where relname like 'at_partitioned%'; +select relname, + c.oid = oldoid as orig_oid, + case relfilenode + when 0 then 'none' + when c.oid then 'own' + when oldfilenode then 'orig' + else 'OTHER' + end as storage, + obj_description(c.oid, 'pg_class') as desc + from pg_class c left join old_oids using (relname) + where relname like 'at_partitioned%' + order by relname; + relname | orig_oid | storage | desc +------------------------------+----------+---------+--------------- + at_partitioned | t | none | + at_partitioned_0 | t | own | + at_partitioned_0_id_name_key | t | own | child 0 index + at_partitioned_1 | t | own | + at_partitioned_1_id_name_key | t | own | child 1 index + at_partitioned_id_name_key | t | none | parent index +(6 rows) + +select conname, obj_description(oid, 'pg_constraint') as desc + from pg_constraint where conname like 'at_partitioned%' + order by conname; + conname | desc +------------------------------+-------------------- + at_partitioned_0_id_name_key | child 0 constraint + at_partitioned_1_id_name_key | child 1 constraint + at_partitioned_id_name_key | parent constraint +(3 rows) + +alter table at_partitioned alter column name type varchar(127); +-- Note: these tests currently show the wrong behavior for comments :-( +select relname, + c.oid = oldoid as orig_oid, + case relfilenode + when 0 then 'none' + when c.oid then 'own' + when oldfilenode then 'orig' + else 'OTHER' + end as storage, + obj_description(c.oid, 'pg_class') as desc + from pg_class c left join old_oids using (relname) + where relname like 'at_partitioned%' + order by relname; + relname | orig_oid | storage | desc +------------------------------+----------+---------+-------------- + at_partitioned | t | none | + at_partitioned_0 | t | own | + at_partitioned_0_id_name_key | f | own | parent index + at_partitioned_1 | t | own | + at_partitioned_1_id_name_key | f | own | parent index + at_partitioned_id_name_key | f | none | parent index +(6 rows) + +select conname, obj_description(oid, 'pg_constraint') as desc + from pg_constraint where conname like 'at_partitioned%' + order by conname; + conname | desc +------------------------------+------------------- + at_partitioned_0_id_name_key | + at_partitioned_1_id_name_key | + at_partitioned_id_name_key | parent constraint +(3 rows) + +-- Don't remove this DROP, it exposes bug #15672 +drop table at_partitioned; +-- disallow recursive containment of row types +create temp table recur1 (f1 int); +alter table recur1 add column f2 recur1; -- fails +ERROR: composite type recur1 cannot be made a member of itself +alter table recur1 add column f2 recur1[]; -- fails +ERROR: composite type recur1 cannot be made a member of itself +create domain array_of_recur1 as recur1[]; +alter table recur1 add column f2 array_of_recur1; -- fails +ERROR: composite type recur1 cannot be made a member of itself +create temp table recur2 (f1 int, f2 recur1); +alter table recur1 add column f2 recur2; -- fails +ERROR: composite type recur1 cannot be made a member of itself +alter table recur1 add column f2 int; +alter table recur1 alter column f2 type recur2; -- fails +ERROR: composite type recur1 cannot be made a member of itself +-- SET STORAGE may need to add a TOAST table +create table test_storage (a text); +alter table test_storage alter a set storage plain; +alter table test_storage add b int default 0; -- rewrite table to remove its TOAST table +alter table test_storage alter a set storage extended; -- re-add TOAST table +select reltoastrelid <> 0 
as has_toast_table +from pg_class +where oid = 'test_storage'::regclass; + has_toast_table +----------------- + t +(1 row) + +-- test that SET STORAGE propagates to index correctly +create index test_storage_idx on test_storage (b, a); +alter table test_storage alter column a set storage external; +\d+ test_storage + Table "public.test_storage" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + a | text | | | | external | | + b | integer | | | 0 | plain | | +Indexes: + "test_storage_idx" btree (b, a) + +\d+ test_storage_idx + Index "public.test_storage_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+------------+----------+-------------- + b | integer | yes | b | plain | + a | text | yes | a | external | +btree, for table "public.test_storage" + +-- ALTER COLUMN TYPE with a check constraint and a child table (bug #13779) +CREATE TABLE test_inh_check (a float check (a > 10.2), b float); +CREATE TABLE test_inh_check_child() INHERITS(test_inh_check); +\d test_inh_check + Table "public.test_inh_check" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | double precision | | | + b | double precision | | | +Check constraints: + "test_inh_check_a_check" CHECK (a > 10.2::double precision) +Number of child tables: 1 (Use \d+ to list them.) + +\d test_inh_check_child + Table "public.test_inh_check_child" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | double precision | | | + b | double precision | | | +Check constraints: + "test_inh_check_a_check" CHECK (a > 10.2::double precision) +Inherits: test_inh_check + +select relname, conname, coninhcount, conislocal, connoinherit + from pg_constraint c, pg_class r + where relname like 'test_inh_check%' and c.conrelid = r.oid + order by 1, 2; + relname | conname | coninhcount | conislocal | connoinherit +----------------------+------------------------+-------------+------------+-------------- + test_inh_check | test_inh_check_a_check | 0 | t | f + test_inh_check_child | test_inh_check_a_check | 1 | f | f +(2 rows) + +ALTER TABLE test_inh_check ALTER COLUMN a TYPE numeric; +\d test_inh_check + Table "public.test_inh_check" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | numeric | | | + b | double precision | | | +Check constraints: + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Number of child tables: 1 (Use \d+ to list them.) 
+ +\d test_inh_check_child + Table "public.test_inh_check_child" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | numeric | | | + b | double precision | | | +Check constraints: + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Inherits: test_inh_check + +select relname, conname, coninhcount, conislocal, connoinherit + from pg_constraint c, pg_class r + where relname like 'test_inh_check%' and c.conrelid = r.oid + order by 1, 2; + relname | conname | coninhcount | conislocal | connoinherit +----------------------+------------------------+-------------+------------+-------------- + test_inh_check | test_inh_check_a_check | 0 | t | f + test_inh_check_child | test_inh_check_a_check | 1 | f | f +(2 rows) + +-- also try noinherit, local, and local+inherited cases +ALTER TABLE test_inh_check ADD CONSTRAINT bnoinherit CHECK (b > 100) NO INHERIT; +ALTER TABLE test_inh_check_child ADD CONSTRAINT blocal CHECK (b < 1000); +ALTER TABLE test_inh_check_child ADD CONSTRAINT bmerged CHECK (b > 1); +ALTER TABLE test_inh_check ADD CONSTRAINT bmerged CHECK (b > 1); +NOTICE: merging constraint "bmerged" with inherited definition +\d test_inh_check + Table "public.test_inh_check" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | numeric | | | + b | double precision | | | +Check constraints: + "bmerged" CHECK (b > 1::double precision) + "bnoinherit" CHECK (b > 100::double precision) NO INHERIT + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Number of child tables: 1 (Use \d+ to list them.) + +\d test_inh_check_child + Table "public.test_inh_check_child" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | numeric | | | + b | double precision | | | +Check constraints: + "blocal" CHECK (b < 1000::double precision) + "bmerged" CHECK (b > 1::double precision) + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Inherits: test_inh_check + +select relname, conname, coninhcount, conislocal, connoinherit + from pg_constraint c, pg_class r + where relname like 'test_inh_check%' and c.conrelid = r.oid + order by 1, 2; + relname | conname | coninhcount | conislocal | connoinherit +----------------------+------------------------+-------------+------------+-------------- + test_inh_check | bmerged | 0 | t | f + test_inh_check | bnoinherit | 0 | t | t + test_inh_check | test_inh_check_a_check | 0 | t | f + test_inh_check_child | blocal | 0 | t | f + test_inh_check_child | bmerged | 1 | t | f + test_inh_check_child | test_inh_check_a_check | 1 | f | f +(6 rows) + +ALTER TABLE test_inh_check ALTER COLUMN b TYPE numeric; +NOTICE: merging constraint "bmerged" with inherited definition +\d test_inh_check + Table "public.test_inh_check" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | numeric | | | + b | numeric | | | +Check constraints: + "bmerged" CHECK (b::double precision > 1::double precision) + "bnoinherit" CHECK (b::double precision > 100::double precision) NO INHERIT + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Number of child tables: 1 (Use \d+ to list them.) 
+ +\d test_inh_check_child + Table "public.test_inh_check_child" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | numeric | | | + b | numeric | | | +Check constraints: + "blocal" CHECK (b::double precision < 1000::double precision) + "bmerged" CHECK (b::double precision > 1::double precision) + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Inherits: test_inh_check + +select relname, conname, coninhcount, conislocal, connoinherit + from pg_constraint c, pg_class r + where relname like 'test_inh_check%' and c.conrelid = r.oid + order by 1, 2; + relname | conname | coninhcount | conislocal | connoinherit +----------------------+------------------------+-------------+------------+-------------- + test_inh_check | bmerged | 0 | t | f + test_inh_check | bnoinherit | 0 | t | t + test_inh_check | test_inh_check_a_check | 0 | t | f + test_inh_check_child | blocal | 0 | t | f + test_inh_check_child | bmerged | 1 | t | f + test_inh_check_child | test_inh_check_a_check | 1 | f | f +(6 rows) + +-- ALTER COLUMN TYPE with different schema in children +-- Bug at https://postgr.es/m/20170102225618.GA10071@telsasoft.com +CREATE TABLE test_type_diff (f1 int); +CREATE TABLE test_type_diff_c (extra smallint) INHERITS (test_type_diff); +ALTER TABLE test_type_diff ADD COLUMN f2 int; +INSERT INTO test_type_diff_c VALUES (1, 2, 3); +ALTER TABLE test_type_diff ALTER COLUMN f2 TYPE bigint USING f2::bigint; +CREATE TABLE test_type_diff2 (int_two int2, int_four int4, int_eight int8); +CREATE TABLE test_type_diff2_c1 (int_four int4, int_eight int8, int_two int2); +CREATE TABLE test_type_diff2_c2 (int_eight int8, int_two int2, int_four int4); +CREATE TABLE test_type_diff2_c3 (int_two int2, int_four int4, int_eight int8); +ALTER TABLE test_type_diff2_c1 INHERIT test_type_diff2; +ALTER TABLE test_type_diff2_c2 INHERIT test_type_diff2; +ALTER TABLE test_type_diff2_c3 INHERIT test_type_diff2; +INSERT INTO test_type_diff2_c1 VALUES (1, 2, 3); +INSERT INTO test_type_diff2_c2 VALUES (4, 5, 6); +INSERT INTO test_type_diff2_c3 VALUES (7, 8, 9); +ALTER TABLE test_type_diff2 ALTER COLUMN int_four TYPE int8 USING int_four::int8; +-- whole-row references are disallowed +ALTER TABLE test_type_diff2 ALTER COLUMN int_four TYPE int4 USING (pg_column_size(test_type_diff2)); +ERROR: cannot convert whole-row table reference +DETAIL: USING expression contains a whole-row table reference. 
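+-- (a plausible reading of the restriction: the USING expression is evaluated
+-- against each old row while the table's row type is being rewritten, so a
+-- whole-row reference to the table being altered has no stable type to bind
+-- to, and ALTER TABLE rejects it outright)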
+-- check for rollback of ANALYZE corrupting table property flags (bug #11638)
+CREATE TABLE check_fk_presence_1 (id int PRIMARY KEY, t text);
+CREATE TABLE check_fk_presence_2 (id int REFERENCES check_fk_presence_1, t text);
+BEGIN;
+ALTER TABLE check_fk_presence_2 DROP CONSTRAINT check_fk_presence_2_id_fkey;
+ANALYZE check_fk_presence_2;
+ROLLBACK;
+\d check_fk_presence_2
+ Table "public.check_fk_presence_2"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ id | integer | | |
+ t | text | | |
+Foreign-key constraints:
+ "check_fk_presence_2_id_fkey" FOREIGN KEY (id) REFERENCES check_fk_presence_1(id)
+
+DROP TABLE check_fk_presence_1, check_fk_presence_2;
+-- check column addition within a view (bug #14876)
+create table at_base_table(id int, stuff text);
+insert into at_base_table values (23, 'skidoo');
+create view at_view_1 as select * from at_base_table bt;
+create view at_view_2 as select *, to_json(v1) as j from at_view_1 v1;
+\d+ at_view_1
+ View "public.at_view_1"
+ Column | Type | Collation | Nullable | Default | Storage | Description
+--------+---------+-----------+----------+---------+----------+-------------
+ id | integer | | | | plain |
+ stuff | text | | | | extended |
+View definition:
+ SELECT bt.id,
+ bt.stuff
+ FROM at_base_table bt;
+
+\d+ at_view_2
+ View "public.at_view_2"
+ Column | Type | Collation | Nullable | Default | Storage | Description
+--------+---------+-----------+----------+---------+----------+-------------
+ id | integer | | | | plain |
+ stuff | text | | | | extended |
+ j | json | | | | extended |
+View definition:
+ SELECT v1.id,
+ v1.stuff,
+ to_json(v1.*) AS j
+ FROM at_view_1 v1;
+
+explain (verbose, costs off) select * from at_view_2;
+ QUERY PLAN
+----------------------------------------------------------
+ Seq Scan on public.at_base_table bt
+ Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff))
+(2 rows)
+
+select * from at_view_2;
+ id | stuff | j
+----+--------+----------------------------
+ 23 | skidoo | {"id":23,"stuff":"skidoo"}
+(1 row)
+
+create or replace view at_view_1 as select *, 2+2 as more from at_base_table bt;
+\d+ at_view_1
+ View "public.at_view_1"
+ Column | Type | Collation | Nullable | Default | Storage | Description
+--------+---------+-----------+----------+---------+----------+-------------
+ id | integer | | | | plain |
+ stuff | text | | | | extended |
+ more | integer | | | | plain |
+View definition:
+ SELECT bt.id,
+ bt.stuff,
+ 2 + 2 AS more
+ FROM at_base_table bt;
+
+\d+ at_view_2
+ View "public.at_view_2"
+ Column | Type | Collation | Nullable | Default | Storage | Description
+--------+---------+-----------+----------+---------+----------+-------------
+ id | integer | | | | plain |
+ stuff | text | | | | extended |
+ j | json | | | | extended |
+View definition:
+ SELECT v1.id,
+ v1.stuff,
+ to_json(v1.*) AS j
+ FROM at_view_1 v1;
+
+explain (verbose, costs off) select * from at_view_2;
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on public.at_base_table bt
+ Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, NULL))
+(2 rows)
+
+select * from at_view_2;
+ id | stuff | j
+----+--------+----------------------------------------
+ 23 | skidoo | {"id":23,"stuff":"skidoo","more":null}
+(1 row)
+
+drop view at_view_2;
+drop view at_view_1;
+drop table at_base_table;
+-- check adding a column not itself requiring a rewrite, together with
+-- a column requiring a default (bug #16038)
+-- ensure that rewrites aren't
silently optimized away, removing the +-- value of the test +CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text) +RETURNS boolean +LANGUAGE plpgsql AS $$ +DECLARE + v_relfilenode oid; +BEGIN + v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; + + EXECUTE p_ddl; + + RETURN v_relfilenode <> (SELECT relfilenode FROM pg_class WHERE oid = p_tablename); +END; +$$; +CREATE TABLE rewrite_test(col text); +INSERT INTO rewrite_test VALUES ('something'); +INSERT INTO rewrite_test VALUES (NULL); +-- empty[12] don't need rewrite, but notempty[12]_rewrite will force one +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN empty1 text, + ADD COLUMN notempty1_rewrite serial; +$$); + check_ddl_rewrite +------------------- + t +(1 row) + +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN notempty2_rewrite serial, + ADD COLUMN empty2 text; +$$); + check_ddl_rewrite +------------------- + t +(1 row) + +-- also check that fast defaults cause no problem, first without rewrite +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN empty3 text, + ADD COLUMN notempty3_norewrite int default 42; +$$); + check_ddl_rewrite +------------------- + f +(1 row) + +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN notempty4_norewrite int default 42, + ADD COLUMN empty4 text; +$$); + check_ddl_rewrite +------------------- + f +(1 row) + +-- then with rewrite +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN empty5 text, + ADD COLUMN notempty5_norewrite int default 42, + ADD COLUMN notempty5_rewrite serial; +$$); + check_ddl_rewrite +------------------- + t +(1 row) + +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN notempty6_rewrite serial, + ADD COLUMN empty6 text, + ADD COLUMN notempty6_norewrite int default 42; +$$); + check_ddl_rewrite +------------------- + t +(1 row) + +-- cleanup +DROP FUNCTION check_ddl_rewrite(regclass, text); +DROP TABLE rewrite_test; +-- +-- lock levels +-- +drop type lockmodes; +ERROR: type "lockmodes" does not exist +create type lockmodes as enum ( + 'SIReadLock' +,'AccessShareLock' +,'RowShareLock' +,'RowExclusiveLock' +,'ShareUpdateExclusiveLock' +,'ShareLock' +,'ShareRowExclusiveLock' +,'ExclusiveLock' +,'AccessExclusiveLock' +); +drop view my_locks; +ERROR: view "my_locks" does not exist +create or replace view my_locks as +select case when c.relname like 'pg_toast%' then 'pg_toast' else c.relname end, max(mode::lockmodes) as max_lockmode +from pg_locks l join pg_class c on l.relation = c.oid +where virtualtransaction = ( + select virtualtransaction + from pg_locks + where transactionid = pg_current_xact_id()::xid) +and locktype = 'relation' +and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') +and c.relname != 'my_locks' +group by c.relname; +create table alterlock (f1 int primary key, f2 text); +insert into alterlock values (1, 'foo'); +create table alterlock2 (f3 int primary key, f1 int); +insert into alterlock2 values (1, 1); +begin; alter table alterlock alter column f2 set statistics 150; +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock +(1 row) + +rollback; +begin; alter table alterlock cluster on alterlock_pkey; +select * from my_locks order by 1; + relname | max_lockmode +----------------+-------------------------- + alterlock | 
ShareUpdateExclusiveLock + alterlock_pkey | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock set without cluster; +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock +(1 row) + +commit; +begin; alter table alterlock set (fillfactor = 100); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock reset (fillfactor); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock set (toast.autovacuum_enabled = off); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock set (autovacuum_enabled = off); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock alter column f2 set (n_distinct = 1); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock +(1 row) + +rollback; +-- test that mixing options with different lock levels works as expected +begin; alter table alterlock set (autovacuum_enabled = off, fillfactor = 80); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock alter column f2 set storage extended; +select * from my_locks order by 1; + relname | max_lockmode +-----------+--------------------- + alterlock | AccessExclusiveLock +(1 row) + +rollback; +begin; alter table alterlock alter column f2 set default 'x'; +select * from my_locks order by 1; + relname | max_lockmode +-----------+--------------------- + alterlock | AccessExclusiveLock +(1 row) + +rollback; +begin; +create trigger ttdummy + before delete or update on alterlock + for each row + execute procedure + ttdummy (1, 1); +select * from my_locks order by 1; + relname | max_lockmode +-----------+----------------------- + alterlock | ShareRowExclusiveLock +(1 row) + +rollback; +begin; +select * from my_locks order by 1; + relname | max_lockmode +---------+-------------- +(0 rows) + +alter table alterlock2 add foreign key (f1) references alterlock (f1); +select * from my_locks order by 1; + relname | max_lockmode +-----------------+----------------------- + alterlock | ShareRowExclusiveLock + alterlock2 | ShareRowExclusiveLock + alterlock2_pkey | AccessShareLock + alterlock_pkey | AccessShareLock +(4 rows) + +rollback; +begin; +alter table alterlock2 +add constraint alterlock2nv foreign key (f1) references alterlock (f1) NOT VALID; +select * from my_locks order by 1; + relname | max_lockmode +------------+----------------------- + alterlock | ShareRowExclusiveLock + alterlock2 | ShareRowExclusiveLock +(2 rows) + +commit; +begin; +alter table alterlock2 validate constraint alterlock2nv; +select * from my_locks order by 1; + relname | max_lockmode 
+-----------------+-------------------------- + alterlock | RowShareLock + alterlock2 | ShareUpdateExclusiveLock + alterlock2_pkey | AccessShareLock + alterlock_pkey | AccessShareLock +(4 rows) + +rollback; +create or replace view my_locks as +select case when c.relname like 'pg_toast%' then 'pg_toast' else c.relname end, max(mode::lockmodes) as max_lockmode +from pg_locks l join pg_class c on l.relation = c.oid +where virtualtransaction = ( + select virtualtransaction + from pg_locks + where transactionid = pg_current_xact_id()::xid) +and locktype = 'relation' +and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') +and c.relname = 'my_locks' +group by c.relname; +-- raise exception +alter table my_locks set (autovacuum_enabled = false); +ERROR: unrecognized parameter "autovacuum_enabled" +alter view my_locks set (autovacuum_enabled = false); +ERROR: unrecognized parameter "autovacuum_enabled" +alter table my_locks reset (autovacuum_enabled); +alter view my_locks reset (autovacuum_enabled); +begin; +alter view my_locks set (security_barrier=off); +select * from my_locks order by 1; + relname | max_lockmode +----------+--------------------- + my_locks | AccessExclusiveLock +(1 row) + +alter view my_locks reset (security_barrier); +rollback; +-- this test intentionally applies the ALTER TABLE command against a view, but +-- uses a view option so we expect this to succeed. This form of SQL is +-- accepted for historical reasons, as shown in the docs for ALTER VIEW +begin; +alter table my_locks set (security_barrier=off); +select * from my_locks order by 1; + relname | max_lockmode +----------+--------------------- + my_locks | AccessExclusiveLock +(1 row) + +alter table my_locks reset (security_barrier); +rollback; +-- cleanup +drop table alterlock2; +drop table alterlock; +drop view my_locks; +drop type lockmodes; +-- +-- alter function +-- +create function test_strict(text) returns text as + 'select coalesce($1, ''got passed a null'');' + language sql returns null on null input; +select test_strict(NULL); + test_strict +------------- + +(1 row) + +alter function test_strict(text) called on null input; +select test_strict(NULL); + test_strict +------------------- + got passed a null +(1 row) + +create function non_strict(text) returns text as + 'select coalesce($1, ''got passed a null'');' + language sql called on null input; +select non_strict(NULL); + non_strict +------------------- + got passed a null +(1 row) + +alter function non_strict(text) returns null on null input; +select non_strict(NULL); + non_strict +------------ + +(1 row) + +-- +-- alter object set schema +-- +create schema alter1; +create schema alter2; +create table alter1.t1(f1 serial primary key, f2 int check (f2 > 0)); +create view alter1.v1 as select * from alter1.t1; +create function alter1.plus1(int) returns int as 'select $1+1' language sql; +create domain alter1.posint integer check (value > 0); +create type alter1.ctype as (f1 int, f2 text); +create function alter1.same(alter1.ctype, alter1.ctype) returns boolean language sql +as 'select $1.f1 is not distinct from $2.f1 and $1.f2 is not distinct from $2.f2'; +create operator alter1.=(procedure = alter1.same, leftarg = alter1.ctype, rightarg = alter1.ctype); +create operator class alter1.ctype_hash_ops default for type alter1.ctype using hash as + operator 1 alter1.=(alter1.ctype, alter1.ctype); +create conversion alter1.latin1_to_utf8 for 'latin1' to 'utf8' from iso8859_1_to_utf8; +create text search parser alter1.prs(start = prsd_start, 
gettoken = prsd_nexttoken, end = prsd_end, lextypes = prsd_lextype); +create text search configuration alter1.cfg(parser = alter1.prs); +create text search template alter1.tmpl(init = dsimple_init, lexize = dsimple_lexize); +create text search dictionary alter1.dict(template = alter1.tmpl); +insert into alter1.t1(f2) values(11); +insert into alter1.t1(f2) values(12); +alter table alter1.t1 set schema alter1; -- no-op, same schema +alter table alter1.t1 set schema alter2; +alter table alter1.v1 set schema alter2; +alter function alter1.plus1(int) set schema alter2; +alter domain alter1.posint set schema alter2; +alter operator class alter1.ctype_hash_ops using hash set schema alter2; +alter operator family alter1.ctype_hash_ops using hash set schema alter2; +alter operator alter1.=(alter1.ctype, alter1.ctype) set schema alter2; +alter function alter1.same(alter1.ctype, alter1.ctype) set schema alter2; +alter type alter1.ctype set schema alter1; -- no-op, same schema +alter type alter1.ctype set schema alter2; +alter conversion alter1.latin1_to_utf8 set schema alter2; +alter text search parser alter1.prs set schema alter2; +alter text search configuration alter1.cfg set schema alter2; +alter text search template alter1.tmpl set schema alter2; +alter text search dictionary alter1.dict set schema alter2; +-- this should succeed because nothing is left in alter1 +drop schema alter1; +insert into alter2.t1(f2) values(13); +insert into alter2.t1(f2) values(14); +select * from alter2.t1; + f1 | f2 +----+---- + 1 | 11 + 2 | 12 + 3 | 13 + 4 | 14 +(4 rows) + +select * from alter2.v1; + f1 | f2 +----+---- + 1 | 11 + 2 | 12 + 3 | 13 + 4 | 14 +(4 rows) + +select alter2.plus1(41); + plus1 +------- + 42 +(1 row) + +-- clean up +drop schema alter2 cascade; +NOTICE: drop cascades to 13 other objects +DETAIL: drop cascades to table alter2.t1 +drop cascades to view alter2.v1 +drop cascades to function alter2.plus1(integer) +drop cascades to type alter2.posint +drop cascades to type alter2.ctype +drop cascades to function alter2.same(alter2.ctype,alter2.ctype) +drop cascades to operator alter2.=(alter2.ctype,alter2.ctype) +drop cascades to operator family alter2.ctype_hash_ops for access method hash +drop cascades to conversion alter2.latin1_to_utf8 +drop cascades to text search parser alter2.prs +drop cascades to text search configuration alter2.cfg +drop cascades to text search template alter2.tmpl +drop cascades to text search dictionary alter2.dict +-- +-- composite types +-- +CREATE TYPE test_type AS (a int); +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + +ALTER TYPE nosuchtype ADD ATTRIBUTE b text; -- fails +ERROR: relation "nosuchtype" does not exist +ALTER TYPE test_type ADD ATTRIBUTE b text; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | + +ALTER TYPE test_type ADD ATTRIBUTE b text; -- fails +ERROR: column "b" of relation "test_type" already exists +ALTER TYPE test_type ALTER ATTRIBUTE b SET DATA TYPE varchar; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+-------------------+-----------+----------+--------- + a | integer | | | + b | character varying | | | + +ALTER TYPE test_type ALTER ATTRIBUTE b SET DATA TYPE integer; +\d test_type + Composite type "public.test_type" + 
Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + +ALTER TYPE test_type DROP ATTRIBUTE b; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + +ALTER TYPE test_type DROP ATTRIBUTE c; -- fails +ERROR: column "c" of relation "test_type" does not exist +ALTER TYPE test_type DROP ATTRIBUTE IF EXISTS c; +NOTICE: column "c" of relation "test_type" does not exist, skipping +ALTER TYPE test_type DROP ATTRIBUTE a, ADD ATTRIBUTE d boolean; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + d | boolean | | | + +ALTER TYPE test_type RENAME ATTRIBUTE a TO aa; +ERROR: column "a" does not exist +ALTER TYPE test_type RENAME ATTRIBUTE d TO dd; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + dd | boolean | | | + +DROP TYPE test_type; +CREATE TYPE test_type1 AS (a int, b text); +CREATE TABLE test_tbl1 (x int, y test_type1); +ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails +ERROR: cannot alter type "test_type1" because column "test_tbl1.y" uses it +CREATE TYPE test_type2 AS (a int, b text); +CREATE TABLE test_tbl2 OF test_type2; +CREATE TABLE test_tbl2_subclass () INHERITS (test_tbl2); +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +ALTER TYPE test_type2 ADD ATTRIBUTE c text; -- fails +ERROR: cannot alter type "test_type2" because it is the type of a typed table +HINT: Use ALTER ... CASCADE to alter the typed tables too. +ALTER TYPE test_type2 ADD ATTRIBUTE c text CASCADE; +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | + c | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | + c | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +ALTER TYPE test_type2 ALTER ATTRIBUTE b TYPE varchar; -- fails +ERROR: cannot alter type "test_type2" because it is the type of a typed table +HINT: Use ALTER ... CASCADE to alter the typed tables too. +ALTER TYPE test_type2 ALTER ATTRIBUTE b TYPE varchar CASCADE; +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+-------------------+-----------+----------+--------- + a | integer | | | + b | character varying | | | + c | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+-------------------+-----------+----------+--------- + a | integer | | | + b | character varying | | | + c | text | | | +Number of child tables: 1 (Use \d+ to list them.) 
+Typed table of type: test_type2 + +ALTER TYPE test_type2 DROP ATTRIBUTE b; -- fails +ERROR: cannot alter type "test_type2" because it is the type of a typed table +HINT: Use ALTER ... CASCADE to alter the typed tables too. +ALTER TYPE test_type2 DROP ATTRIBUTE b CASCADE; +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + c | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + c | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +ALTER TYPE test_type2 RENAME ATTRIBUTE a TO aa; -- fails +ERROR: cannot alter type "test_type2" because it is the type of a typed table +HINT: Use ALTER ... CASCADE to alter the typed tables too. +ALTER TYPE test_type2 RENAME ATTRIBUTE a TO aa CASCADE; +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + aa | integer | | | + c | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + aa | integer | | | + c | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +\d test_tbl2_subclass + Table "public.test_tbl2_subclass" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + aa | integer | | | + c | text | | | +Inherits: test_tbl2 + +DROP TABLE test_tbl2_subclass; +CREATE TYPE test_typex AS (a int, b text); +CREATE TABLE test_tblx (x int, y test_typex check ((y).a > 0)); +ALTER TYPE test_typex DROP ATTRIBUTE a; -- fails +ERROR: cannot drop column a of composite type test_typex because other objects depend on it +DETAIL: constraint test_tblx_y_check on table test_tblx depends on column a of composite type test_typex +HINT: Use DROP ... CASCADE to drop the dependent objects too. +ALTER TYPE test_typex DROP ATTRIBUTE a CASCADE; +NOTICE: drop cascades to constraint test_tblx_y_check on table test_tblx +\d test_tblx + Table "public.test_tblx" + Column | Type | Collation | Nullable | Default +--------+------------+-----------+----------+--------- + x | integer | | | + y | test_typex | | | + +DROP TABLE test_tblx; +DROP TYPE test_typex; +-- This test isn't that interesting on its own, but the purpose is to leave +-- behind a table to test pg_upgrade with. The table has a composite type +-- column in it, and the composite type has a dropped attribute. 
+CREATE TYPE test_type3 AS (a int);
+CREATE TABLE test_tbl3 (c) AS SELECT '(1)'::test_type3;
+ALTER TYPE test_type3 DROP ATTRIBUTE a, ADD ATTRIBUTE b int;
+CREATE TYPE test_type_empty AS ();
+DROP TYPE test_type_empty;
+--
+-- typed tables: OF / NOT OF
+--
+CREATE TYPE tt_t0 AS (z inet, x int, y numeric(8,2));
+ALTER TYPE tt_t0 DROP ATTRIBUTE z;
+CREATE TABLE tt0 (x int NOT NULL, y numeric(8,2)); -- OK
+CREATE TABLE tt1 (x int, y bigint); -- wrong base type
+CREATE TABLE tt2 (x int, y numeric(9,2)); -- wrong typmod
+CREATE TABLE tt3 (y numeric(8,2), x int); -- wrong column order
+CREATE TABLE tt4 (x int); -- too few columns
+CREATE TABLE tt5 (x int, y numeric(8,2), z int); -- too many columns
+CREATE TABLE tt6 () INHERITS (tt0); -- can't have a parent
+CREATE TABLE tt7 (x int, q text, y numeric(8,2));
+ALTER TABLE tt7 DROP q; -- OK
+ALTER TABLE tt0 OF tt_t0;
+ALTER TABLE tt1 OF tt_t0;
+ERROR: table "tt1" has different type for column "y"
+ALTER TABLE tt2 OF tt_t0;
+ERROR: table "tt2" has different type for column "y"
+ALTER TABLE tt3 OF tt_t0;
+ERROR: table has column "y" where type requires "x"
+ALTER TABLE tt4 OF tt_t0;
+ERROR: table is missing column "y"
+ALTER TABLE tt5 OF tt_t0;
+ERROR: table has extra column "z"
+ALTER TABLE tt6 OF tt_t0;
+ERROR: typed tables cannot inherit
+ALTER TABLE tt7 OF tt_t0;
+CREATE TYPE tt_t1 AS (x int, y numeric(8,2));
+ALTER TABLE tt7 OF tt_t1; -- reassign an already-typed table
+ALTER TABLE tt7 NOT OF;
+\d tt7
+ Table "public.tt7"
+ Column | Type | Collation | Nullable | Default
+--------+--------------+-----------+----------+---------
+ x | integer | | |
+ y | numeric(8,2) | | |
+
+-- make sure we can drop a constraint on the parent but it remains on the child
+CREATE TABLE test_drop_constr_parent (c text CHECK (c IS NOT NULL));
+CREATE TABLE test_drop_constr_child () INHERITS (test_drop_constr_parent);
+ALTER TABLE ONLY test_drop_constr_parent DROP CONSTRAINT "test_drop_constr_parent_c_check";
+-- should fail
+INSERT INTO test_drop_constr_child (c) VALUES (NULL);
+ERROR: new row for relation "test_drop_constr_child" violates check constraint "test_drop_constr_parent_c_check"
+DETAIL: Failing row contains (null).
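+-- the ONLY above detached only the parent's copy of the constraint; the
+-- child's copy turned into an independent local constraint, which is why
+-- the insert still fails before the cleanup below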
+DROP TABLE test_drop_constr_parent CASCADE; +NOTICE: drop cascades to table test_drop_constr_child +-- +-- IF EXISTS test +-- +ALTER TABLE IF EXISTS tt8 ADD COLUMN f int; +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f); +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10); +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0; +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 RENAME COLUMN f TO f1; +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2; +NOTICE: relation "tt8" does not exist, skipping +CREATE TABLE tt8(a int); +CREATE SCHEMA alter2; +ALTER TABLE IF EXISTS tt8 ADD COLUMN f int; +ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f); +ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10); +ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0; +ALTER TABLE IF EXISTS tt8 RENAME COLUMN f TO f1; +ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2; +\d alter2.tt8 + Table "alter2.tt8" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + f1 | integer | | not null | 0 +Indexes: + "xxx" PRIMARY KEY, btree (f1) +Check constraints: + "tt8_f_check" CHECK (f1 >= 0 AND f1 <= 10) + +DROP TABLE alter2.tt8; +DROP SCHEMA alter2; +-- +-- Check conflicts between index and CHECK constraint names +-- +CREATE TABLE tt9(c integer); +ALTER TABLE tt9 ADD CHECK(c > 1); +ALTER TABLE tt9 ADD CHECK(c > 2); -- picks nonconflicting name +ALTER TABLE tt9 ADD CONSTRAINT foo CHECK(c > 3); +ALTER TABLE tt9 ADD CONSTRAINT foo CHECK(c > 4); -- fail, dup name +ERROR: constraint "foo" for relation "tt9" already exists +ALTER TABLE tt9 ADD UNIQUE(c); +ALTER TABLE tt9 ADD UNIQUE(c); -- picks nonconflicting name +ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key UNIQUE(c); -- fail, dup name +ERROR: relation "tt9_c_key" already exists +ALTER TABLE tt9 ADD CONSTRAINT foo UNIQUE(c); -- fail, dup name +ERROR: constraint "foo" for relation "tt9" already exists +ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key CHECK(c > 5); -- fail, dup name +ERROR: constraint "tt9_c_key" for relation "tt9" already exists +ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key2 CHECK(c > 6); +ALTER TABLE tt9 ADD UNIQUE(c); -- picks nonconflicting name +\d tt9 + Table "public.tt9" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c | integer | | | +Indexes: + "tt9_c_key" UNIQUE CONSTRAINT, btree (c) + "tt9_c_key1" UNIQUE CONSTRAINT, btree (c) + "tt9_c_key3" UNIQUE CONSTRAINT, btree (c) +Check constraints: + "foo" CHECK (c > 3) + "tt9_c_check" CHECK (c > 1) + "tt9_c_check1" CHECK (c > 2) + "tt9_c_key2" CHECK (c > 6) + +DROP TABLE tt9; +-- Check that comments on constraints and indexes are not lost at ALTER TABLE. 
+CREATE TABLE comment_test (
+ id int,
+ positive_col int CHECK (positive_col > 0),
+ indexed_col int,
+ CONSTRAINT comment_test_pk PRIMARY KEY (id));
+CREATE INDEX comment_test_index ON comment_test(indexed_col);
+COMMENT ON COLUMN comment_test.id IS 'Column ''id'' on comment_test';
+COMMENT ON INDEX comment_test_index IS 'Simple index on comment_test';
+COMMENT ON CONSTRAINT comment_test_positive_col_check ON comment_test IS 'CHECK constraint on comment_test.positive_col';
+COMMENT ON CONSTRAINT comment_test_pk ON comment_test IS 'PRIMARY KEY constraint of comment_test';
+COMMENT ON INDEX comment_test_pk IS 'Index backing the PRIMARY KEY of comment_test';
+SELECT col_description('comment_test'::regclass, 1) as comment;
+ comment
+-----------------------------
+ Column 'id' on comment_test
+(1 row)
+
+SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test'::regclass ORDER BY 1, 2;
+ index | comment
+--------------------+-----------------------------------------------
+ comment_test_index | Simple index on comment_test
+ comment_test_pk | Index backing the PRIMARY KEY of comment_test
+(2 rows)
+
+SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test'::regclass ORDER BY 1, 2;
+ constraint | comment
+---------------------------------+-----------------------------------------------
+ comment_test_pk | PRIMARY KEY constraint of comment_test
+ comment_test_positive_col_check | CHECK constraint on comment_test.positive_col
+(2 rows)
+
+-- Change the datatype of all the columns. ALTER TABLE is optimized to not
+-- rebuild an index if the new data type is binary compatible with the old
+-- one. Check this by first running a dummy ALTER TABLE that doesn't change
+-- the datatype, to exercise the no-op codepath, and then one that does.
+ALTER TABLE comment_test ALTER COLUMN indexed_col SET DATA TYPE int;
+ALTER TABLE comment_test ALTER COLUMN indexed_col SET DATA TYPE text;
+ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE int;
+ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE text;
+ALTER TABLE comment_test ALTER COLUMN positive_col SET DATA TYPE int;
+ALTER TABLE comment_test ALTER COLUMN positive_col SET DATA TYPE bigint;
+-- Check that the comments are intact.
+SELECT col_description('comment_test'::regclass, 1) as comment;
+ comment
+-----------------------------
+ Column 'id' on comment_test
+(1 row)
+
+SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test'::regclass ORDER BY 1, 2;
+ index | comment
+--------------------+-----------------------------------------------
+ comment_test_index | Simple index on comment_test
+ comment_test_pk | Index backing the PRIMARY KEY of comment_test
+(2 rows)
+
+SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test'::regclass ORDER BY 1, 2;
+ constraint | comment
+---------------------------------+-----------------------------------------------
+ comment_test_pk | PRIMARY KEY constraint of comment_test
+ comment_test_positive_col_check | CHECK constraint on comment_test.positive_col
+(2 rows)
+
+-- Check compatibility for foreign keys and comments. This is done
+-- separately as rebuilding the column type of the parent leads
+-- to an error and would reduce the test scope.
+CREATE TABLE comment_test_child (
+ id text CONSTRAINT comment_test_child_fk REFERENCES comment_test);
+CREATE INDEX comment_test_child_fk ON comment_test_child(id);
+COMMENT ON COLUMN comment_test_child.id IS 'Column ''id'' on comment_test_child';
+COMMENT ON INDEX comment_test_child_fk IS 'Index backing the FOREIGN KEY of comment_test_child';
+COMMENT ON CONSTRAINT comment_test_child_fk ON comment_test_child IS 'FOREIGN KEY constraint of comment_test_child';
+-- Change column type of parent
+ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE text;
+ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE int USING id::integer;
+ERROR: foreign key constraint "comment_test_child_fk" cannot be implemented
+DETAIL: Key columns "id" and "id" are of incompatible types: text and integer.
+-- Comments should be intact
+SELECT col_description('comment_test_child'::regclass, 1) as comment;
+ comment
+-----------------------------------
+ Column 'id' on comment_test_child
+(1 row)
+
+SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test_child'::regclass ORDER BY 1, 2;
+ index | comment
+-----------------------+-----------------------------------------------------
+ comment_test_child_fk | Index backing the FOREIGN KEY of comment_test_child
+(1 row)
+
+SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test_child'::regclass ORDER BY 1, 2;
+ constraint | comment
+-----------------------+----------------------------------------------
+ comment_test_child_fk | FOREIGN KEY constraint of comment_test_child
+(1 row)
+
+-- Check that we map relation oids to filenodes and back correctly. Only
+-- display bad mappings so the test output doesn't change all the time. A
+-- filenode function call can return NULL for a relation dropped concurrently
+-- with the call's surrounding query, so ignore a NULL mapped_oid for
+-- relations that no longer exist after all calls finish.
+CREATE TEMP TABLE filenode_mapping AS
+SELECT
+ oid, mapped_oid, reltablespace, relfilenode, relname
+FROM pg_class,
+ pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) AS mapped_oid
+WHERE relkind IN ('r', 'i', 'S', 't', 'm') AND mapped_oid IS DISTINCT FROM oid;
+SELECT m.* FROM filenode_mapping m LEFT JOIN pg_class c ON c.oid = m.oid
+WHERE c.oid IS NOT NULL OR m.mapped_oid IS NOT NULL;
+ oid | mapped_oid | reltablespace | relfilenode | relname
+-----+------------+---------------+-------------+---------
+(0 rows)
+
+-- Checks on creation and manipulation of user-defined relations in
+-- pg_catalog.
+SHOW allow_system_table_mods;
+ allow_system_table_mods
+-------------------------
+ off
+(1 row)
+
+-- disallowed because of search_path issues with pg_dump
+CREATE TABLE pg_catalog.new_system_table();
+ERROR: permission denied to create "pg_catalog.new_system_table"
+DETAIL: System catalog modifications are currently disallowed.
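+-- Aside (illustrative sketch, not part of the captured test output): the
+-- restriction above is governed by the allow_system_table_mods GUC shown a
+-- few lines earlier; assuming a superuser session on a server that permits
+-- changing it at runtime, the direct route would look like:
+--   SET allow_system_table_mods = on;
+--   CREATE TABLE pg_catalog.new_system_table();
+--   RESET allow_system_table_mods;
+-- The test deliberately avoids that and takes the supported route below.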
+-- instead create in public first, move to catalog +CREATE TABLE new_system_table(id serial primary key, othercol text); +ALTER TABLE new_system_table SET SCHEMA pg_catalog; +ALTER TABLE new_system_table SET SCHEMA public; +ALTER TABLE new_system_table SET SCHEMA pg_catalog; +-- will be ignored -- already there: +ALTER TABLE new_system_table SET SCHEMA pg_catalog; +ALTER TABLE new_system_table RENAME TO old_system_table; +CREATE INDEX old_system_table__othercol ON old_system_table (othercol); +INSERT INTO old_system_table(othercol) VALUES ('somedata'), ('otherdata'); +UPDATE old_system_table SET id = -id; +DELETE FROM old_system_table WHERE othercol = 'somedata'; +TRUNCATE old_system_table; +ALTER TABLE old_system_table DROP CONSTRAINT new_system_table_pkey; +ALTER TABLE old_system_table DROP COLUMN othercol; +DROP TABLE old_system_table; +-- set logged +CREATE UNLOGGED TABLE unlogged1(f1 SERIAL PRIMARY KEY, f2 TEXT); +-- check relpersistence of an unlogged table +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^unlogged1' +UNION ALL +SELECT 'toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^unlogged1' +UNION ALL +SELECT 'toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^unlogged1' +ORDER BY relname; + relname | relkind | relpersistence +------------------+---------+---------------- + toast index | i | p + toast table | t | p + unlogged1 | r | p + unlogged1_f1_seq | S | p + unlogged1_pkey | i | p +(5 rows) + +CREATE UNLOGGED TABLE unlogged2(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES unlogged1); -- foreign key +CREATE UNLOGGED TABLE unlogged3(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES unlogged3); -- self-referencing foreign key +ALTER TABLE unlogged3 SET LOGGED; -- skip self-referencing foreign key +ALTER TABLE unlogged2 SET LOGGED; -- fails because a foreign key to an unlogged table exists +ALTER TABLE unlogged1 SET LOGGED; +-- check relpersistence of an unlogged table after changing to permanent +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^unlogged1' +UNION ALL +SELECT 'toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^unlogged1' +UNION ALL +SELECT 'toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^unlogged1' +ORDER BY relname; + relname | relkind | relpersistence +------------------+---------+---------------- + toast index | i | p + toast table | t | p + unlogged1 | r | p + unlogged1_f1_seq | S | p + unlogged1_pkey | i | p +(5 rows) + +ALTER TABLE unlogged1 SET LOGGED; -- silently do nothing +DROP TABLE unlogged3; +DROP TABLE unlogged2; +DROP TABLE unlogged1; +-- set unlogged +CREATE TABLE logged1(f1 SERIAL PRIMARY KEY, f2 TEXT); +-- check relpersistence of a permanent table +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^logged1' +UNION ALL +SELECT 'toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^logged1' +UNION ALL +SELECT 'toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = 
i.indexrelid WHERE r.relname ~ '^logged1' +ORDER BY relname; + relname | relkind | relpersistence +----------------+---------+---------------- + logged1 | r | p + logged1_f1_seq | S | p + logged1_pkey | i | p + toast index | i | p + toast table | t | p +(5 rows) + +CREATE TABLE logged2(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES logged1); -- foreign key +CREATE TABLE logged3(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES logged3); -- self-referencing foreign key +ALTER TABLE logged1 SET UNLOGGED; -- fails because a foreign key from a permanent table exists +ERROR: could not change table "logged1" to unlogged because it references logged table "logged2" +ALTER TABLE logged3 SET UNLOGGED; -- skip self-referencing foreign key +ALTER TABLE logged2 SET UNLOGGED; +ALTER TABLE logged1 SET UNLOGGED; +-- check relpersistence of a permanent table after changing to unlogged +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^logged1' +UNION ALL +SELECT 'toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^logged1' +UNION ALL +SELECT 'toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^logged1' +ORDER BY relname; + relname | relkind | relpersistence +----------------+---------+---------------- + logged1 | r | u + logged1_f1_seq | S | p + logged1_pkey | i | u + toast index | i | u + toast table | t | u +(5 rows) + +ALTER TABLE logged1 SET UNLOGGED; -- silently do nothing +DROP TABLE logged3; +DROP TABLE logged2; +DROP TABLE logged1; +-- test ADD COLUMN IF NOT EXISTS +CREATE TABLE test_add_column(c1 integer); +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN c2 integer; +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN c2 integer; -- fail because c2 already exists +ERROR: column "c2" of relation "test_add_column" already exists +ALTER TABLE ONLY test_add_column + ADD COLUMN c2 integer; -- fail because c2 already exists +ERROR: column "c2" of relation "test_add_column" already exists +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c2 integer; -- skipping because c2 already exists +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +ALTER TABLE ONLY test_add_column + ADD COLUMN IF NOT EXISTS c2 integer; -- skipping because c2 already exists +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN c2 integer, -- fail because c2 already exists + ADD COLUMN c3 integer primary key; +ERROR: column "c2" of relation "test_add_column" already exists +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | 
Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists + ADD COLUMN c3 integer primary key; +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists + ADD COLUMN IF NOT EXISTS c3 integer primary key; -- skipping because c3 already exists +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +NOTICE: column "c3" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists + ADD COLUMN IF NOT EXISTS c3 integer, -- skipping because c3 already exists + ADD COLUMN c4 integer REFERENCES test_add_column; +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +NOTICE: column "c3" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | + c4 | integer | | | +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) +Foreign-key constraints: + "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) +Referenced by: + TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c4 integer REFERENCES test_add_column; +NOTICE: column "c4" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | + c4 | integer | | | +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) +Foreign-key constraints: + "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) +Referenced by: + TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c5 SERIAL CHECK (c5 > 8); +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------------------------------------------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | + c4 | integer | | | + c5 | integer | | not null | nextval('test_add_column_c5_seq'::regclass) +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) +Check constraints: + "test_add_column_c5_check" CHECK (c5 > 8) +Foreign-key constraints: + "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) 
+Referenced by: + TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c5 SERIAL CHECK (c5 > 10); +NOTICE: column "c5" of relation "test_add_column" already exists, skipping +\d test_add_column* + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------------------------------------------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | + c4 | integer | | | + c5 | integer | | not null | nextval('test_add_column_c5_seq'::regclass) +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) +Check constraints: + "test_add_column_c5_check" CHECK (c5 > 8) +Foreign-key constraints: + "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) +Referenced by: + TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) + + Sequence "public.test_add_column_c5_seq" + Type | Start | Minimum | Maximum | Increment | Cycles? | Cache +---------+-------+---------+------------+-----------+---------+------- + integer | 1 | 1 | 2147483647 | 1 | no | 1 +Owned by: public.test_add_column.c5 + + Index "public.test_add_column_pkey" + Column | Type | Key? | Definition +--------+---------+------+------------ + c3 | integer | yes | c3 +primary key, btree, for table "public.test_add_column" + +DROP TABLE test_add_column; +\d test_add_column* +-- assorted cases with multiple ALTER TABLE steps +CREATE TABLE ataddindex(f1 INT); +INSERT INTO ataddindex VALUES (42), (43); +CREATE UNIQUE INDEX ataddindexi0 ON ataddindex(f1); +ALTER TABLE ataddindex + ADD PRIMARY KEY USING INDEX ataddindexi0, + ALTER f1 TYPE BIGINT; +\d ataddindex + Table "public.ataddindex" + Column | Type | Collation | Nullable | Default +--------+--------+-----------+----------+--------- + f1 | bigint | | not null | +Indexes: + "ataddindexi0" PRIMARY KEY, btree (f1) + +DROP TABLE ataddindex; +CREATE TABLE ataddindex(f1 VARCHAR(10)); +INSERT INTO ataddindex(f1) VALUES ('foo'), ('a'); +ALTER TABLE ataddindex + ALTER f1 SET DATA TYPE TEXT, + ADD EXCLUDE ((f1 LIKE 'a') WITH =); +\d ataddindex + Table "public.ataddindex" + Column | Type | Collation | Nullable | Default +--------+------+-----------+----------+--------- + f1 | text | | | +Indexes: + "ataddindex_expr_excl" EXCLUDE USING btree ((f1 ~~ 'a'::text) WITH =) + +DROP TABLE ataddindex; +CREATE TABLE ataddindex(id int, ref_id int); +ALTER TABLE ataddindex + ADD PRIMARY KEY (id), + ADD FOREIGN KEY (ref_id) REFERENCES ataddindex; +\d ataddindex + Table "public.ataddindex" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + id | integer | | not null | + ref_id | integer | | | +Indexes: + "ataddindex_pkey" PRIMARY KEY, btree (id) +Foreign-key constraints: + "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) +Referenced by: + TABLE "ataddindex" CONSTRAINT "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) + +DROP TABLE ataddindex; +CREATE TABLE ataddindex(id int, ref_id int); +ALTER TABLE ataddindex + ADD UNIQUE (id), + ADD FOREIGN KEY (ref_id) REFERENCES ataddindex (id); +\d ataddindex + Table "public.ataddindex" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + id | integer | | | + ref_id | integer | | | +Indexes: + "ataddindex_id_key" UNIQUE CONSTRAINT, btree (id) +Foreign-key 
constraints: + "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) +Referenced by: + TABLE "ataddindex" CONSTRAINT "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) + +DROP TABLE ataddindex; +-- unsupported constraint types for partitioned tables +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE (a, (a+b+1)); +ALTER TABLE partitioned ADD EXCLUDE USING gist (a WITH &&); +ERROR: exclusion constraints are not supported on partitioned tables +LINE 1: ALTER TABLE partitioned ADD EXCLUDE USING gist (a WITH &&); + ^ +-- cannot drop column that is part of the partition key +ALTER TABLE partitioned DROP COLUMN a; +ERROR: cannot drop column "a" because it is part of the partition key of relation "partitioned" +ALTER TABLE partitioned ALTER COLUMN a TYPE char(5); +ERROR: cannot alter column "a" because it is part of the partition key of relation "partitioned" +ALTER TABLE partitioned DROP COLUMN b; +ERROR: cannot drop column "b" because it is part of the partition key of relation "partitioned" +ALTER TABLE partitioned ALTER COLUMN b TYPE char(5); +ERROR: cannot alter column "b" because it is part of the partition key of relation "partitioned" +-- partitioned table cannot participate in regular inheritance +CREATE TABLE nonpartitioned ( + a int, + b int +); +ALTER TABLE partitioned INHERIT nonpartitioned; +ERROR: cannot change inheritance of partitioned table +ALTER TABLE nonpartitioned INHERIT partitioned; +ERROR: cannot inherit from partitioned table "partitioned" +-- cannot add NO INHERIT constraint to partitioned tables +ALTER TABLE partitioned ADD CONSTRAINT chk_a CHECK (a > 0) NO INHERIT; +ERROR: cannot add NO INHERIT constraint to partitioned table "partitioned" +DROP TABLE partitioned, nonpartitioned; +-- +-- ATTACH PARTITION +-- +-- check that target table is partitioned +CREATE TABLE unparted ( + a int +); +CREATE TABLE fail_part (like unparted); +ALTER TABLE unparted ATTACH PARTITION fail_part FOR VALUES IN ('a'); +ERROR: table "unparted" is not partitioned +DROP TABLE unparted, fail_part; +-- check that partition bound is compatible +CREATE TABLE list_parted ( + a int NOT NULL, + b char(2) COLLATE "C", + CONSTRAINT check_a CHECK (a > 0) +) PARTITION BY LIST (a); +CREATE TABLE fail_part (LIKE list_parted); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) TO (10); +ERROR: invalid bound specification for a list partition +LINE 1: ...list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) T... 
+ ^
+DROP TABLE fail_part;
+-- check that the table being attached exists
+ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1);
+ERROR: relation "nonexistent" does not exist
+-- check ownership of the source table
+CREATE ROLE regress_test_me;
+CREATE ROLE regress_test_not_me;
+CREATE TABLE not_owned_by_me (LIKE list_parted);
+ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me;
+SET SESSION AUTHORIZATION regress_test_me;
+CREATE TABLE owned_by_me (
+ a int
+) PARTITION BY LIST (a);
+ALTER TABLE owned_by_me ATTACH PARTITION not_owned_by_me FOR VALUES IN (1);
+ERROR: must be owner of table not_owned_by_me
+RESET SESSION AUTHORIZATION;
+DROP TABLE owned_by_me, not_owned_by_me;
+DROP ROLE regress_test_not_me;
+DROP ROLE regress_test_me;
+-- check that the table being attached is not part of regular inheritance
+CREATE TABLE parent (LIKE list_parted);
+CREATE TABLE child () INHERITS (parent);
+ALTER TABLE list_parted ATTACH PARTITION child FOR VALUES IN (1);
+ERROR: cannot attach inheritance child as partition
+ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1);
+ERROR: cannot attach inheritance parent as partition
+DROP TABLE parent CASCADE;
+NOTICE: drop cascades to table child
+-- check that a permanent relation cannot be attached to a temporary parent
+CREATE TEMP TABLE temp_parted (a int) PARTITION BY LIST (a);
+CREATE TABLE perm_part (a int);
+ALTER TABLE temp_parted ATTACH PARTITION perm_part FOR VALUES IN (1);
+ERROR: cannot attach a permanent relation as partition of temporary relation "temp_parted"
+DROP TABLE temp_parted, perm_part;
+-- check that the table being attached is not a typed table
+CREATE TYPE mytype AS (a int);
+CREATE TABLE fail_part OF mytype;
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: cannot attach a typed table as partition
+DROP TYPE mytype CASCADE;
+NOTICE: drop cascades to table fail_part
+-- check that the table being attached has only columns present in the parent
+CREATE TABLE fail_part (like list_parted, c int);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: table "fail_part" contains column "c" not found in parent "list_parted"
+DETAIL: The new partition may contain only the columns present in parent.
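+-- Aside (illustrative sketch, not part of the captured test output): the
+-- extra column only blocks ATTACH while it is live; a dropped column leaves
+-- an attisdropped slot that the column-matching logic ignores, so this
+-- hypothetical sequence would be accepted:
+--   ALTER TABLE fail_part DROP COLUMN c;
+--   ALTER TABLE fail_part ADD CONSTRAINT check_a CHECK (a > 0);
+--   ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+-- (the parent's check_a must exist on the child too; the part_6 case further
+-- below exercises the dropped-column variant.)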
+DROP TABLE fail_part; +-- check that the table being attached has every column of the parent +CREATE TABLE fail_part (a int NOT NULL); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table is missing column "b" +DROP TABLE fail_part; +-- check that columns match in type, collation and NOT NULL status +CREATE TABLE fail_part ( + b char(3), + a int NOT NULL +); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different type for column "b" +ALTER TABLE fail_part ALTER b TYPE char (2) COLLATE "POSIX"; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different collation for column "b" +DROP TABLE fail_part; +-- check that the table being attached has all constraints of the parent +CREATE TABLE fail_part ( + b char(2) COLLATE "C", + a int NOT NULL +); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table is missing constraint "check_a" +-- check that the constraint matches in definition with parent's constraint +ALTER TABLE fail_part ADD CONSTRAINT check_a CHECK (a >= 0); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different definition for check constraint "check_a" +DROP TABLE fail_part; +-- check the attributes and constraints after partition is attached +CREATE TABLE part_1 ( + a int NOT NULL, + b char(2) COLLATE "C", + CONSTRAINT check_a CHECK (a > 0) +); +ALTER TABLE list_parted ATTACH PARTITION part_1 FOR VALUES IN (1); +-- attislocal and conislocal are always false for merged attributes and constraints respectively. +SELECT attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_1'::regclass AND attnum > 0; + attislocal | attinhcount +------------+------------- + f | 1 + f | 1 +(2 rows) + +SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::regclass AND conname = 'check_a'; + conislocal | coninhcount +------------+------------- + f | 1 +(1 row) + +-- check that the new partition won't overlap with an existing partition +CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: partition "fail_part" would overlap partition "part_1" +LINE 1: ...LE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); + ^ +DROP TABLE fail_part; +-- check that an existing table can be attached as a default partition +CREATE TABLE def_part (LIKE list_parted INCLUDING CONSTRAINTS); +ALTER TABLE list_parted ATTACH PARTITION def_part DEFAULT; +-- check attaching default partition fails if a default partition already +-- exists +CREATE TABLE fail_def_part (LIKE part_1 INCLUDING CONSTRAINTS); +ALTER TABLE list_parted ATTACH PARTITION fail_def_part DEFAULT; +ERROR: partition "fail_def_part" conflicts with existing default partition "def_part" +LINE 1: ...ER TABLE list_parted ATTACH PARTITION fail_def_part DEFAULT; + ^ +-- check validation when attaching list partitions +CREATE TABLE list_parted2 ( + a int, + b char +) PARTITION BY LIST (a); +-- check that violating rows are correctly reported +CREATE TABLE part_2 (LIKE list_parted2); +INSERT INTO part_2 VALUES (3, 'a'); +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +ERROR: partition constraint of relation "part_2" is violated by some row +-- should be ok after deleting the bad row +DELETE FROM part_2; +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +-- 
+-- check partition cannot be attached if default has some row for its values
+CREATE TABLE list_parted2_def PARTITION OF list_parted2 DEFAULT;
+INSERT INTO list_parted2_def VALUES (11, 'z');
+CREATE TABLE part_3 (LIKE list_parted2);
+ALTER TABLE list_parted2 ATTACH PARTITION part_3 FOR VALUES IN (11);
+ERROR: updated partition constraint for default partition "list_parted2_def" would be violated by some row
+-- should be ok after deleting the bad row
+DELETE FROM list_parted2_def WHERE a = 11;
+ALTER TABLE list_parted2 ATTACH PARTITION part_3 FOR VALUES IN (11);
+-- adding constraints that describe the desired partition constraint
+-- (or more restrictive) will help skip the validation scan
+CREATE TABLE part_3_4 (
+ LIKE list_parted2,
+ CONSTRAINT check_a CHECK (a IN (3))
+);
+-- however, if a list partition does not accept nulls, there should be
+-- an explicit NOT NULL constraint on the partition key column for the
+-- validation scan to be skipped.
+ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4);
+-- adding a NOT NULL constraint will cause the scan to be skipped
+ALTER TABLE list_parted2 DETACH PARTITION part_3_4;
+ALTER TABLE part_3_4 ALTER a SET NOT NULL;
+ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4);
+-- check if the default partition scan is skipped
+ALTER TABLE list_parted2_def ADD CONSTRAINT check_a CHECK (a IN (5, 6));
+CREATE TABLE part_55_66 PARTITION OF list_parted2 FOR VALUES IN (55, 66);
+-- check validation when attaching range partitions
+CREATE TABLE range_parted (
+ a int,
+ b int
+) PARTITION BY RANGE (a, b);
+-- check that violating rows are correctly reported
+CREATE TABLE part1 (
+ a int NOT NULL CHECK (a = 1),
+ b int NOT NULL CHECK (b >= 1 AND b <= 10)
+);
+INSERT INTO part1 VALUES (1, 10);
+-- Remember the TO bound is exclusive
+ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10);
+ERROR: partition constraint of relation "part1" is violated by some row
+-- should be ok after deleting the bad row
+DELETE FROM part1;
+ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10);
+-- adding constraints that describe the desired partition constraint
+-- (or more restrictive) will help skip the validation scan
+CREATE TABLE part2 (
+ a int NOT NULL CHECK (a = 1),
+ b int NOT NULL CHECK (b >= 10 AND b < 18)
+);
+ALTER TABLE range_parted ATTACH PARTITION part2 FOR VALUES FROM (1, 10) TO (1, 20);
+-- Create default partition
+CREATE TABLE partr_def1 PARTITION OF range_parted DEFAULT;
+-- Only one default partition is allowed, hence the following should give an error
+CREATE TABLE partr_def2 (LIKE part1 INCLUDING CONSTRAINTS);
+ALTER TABLE range_parted ATTACH PARTITION partr_def2 DEFAULT;
+ERROR: partition "partr_def2" conflicts with existing default partition "partr_def1"
+LINE 1: ...LTER TABLE range_parted ATTACH PARTITION partr_def2 DEFAULT;
+ ^
+-- Rows already in the default partition that fall within the bounds being
+-- attached also block the attach, hence the following should give an error
+INSERT INTO partr_def1 VALUES (2, 10);
+CREATE TABLE part3 (LIKE range_parted);
+ALTER TABLE range_parted ATTACH partition part3 FOR VALUES FROM (2, 10) TO (2, 20);
+ERROR: updated partition constraint for default partition "partr_def1" would be violated by some row
+-- Attaching partitions should be successful when there are no overlapping rows
+ALTER TABLE range_parted ATTACH partition part3 FOR VALUES FROM (3, 10) TO (3, 20);
+-- check that leaf partitions are scanned when attaching a partitioned
+-- table
+CREATE TABLE part_5 (
+ LIKE list_parted2
+) PARTITION BY LIST (b);
+-- check that violating rows are correctly reported
+CREATE TABLE part_5_a PARTITION OF part_5 FOR VALUES IN ('a');
+INSERT INTO part_5_a (a, b) VALUES (6, 'a');
+ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);
+ERROR: partition constraint of relation "part_5_a" is violated by some row
+-- delete the faulting row and also add a constraint to skip the scan
+DELETE FROM part_5_a WHERE a NOT IN (3);
+ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 5);
+ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);
+ALTER TABLE list_parted2 DETACH PARTITION part_5;
+ALTER TABLE part_5 DROP CONSTRAINT check_a;
+-- scan should again be skipped, even though NOT NULL is now a column property
+ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IN (5)), ALTER a SET NOT NULL;
+ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);
+-- Check the case where attnos of the partitioning columns in the table being
+-- attached differ from the parent's. It should not affect the constraint-
+-- checking logic that allows the scan to be skipped.
+CREATE TABLE part_6 (
+ c int,
+ LIKE list_parted2,
+ CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 6)
+);
+ALTER TABLE part_6 DROP c;
+ALTER TABLE list_parted2 ATTACH PARTITION part_6 FOR VALUES IN (6);
+-- Similar to above, but the table being attached is a partitioned table
+-- whose partition has yet another set of attnos for the root partitioning
+-- columns.
+CREATE TABLE part_7 (
+ LIKE list_parted2,
+ CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 7)
+) PARTITION BY LIST (b);
+CREATE TABLE part_7_a_null (
+ c int,
+ d int,
+ e int,
+ LIKE list_parted2, -- 'a' will have attnum = 4
+ CONSTRAINT check_b CHECK (b IS NULL OR b = 'a'),
+ CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 7)
+);
+ALTER TABLE part_7_a_null DROP c, DROP d, DROP e;
+ALTER TABLE part_7 ATTACH PARTITION part_7_a_null FOR VALUES IN ('a', null);
+ALTER TABLE list_parted2 ATTACH PARTITION part_7 FOR VALUES IN (7);
+-- Same example, but check this time that the constraint correctly detects
+-- violating rows
+ALTER TABLE list_parted2 DETACH PARTITION part_7;
+ALTER TABLE part_7 DROP CONSTRAINT check_a; -- thus, the scan won't be skipped
+INSERT INTO part_7 (a, b) VALUES (8, null), (9, 'a');
+SELECT tableoid::regclass, a, b FROM part_7 order by a;
+ tableoid | a | b
+---------------+---+---
+ part_7_a_null | 8 |
+ part_7_a_null | 9 | a
+(2 rows)
+
+ALTER TABLE list_parted2 ATTACH PARTITION part_7 FOR VALUES IN (7);
+ERROR: partition constraint of relation "part_7_a_null" is violated by some row
+-- check that leaf partitions of default partition are scanned when
+-- attaching a partitioned table.
+ALTER TABLE part_5 DROP CONSTRAINT check_a; +CREATE TABLE part5_def PARTITION OF part_5 DEFAULT PARTITION BY LIST(a); +CREATE TABLE part5_def_p1 PARTITION OF part5_def FOR VALUES IN (5); +INSERT INTO part5_def_p1 VALUES (5, 'y'); +CREATE TABLE part5_p1 (LIKE part_5); +ALTER TABLE part_5 ATTACH PARTITION part5_p1 FOR VALUES IN ('y'); +ERROR: updated partition constraint for default partition "part5_def_p1" would be violated by some row +-- should be ok after deleting the bad row +DELETE FROM part5_def_p1 WHERE b = 'y'; +ALTER TABLE part_5 ATTACH PARTITION part5_p1 FOR VALUES IN ('y'); +-- check that the table being attached is not already a partition +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +ERROR: "part_2" is already a partition +-- check that circular inheritance is not allowed +ALTER TABLE part_5 ATTACH PARTITION list_parted2 FOR VALUES IN ('b'); +ERROR: circular inheritance not allowed +DETAIL: "part_5" is already a child of "list_parted2". +ALTER TABLE list_parted2 ATTACH PARTITION list_parted2 FOR VALUES IN (0); +ERROR: circular inheritance not allowed +DETAIL: "list_parted2" is already a child of "list_parted2". +-- If a partitioned table being created or an existing table being attached +-- as a partition does not have a constraint that would allow validation scan +-- to be skipped, but an individual partition does, then the partition's +-- validation scan is skipped. +CREATE TABLE quuux (a int, b text) PARTITION BY LIST (a); +CREATE TABLE quuux_default PARTITION OF quuux DEFAULT PARTITION BY LIST (b); +CREATE TABLE quuux_default1 PARTITION OF quuux_default ( + CONSTRAINT check_1 CHECK (a IS NOT NULL AND a = 1) +) FOR VALUES IN ('b'); +CREATE TABLE quuux1 (a int, b text); +ALTER TABLE quuux ATTACH PARTITION quuux1 FOR VALUES IN (1); -- validate! +CREATE TABLE quuux2 (a int, b text); +ALTER TABLE quuux ATTACH PARTITION quuux2 FOR VALUES IN (2); -- skip validation +DROP TABLE quuux1, quuux2; +-- should validate for quuux1, but not for quuux2 +CREATE TABLE quuux1 PARTITION OF quuux FOR VALUES IN (1); +CREATE TABLE quuux2 PARTITION OF quuux FOR VALUES IN (2); +DROP TABLE quuux; +-- check validation when attaching hash partitions +-- Use hand-rolled hash functions and operator class to get predictable result +-- on different machines. part_test_int4_ops is defined in insert.sql. +-- check that the new partition won't overlap with an existing partition +CREATE TABLE hash_parted ( + a int, + b int +) PARTITION BY HASH (a part_test_int4_ops); +CREATE TABLE hpart_1 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 4, REMAINDER 0); +CREATE TABLE fail_part (LIKE hpart_1); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 4); +ERROR: partition "fail_part" would overlap partition "hpart_1" +LINE 1: ...hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODU... + ^ +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 0); +ERROR: partition "fail_part" would overlap partition "hpart_1" +LINE 1: ...hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODU... 
+ ^
+DROP TABLE fail_part;
+-- check validation when attaching hash partitions
+-- check that violating rows are correctly reported
+CREATE TABLE hpart_2 (LIKE hash_parted);
+INSERT INTO hpart_2 VALUES (3, 0);
+ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1);
+ERROR: partition constraint of relation "hpart_2" is violated by some row
+-- should be ok after deleting the bad row
+DELETE FROM hpart_2;
+ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1);
+-- check that leaf partitions are scanned when attaching a partitioned
+-- table
+CREATE TABLE hpart_5 (
+ LIKE hash_parted
+) PARTITION BY LIST (b);
+-- check that violating rows are correctly reported
+CREATE TABLE hpart_5_a PARTITION OF hpart_5 FOR VALUES IN ('1', '2', '3');
+INSERT INTO hpart_5_a (a, b) VALUES (7, 1);
+ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2);
+ERROR: partition constraint of relation "hpart_5_a" is violated by some row
+-- should be ok after deleting the bad row
+DELETE FROM hpart_5_a;
+ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2);
+-- check that the table being attached has a valid modulus and remainder value
+CREATE TABLE fail_part(LIKE hash_parted);
+ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 0, REMAINDER 1);
+ERROR: modulus for hash partition must be a positive integer
+ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 8);
+ERROR: remainder for hash partition must be less than modulus
+ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 3, REMAINDER 2);
+ERROR: every hash partition modulus must be a factor of the next larger modulus
+DETAIL: The new modulus 3 is not a factor of 4, the modulus of existing partition "hpart_1".
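+-- Aside (illustrative sketch, not part of the captured test output): the
+-- factor rule still leaves room for finer partitions; with hpart_1, hpart_2
+-- and hpart_5 occupying remainders 0..2 of modulus 4, a bound such as
+--   ALTER TABLE hash_parted ATTACH PARTITION fail_part
+--     FOR VALUES WITH (MODULUS 8, REMAINDER 3);
+-- would be accepted, since 4 divides 8 and hash % 8 = 3 implies hash % 4 = 3,
+-- which no existing partition covers.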
+DROP TABLE fail_part;
+--
+-- DETACH PARTITION
+--
+-- check that the table is partitioned at all
+CREATE TABLE regular_table (a int);
+ALTER TABLE regular_table DETACH PARTITION any_name;
+ERROR: table "regular_table" is not partitioned
+DROP TABLE regular_table;
+-- check that the partition being detached exists at all
+ALTER TABLE list_parted2 DETACH PARTITION part_4;
+ERROR: relation "part_4" does not exist
+ALTER TABLE hash_parted DETACH PARTITION hpart_4;
+ERROR: relation "hpart_4" does not exist
+-- check that the partition being detached is actually a partition of the parent
+CREATE TABLE not_a_part (a int);
+ALTER TABLE list_parted2 DETACH PARTITION not_a_part;
+ERROR: relation "not_a_part" is not a partition of relation "list_parted2"
+ALTER TABLE list_parted2 DETACH PARTITION part_1;
+ERROR: relation "part_1" is not a partition of relation "list_parted2"
+ALTER TABLE hash_parted DETACH PARTITION not_a_part;
+ERROR: relation "not_a_part" is not a partition of relation "hash_parted"
+DROP TABLE not_a_part;
+-- check that, after being detached, attinhcount/coninhcount are reset to 0 and
+-- attislocal/conislocal are set to true
+ALTER TABLE list_parted2 DETACH PARTITION part_3_4;
+SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0;
+ attinhcount | attislocal
+-------------+------------
+ 0 | t
+ 0 | t
+(2 rows)
+
+SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a';
+ coninhcount | conislocal
+-------------+------------
+ 0 | t
+(1 row)
+
+DROP TABLE part_3_4;
+-- check that a detached partition is not dropped on dropping a partitioned table
+CREATE TABLE range_parted2 (
+ a int
+) PARTITION BY RANGE(a);
+CREATE TABLE part_rp PARTITION OF range_parted2 FOR VALUES FROM (0) to (100);
+ALTER TABLE range_parted2 DETACH PARTITION part_rp;
+DROP TABLE range_parted2;
+SELECT * from part_rp;
+ a
+---
+(0 rows)
+
+DROP TABLE part_rp;
+-- concurrent detach
+CREATE TABLE range_parted2 (
+ a int
+) PARTITION BY RANGE(a);
+CREATE TABLE part_rp PARTITION OF range_parted2 FOR VALUES FROM (0) to (100);
+BEGIN;
+-- doesn't work in a transaction block
+ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY;
+ERROR: ALTER TABLE ...
DETACH CONCURRENTLY cannot run inside a transaction block +COMMIT; +CREATE TABLE part_rpd PARTITION OF range_parted2 DEFAULT; +-- doesn't work if there's a default partition +ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY; +ERROR: cannot detach partitions concurrently when a default partition exists +-- doesn't work for the default partition +ALTER TABLE range_parted2 DETACH PARTITION part_rpd CONCURRENTLY; +ERROR: cannot detach partitions concurrently when a default partition exists +DROP TABLE part_rpd; +-- works fine +ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY; +\d+ range_parted2 + Partitioned table "public.range_parted2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | +Partition key: RANGE (a) +Number of partitions: 0 + +-- constraint should be created +\d part_rp + Table "public.part_rp" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Check constraints: + "part_rp_a_check" CHECK (a IS NOT NULL AND a >= 0 AND a < 100) + +CREATE TABLE part_rp100 PARTITION OF range_parted2 (CHECK (a>=123 AND a<133 AND a IS NOT NULL)) FOR VALUES FROM (100) to (200); +ALTER TABLE range_parted2 DETACH PARTITION part_rp100 CONCURRENTLY; +-- redundant constraint should not be created +\d part_rp100 + Table "public.part_rp100" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Check constraints: + "part_rp100_a_check" CHECK (a >= 123 AND a < 133 AND a IS NOT NULL) + +DROP TABLE range_parted2; +-- Check ALTER TABLE commands for partitioned tables and partitions +-- cannot add/drop column to/from *only* the parent +ALTER TABLE ONLY list_parted2 ADD COLUMN c int; +ERROR: column must be added to child tables too +ALTER TABLE ONLY list_parted2 DROP COLUMN b; +ERROR: cannot drop column from only the partitioned table when partitions exist +HINT: Do not specify the ONLY keyword. +-- cannot add a column to partition or drop an inherited one +ALTER TABLE part_2 ADD COLUMN c text; +ERROR: cannot add column to a partition +ALTER TABLE part_2 DROP COLUMN b; +ERROR: cannot drop inherited column "b" +-- Nor rename, alter type +ALTER TABLE part_2 RENAME COLUMN b to c; +ERROR: cannot rename inherited column "b" +ALTER TABLE part_2 ALTER COLUMN b TYPE text; +ERROR: cannot alter inherited column "b" +-- cannot add/drop NOT NULL or check constraints to *only* the parent, when +-- partitions exist +ALTER TABLE ONLY list_parted2 ALTER b SET NOT NULL; +ERROR: constraint must be added to child tables too +DETAIL: Column "b" of relation "part_2" is not already NOT NULL. +HINT: Do not specify the ONLY keyword. +ALTER TABLE ONLY list_parted2 ADD CONSTRAINT check_b CHECK (b <> 'zz'); +ERROR: constraint must be added to child tables too +ALTER TABLE list_parted2 ALTER b SET NOT NULL; +ALTER TABLE ONLY list_parted2 ALTER b DROP NOT NULL; +ERROR: cannot remove constraint from only the partitioned table when partitions exist +HINT: Do not specify the ONLY keyword. +ALTER TABLE list_parted2 ADD CONSTRAINT check_b CHECK (b <> 'zz'); +ALTER TABLE ONLY list_parted2 DROP CONSTRAINT check_b; +ERROR: cannot remove constraint from only the partitioned table when partitions exist +HINT: Do not specify the ONLY keyword. 
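+-- Aside (illustrative sketch, not part of the captured test output): without
+-- ONLY the same commands recurse to every partition, so the two failing
+-- drops above would go through as:
+--   ALTER TABLE list_parted2 ALTER b DROP NOT NULL;
+--   ALTER TABLE list_parted2 DROP CONSTRAINT check_b;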
+-- The ONLY forms are all right, though, if no partitions have been created yet
+CREATE TABLE parted_no_parts (a int) PARTITION BY LIST (a);
+ALTER TABLE ONLY parted_no_parts ALTER a SET NOT NULL;
+ALTER TABLE ONLY parted_no_parts ADD CONSTRAINT check_a CHECK (a > 0);
+ALTER TABLE ONLY parted_no_parts ALTER a DROP NOT NULL;
+ALTER TABLE ONLY parted_no_parts DROP CONSTRAINT check_a;
+DROP TABLE parted_no_parts;
+-- cannot drop inherited NOT NULL or check constraints from partition
+ALTER TABLE list_parted2 ALTER b SET NOT NULL, ADD CONSTRAINT check_a2 CHECK (a > 0);
+ALTER TABLE part_2 ALTER b DROP NOT NULL;
+ERROR: column "b" is marked NOT NULL in parent table
+ALTER TABLE part_2 DROP CONSTRAINT check_a2;
+ERROR: cannot drop inherited constraint "check_a2" of relation "part_2"
+-- Doesn't make sense to add NO INHERIT constraints on partitioned tables
+ALTER TABLE list_parted2 add constraint check_b2 check (b <> 'zz') NO INHERIT;
+ERROR: cannot add NO INHERIT constraint to partitioned table "list_parted2"
+-- check that a partition cannot participate in regular inheritance
+CREATE TABLE inh_test () INHERITS (part_2);
+ERROR: cannot inherit from partition "part_2"
+CREATE TABLE inh_test (LIKE part_2);
+ALTER TABLE inh_test INHERIT part_2;
+ERROR: cannot inherit from a partition
+ALTER TABLE part_2 INHERIT inh_test;
+ERROR: cannot change inheritance of a partition
+-- cannot drop or alter type of partition key columns of lower level
+-- partitioned tables; for example, part_5, which is list_parted2's
+-- partition, is partitioned on b.
+ALTER TABLE list_parted2 DROP COLUMN b;
+ERROR: cannot drop column "b" because it is part of the partition key of relation "part_5"
+ALTER TABLE list_parted2 ALTER COLUMN b TYPE text;
+ERROR: cannot alter column "b" because it is part of the partition key of relation "part_5"
+-- dropping non-partition key columns should be allowed on the parent table.
+ALTER TABLE list_parted DROP COLUMN b; +SELECT * FROM list_parted; + a +--- +(0 rows) + +-- cleanup +DROP TABLE list_parted, list_parted2, range_parted; +DROP TABLE fail_def_part; +DROP TABLE hash_parted; +-- more tests for certain multi-level partitioning scenarios +create table p (a int, b int) partition by range (a, b); +create table p1 (b int, a int not null) partition by range (b); +create table p11 (like p1); +alter table p11 drop a; +alter table p11 add a int; +alter table p11 drop a; +alter table p11 add a int not null; +-- attnum for key attribute 'a' is different in p, p1, and p11 +select attrelid::regclass, attname, attnum +from pg_attribute +where attname = 'a' + and (attrelid = 'p'::regclass + or attrelid = 'p1'::regclass + or attrelid = 'p11'::regclass) +order by attrelid::regclass::text; + attrelid | attname | attnum +----------+---------+-------- + p | a | 1 + p1 | a | 2 + p11 | a | 4 +(3 rows) + +alter table p1 attach partition p11 for values from (2) to (5); +insert into p1 (a, b) values (2, 3); +-- check that partition validation scan correctly detects violating rows +alter table p attach partition p1 for values from (1, 2) to (1, 10); +ERROR: partition constraint of relation "p11" is violated by some row +-- cleanup +drop table p; +drop table p1; +-- validate constraint on partitioned tables should only scan leaf partitions +create table parted_validate_test (a int) partition by list (a); +create table parted_validate_test_1 partition of parted_validate_test for values in (0, 1); +alter table parted_validate_test add constraint parted_validate_test_chka check (a > 0) not valid; +alter table parted_validate_test validate constraint parted_validate_test_chka; +drop table parted_validate_test; +-- test alter column options +CREATE TABLE attmp(i integer); +INSERT INTO attmp VALUES (1); +ALTER TABLE attmp ALTER COLUMN i SET (n_distinct = 1, n_distinct_inherited = 2); +ALTER TABLE attmp ALTER COLUMN i RESET (n_distinct_inherited); +ANALYZE attmp; +DROP TABLE attmp; +DROP USER regress_alter_table_user1; +-- check that violating rows are correctly reported when attaching as the +-- default partition +create table defpart_attach_test (a int) partition by list (a); +create table defpart_attach_test1 partition of defpart_attach_test for values in (1); +create table defpart_attach_test_d (b int, a int); +alter table defpart_attach_test_d drop b; +insert into defpart_attach_test_d values (1), (2); +-- error because its constraint as the default partition would be violated +-- by the row containing 1 +alter table defpart_attach_test attach partition defpart_attach_test_d default; +ERROR: partition constraint of relation "defpart_attach_test_d" is violated by some row +delete from defpart_attach_test_d where a = 1; +alter table defpart_attach_test_d add check (a > 1); +-- should be attached successfully and without needing to be scanned +alter table defpart_attach_test attach partition defpart_attach_test_d default; +-- check that attaching a partition correctly reports any rows in the default +-- partition that should not be there for the new partition to be attached +-- successfully +create table defpart_attach_test_2 (like defpart_attach_test_d); +alter table defpart_attach_test attach partition defpart_attach_test_2 for values in (2); +ERROR: updated partition constraint for default partition "defpart_attach_test_d" would be violated by some row +drop table defpart_attach_test; +-- check combinations of temporary and permanent relations when attaching +-- partitions. 
+create table perm_part_parent (a int) partition by list (a); +create temp table temp_part_parent (a int) partition by list (a); +create table perm_part_child (a int); +create temp table temp_part_child (a int); +alter table temp_part_parent attach partition perm_part_child default; -- error +ERROR: cannot attach a permanent relation as partition of temporary relation "temp_part_parent" +alter table perm_part_parent attach partition temp_part_child default; -- error +ERROR: cannot attach a temporary relation as partition of permanent relation "perm_part_parent" +alter table temp_part_parent attach partition temp_part_child default; -- ok +drop table perm_part_parent cascade; +drop table temp_part_parent cascade; +-- check that attaching partitions to a table while it is being used is +-- prevented +create table tab_part_attach (a int) partition by list (a); +create or replace function func_part_attach() returns trigger + language plpgsql as $$ + begin + execute 'create table tab_part_attach_1 (a int)'; + execute 'alter table tab_part_attach attach partition tab_part_attach_1 for values in (1)'; + return null; + end $$; +create trigger trig_part_attach before insert on tab_part_attach + for each statement execute procedure func_part_attach(); +insert into tab_part_attach values (1); +ERROR: cannot ALTER TABLE "tab_part_attach" because it is being used by active queries in this session +CONTEXT: SQL statement "alter table tab_part_attach attach partition tab_part_attach_1 for values in (1)" +PL/pgSQL function func_part_attach() line 4 at EXECUTE +drop table tab_part_attach; +drop function func_part_attach(); +-- test case where the partitioning operator is a SQL function whose +-- evaluation results in the table's relcache being rebuilt partway through +-- the execution of an ATTACH PARTITION command +create function at_test_sql_partop (int4, int4) returns int language sql +as $$ select case when $1 = $2 then 0 when $1 > $2 then 1 else -1 end; $$; +create operator class at_test_sql_partop for type int4 using btree as + operator 1 < (int4, int4), operator 2 <= (int4, int4), + operator 3 = (int4, int4), operator 4 >= (int4, int4), + operator 5 > (int4, int4), function 1 at_test_sql_partop(int4, int4); +create table at_test_sql_partop (a int) partition by range (a at_test_sql_partop); +create table at_test_sql_partop_1 (a int); +alter table at_test_sql_partop attach partition at_test_sql_partop_1 for values from (0) to (10); +drop table at_test_sql_partop; +drop operator class at_test_sql_partop using btree; +drop function at_test_sql_partop; +/* Test case for bug #16242 */ +-- We create a parent and child where the child has missing +-- non-null attribute values, and arrange to pass them through +-- tuple conversion from the child to the parent tupdesc +create table bar1 (a integer, b integer not null default 1) + partition by range (a); +create table bar2 (a integer); +insert into bar2 values (1); +alter table bar2 add column b integer not null default 1; +-- (at this point bar2 contains tuple with natts=1) +alter table bar1 attach partition bar2 default; +-- this works: +select * from bar1; + a | b +---+--- + 1 | 1 +(1 row) + +-- this exercises tuple conversion: +create function xtrig() + returns trigger language plpgsql +as $$ + declare + r record; + begin + for r in select * from old loop + raise info 'a=%, b=%', r.a, r.b; + end loop; + return NULL; + end; +$$; +create trigger xtrig + after update on bar1 + referencing old table as old + for each statement execute procedure xtrig(); 
+update bar1 set a = a + 1; +INFO: a=1, b=1 +/* End test case for bug #16242 */ +-- Test that ALTER TABLE rewrite preserves a clustered index +-- for normal indexes and indexes on constraints. +create table alttype_cluster (a int); +alter table alttype_cluster add primary key (a); +create index alttype_cluster_ind on alttype_cluster (a); +alter table alttype_cluster cluster on alttype_cluster_ind; +-- Normal index remains clustered. +select indexrelid::regclass, indisclustered from pg_index + where indrelid = 'alttype_cluster'::regclass + order by indexrelid::regclass::text; + indexrelid | indisclustered +----------------------+---------------- + alttype_cluster_ind | t + alttype_cluster_pkey | f +(2 rows) + +alter table alttype_cluster alter a type bigint; +select indexrelid::regclass, indisclustered from pg_index + where indrelid = 'alttype_cluster'::regclass + order by indexrelid::regclass::text; + indexrelid | indisclustered +----------------------+---------------- + alttype_cluster_ind | t + alttype_cluster_pkey | f +(2 rows) + +-- Constraint index remains clustered. +alter table alttype_cluster cluster on alttype_cluster_pkey; +select indexrelid::regclass, indisclustered from pg_index + where indrelid = 'alttype_cluster'::regclass + order by indexrelid::regclass::text; + indexrelid | indisclustered +----------------------+---------------- + alttype_cluster_ind | f + alttype_cluster_pkey | t +(2 rows) + +alter table alttype_cluster alter a type int; +select indexrelid::regclass, indisclustered from pg_index + where indrelid = 'alttype_cluster'::regclass + order by indexrelid::regclass::text; + indexrelid | indisclustered +----------------------+---------------- + alttype_cluster_ind | f + alttype_cluster_pkey | t +(2 rows) + +drop table alttype_cluster; diff --git a/src/test/regress/expected/create_table_1.out b/src/test/regress/expected/create_table_1.out new file mode 100644 index 00000000000..4ec5f297a34 --- /dev/null +++ b/src/test/regress/expected/create_table_1.out @@ -0,0 +1,1315 @@ +-- +-- CREATE_TABLE +-- +-- +-- CLASS DEFINITIONS +-- +CREATE TABLE hobbies_r ( + name text, + person text +); +CREATE TABLE equipment_r ( + name text, + hobby text +); +CREATE TABLE onek ( + unique1 int4, + unique2 int4, + two int4, + four int4, + ten int4, + twenty int4, + hundred int4, + thousand int4, + twothousand int4, + fivethous int4, + tenthous int4, + odd int4, + even int4, + stringu1 name, + stringu2 name, + string4 name +); +CREATE TABLE tenk1 ( + unique1 int4, + unique2 int4, + two int4, + four int4, + ten int4, + twenty int4, + hundred int4, + thousand int4, + twothousand int4, + fivethous int4, + tenthous int4, + odd int4, + even int4, + stringu1 name, + stringu2 name, + string4 name +); +CREATE TABLE tenk2 ( + unique1 int4, + unique2 int4, + two int4, + four int4, + ten int4, + twenty int4, + hundred int4, + thousand int4, + twothousand int4, + fivethous int4, + tenthous int4, + odd int4, + even int4, + stringu1 name, + stringu2 name, + string4 name +); +CREATE TABLE person ( + name text, + age int4, + location point +); +CREATE TABLE emp ( + salary int4, + manager name +) INHERITS (person); +CREATE TABLE student ( + gpa float8 +) INHERITS (person); +CREATE TABLE stud_emp ( + percent int4 +) INHERITS (emp, student); +NOTICE: merging multiple inherited definitions of column "name" +NOTICE: merging multiple inherited definitions of column "age" +NOTICE: merging multiple inherited definitions of column "location" +CREATE TABLE city ( + name name, + location box, + budget city_budget 
+); +CREATE TABLE dept ( + dname name, + mgrname text +); +CREATE TABLE slow_emp4000 ( + home_base box +); +CREATE TABLE fast_emp4000 ( + home_base box +); +CREATE TABLE road ( + name text, + thepath path +); +CREATE TABLE ihighway () INHERITS (road); +CREATE TABLE shighway ( + surface text +) INHERITS (road); +CREATE TABLE real_city ( + pop int4, + cname text, + outline path +); +-- +-- test the "star" operators a bit more thoroughly -- this time, +-- throw in lots of NULL fields... +-- +-- a is the type root +-- b and c inherit from a (one-level single inheritance) +-- d inherits from b and c (two-level multiple inheritance) +-- e inherits from c (two-level single inheritance) +-- f inherits from e (three-level single inheritance) +-- +CREATE TABLE a_star ( + class char, + a int4 +); +CREATE TABLE b_star ( + b text +) INHERITS (a_star); +CREATE TABLE c_star ( + c name +) INHERITS (a_star); +CREATE TABLE d_star ( + d float8 +) INHERITS (b_star, c_star); +NOTICE: merging multiple inherited definitions of column "class" +NOTICE: merging multiple inherited definitions of column "a" +CREATE TABLE e_star ( + e int2 +) INHERITS (c_star); +CREATE TABLE f_star ( + f polygon +) INHERITS (e_star); +CREATE TABLE aggtest ( + a int2, + b float4 +); +CREATE TABLE hash_i4_heap ( + seqno int4, + random int4 +); +CREATE TABLE hash_name_heap ( + seqno int4, + random name +); +CREATE TABLE hash_txt_heap ( + seqno int4, + random text +); +CREATE TABLE hash_f8_heap ( + seqno int4, + random float8 +); +-- don't include the hash_ovfl_heap stuff in the distribution +-- the data set is too large for what it's worth +-- +-- CREATE TABLE hash_ovfl_heap ( +-- x int4, +-- y int4 +-- ); +CREATE TABLE bt_i4_heap ( + seqno int4, + random int4 +); +CREATE TABLE bt_name_heap ( + seqno name, + random int4 +); +CREATE TABLE bt_txt_heap ( + seqno text, + random int4 +); +CREATE TABLE bt_f8_heap ( + seqno float8, + random int4 +); +CREATE TABLE array_op_test ( + seqno int4, + i int4[], + t text[] +); +CREATE TABLE array_index_op_test ( + seqno int4, + i int4[], + t text[] +); +CREATE TABLE testjsonb ( + j jsonb +); +CREATE TABLE unknowntab ( + u unknown -- fail +); +ERROR: column "u" has pseudo-type unknown +CREATE TYPE unknown_comptype AS ( + u unknown -- fail +); +ERROR: column "u" has pseudo-type unknown +CREATE TABLE IF NOT EXISTS test_tsvector( + t text, + a tsvector +); +CREATE TABLE IF NOT EXISTS test_tsvector( + t text +); +NOTICE: relation "test_tsvector" already exists, skipping +-- invalid: non-lowercase quoted reloptions identifiers +CREATE TABLE tas_case WITH ("Fillfactor" = 10) AS SELECT 1 a; +ERROR: unrecognized parameter "Fillfactor" +CREATE UNLOGGED TABLE unlogged1 (a int primary key); -- OK +CREATE TEMPORARY TABLE unlogged2 (a int primary key); -- OK +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^unlogged\d' ORDER BY relname; + relname | relkind | relpersistence +----------------+---------+---------------- + unlogged1 | r | p + unlogged1_pkey | i | p + unlogged2 | r | t + unlogged2_pkey | i | t +(4 rows) + +REINDEX INDEX unlogged1_pkey; +REINDEX INDEX unlogged2_pkey; +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^unlogged\d' ORDER BY relname; + relname | relkind | relpersistence +----------------+---------+---------------- + unlogged1 | r | p + unlogged1_pkey | i | p + unlogged2 | r | t + unlogged2_pkey | i | t +(4 rows) + +DROP TABLE unlogged2; +INSERT INTO unlogged1 VALUES (42); +CREATE UNLOGGED TABLE public.unlogged2 (a int primary key); -- also OK 
+CREATE UNLOGGED TABLE pg_temp.unlogged3 (a int primary key); -- not OK +ERROR: only temporary relations may be created in temporary schemas +LINE 1: CREATE UNLOGGED TABLE pg_temp.unlogged3 (a int primary key); + ^ +CREATE TABLE pg_temp.implicitly_temp (a int primary key); -- OK +CREATE TEMP TABLE explicitly_temp (a int primary key); -- also OK +CREATE TEMP TABLE pg_temp.doubly_temp (a int primary key); -- also OK +CREATE TEMP TABLE public.temp_to_perm (a int primary key); -- not OK +ERROR: cannot create temporary relation in non-temporary schema +LINE 1: CREATE TEMP TABLE public.temp_to_perm (a int primary key); + ^ +DROP TABLE unlogged1, public.unlogged2; +CREATE TABLE as_select1 AS SELECT * FROM pg_class WHERE relkind = 'r'; +CREATE TABLE as_select1 AS SELECT * FROM pg_class WHERE relkind = 'r'; +ERROR: relation "as_select1" already exists +CREATE TABLE IF NOT EXISTS as_select1 AS SELECT * FROM pg_class WHERE relkind = 'r'; +NOTICE: relation "as_select1" already exists, skipping +DROP TABLE as_select1; +PREPARE select1 AS SELECT 1 as a; +CREATE TABLE as_select1 AS EXECUTE select1; +CREATE TABLE as_select1 AS EXECUTE select1; +ERROR: relation "as_select1" already exists +SELECT * FROM as_select1; + a +--- + 1 +(1 row) + +CREATE TABLE IF NOT EXISTS as_select1 AS EXECUTE select1; +NOTICE: relation "as_select1" already exists, skipping +DROP TABLE as_select1; +DEALLOCATE select1; +-- create an extra wide table to test for issues related to that +-- (temporarily hide query, to avoid the long CREATE TABLE stmt) +\set ECHO none +INSERT INTO extra_wide_table(firstc, lastc) VALUES('first col', 'last col'); +SELECT firstc, lastc FROM extra_wide_table; + firstc | lastc +-----------+---------- + first col | last col +(1 row) + +-- check that tables with oids cannot be created anymore +CREATE TABLE withoid() WITH OIDS; +ERROR: syntax error at or near "OIDS" +LINE 1: CREATE TABLE withoid() WITH OIDS; + ^ +CREATE TABLE withoid() WITH (oids); +ERROR: tables declared WITH OIDS are not supported +CREATE TABLE withoid() WITH (oids = true); +ERROR: tables declared WITH OIDS are not supported +-- but explicitly not adding oids is still supported +CREATE TEMP TABLE withoutoid() WITHOUT OIDS; DROP TABLE withoutoid; +CREATE TEMP TABLE withoutoid() WITH (oids = false); DROP TABLE withoutoid; +-- check restriction with default expressions +-- invalid use of column reference in default expressions +CREATE TABLE default_expr_column (id int DEFAULT (id)); +ERROR: cannot use column reference in DEFAULT expression +LINE 1: CREATE TABLE default_expr_column (id int DEFAULT (id)); + ^ +CREATE TABLE default_expr_column (id int DEFAULT (bar.id)); +ERROR: cannot use column reference in DEFAULT expression +LINE 1: CREATE TABLE default_expr_column (id int DEFAULT (bar.id)); + ^ +CREATE TABLE default_expr_agg_column (id int DEFAULT (avg(id))); +ERROR: cannot use column reference in DEFAULT expression +LINE 1: ...TE TABLE default_expr_agg_column (id int DEFAULT (avg(id))); + ^ +-- invalid column definition +CREATE TABLE default_expr_non_column (a int DEFAULT (avg(non_existent))); +ERROR: cannot use column reference in DEFAULT expression +LINE 1: ...TABLE default_expr_non_column (a int DEFAULT (avg(non_existe... 
+ ^ +-- invalid use of aggregate +CREATE TABLE default_expr_agg (a int DEFAULT (avg(1))); +ERROR: aggregate functions are not allowed in DEFAULT expressions +LINE 1: CREATE TABLE default_expr_agg (a int DEFAULT (avg(1))); + ^ +-- invalid use of subquery +CREATE TABLE default_expr_agg (a int DEFAULT (select 1)); +ERROR: cannot use subquery in DEFAULT expression +LINE 1: CREATE TABLE default_expr_agg (a int DEFAULT (select 1)); + ^ +-- invalid use of set-returning function +CREATE TABLE default_expr_agg (a int DEFAULT (generate_series(1,3))); +ERROR: set-returning functions are not allowed in DEFAULT expressions +LINE 1: CREATE TABLE default_expr_agg (a int DEFAULT (generate_serie... + ^ +-- Verify that subtransaction rollback restores rd_createSubid. +BEGIN; +CREATE TABLE remember_create_subid (c int); +SAVEPOINT q; DROP TABLE remember_create_subid; ROLLBACK TO q; +COMMIT; +DROP TABLE remember_create_subid; +-- Verify that subtransaction rollback restores rd_firstRelfilenodeSubid. +CREATE TABLE remember_node_subid (c int); +BEGIN; +ALTER TABLE remember_node_subid ALTER c TYPE bigint; +SAVEPOINT q; DROP TABLE remember_node_subid; ROLLBACK TO q; +COMMIT; +DROP TABLE remember_node_subid; +-- +-- Partitioned tables +-- +-- cannot combine INHERITS and PARTITION BY (although grammar allows) +CREATE TABLE partitioned ( + a int +) INHERITS (some_table) PARTITION BY LIST (a); +ERROR: cannot create partitioned table as inheritance child +-- cannot use more than 1 column as partition key for list partitioned table +CREATE TABLE partitioned ( + a1 int, + a2 int +) PARTITION BY LIST (a1, a2); -- fail +ERROR: cannot use "list" partition strategy with more than one column +-- unsupported constraint type for partitioned tables +CREATE TABLE partitioned ( + a int, + EXCLUDE USING gist (a WITH &&) +) PARTITION BY RANGE (a); +ERROR: exclusion constraints are not supported on partitioned tables +LINE 3: EXCLUDE USING gist (a WITH &&) + ^ +-- prevent using prohibited expressions in the key +CREATE FUNCTION retset (a int) RETURNS SETOF int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (retset(a)); +ERROR: set-returning functions are not allowed in partition key expressions +DROP FUNCTION retset(int); +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE ((avg(a))); +ERROR: aggregate functions are not allowed in partition key expressions +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE ((avg(a) OVER (PARTITION BY b))); +ERROR: window functions are not allowed in partition key expressions +CREATE TABLE partitioned ( + a int +) PARTITION BY LIST ((a LIKE (SELECT 1))); +ERROR: cannot use subquery in partition key expression +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE ((42)); +ERROR: cannot use constant expression as partition key +CREATE FUNCTION const_func () RETURNS int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (const_func()); +ERROR: cannot use constant expression as partition key +DROP FUNCTION const_func(); +-- only accept valid partitioning strategy +CREATE TABLE partitioned ( + a int +) PARTITION BY MAGIC (a); +ERROR: unrecognized partitioning strategy "magic" +-- specified column must be present in the table +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (b); +ERROR: column "b" named in partition key does not exist +LINE 3: ) PARTITION BY RANGE (b); + ^ +-- cannot use system columns in partition key +CREATE TABLE partitioned ( + a int +) PARTITION BY 
RANGE (xmin); +ERROR: cannot use system column "xmin" in partition key +LINE 3: ) PARTITION BY RANGE (xmin); + ^ +-- cannot use pseudotypes +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE (((a, b))); +ERROR: partition key column 1 has pseudo-type record +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE (a, ('unknown')); +ERROR: partition key column 2 has pseudo-type unknown +-- functions in key must be immutable +CREATE FUNCTION immut_func (a int) RETURNS int AS $$ SELECT a + random()::int; $$ LANGUAGE SQL; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (immut_func(a)); +ERROR: functions in partition key expression must be marked IMMUTABLE +DROP FUNCTION immut_func(int); +-- prevent using columns of unsupported types in key (type must have a btree operator class) +CREATE TABLE partitioned ( + a point +) PARTITION BY LIST (a); +ERROR: data type point has no default operator class for access method "btree" +HINT: You must specify a btree operator class or define a default btree operator class for the data type. +CREATE TABLE partitioned ( + a point +) PARTITION BY LIST (a point_ops); +ERROR: operator class "point_ops" does not exist for access method "btree" +CREATE TABLE partitioned ( + a point +) PARTITION BY RANGE (a); +ERROR: data type point has no default operator class for access method "btree" +HINT: You must specify a btree operator class or define a default btree operator class for the data type. +CREATE TABLE partitioned ( + a point +) PARTITION BY RANGE (a point_ops); +ERROR: operator class "point_ops" does not exist for access method "btree" +-- cannot add NO INHERIT constraints to partitioned tables +CREATE TABLE partitioned ( + a int, + CONSTRAINT check_a CHECK (a > 0) NO INHERIT +) PARTITION BY RANGE (a); +ERROR: cannot add NO INHERIT constraint to partitioned table "partitioned" +-- some checks after successful creation of a partitioned table +CREATE FUNCTION plusone(a int) RETURNS INT AS $$ SELECT a+1; $$ LANGUAGE SQL; +CREATE TABLE partitioned ( + a int, + b int, + c text, + d text +) PARTITION BY RANGE (a oid_ops, plusone(b), c collate "default", d collate "C"); +-- check relkind +SELECT relkind FROM pg_class WHERE relname = 'partitioned'; + relkind +--------- + p +(1 row) + +-- prevent a function referenced in partition key from being dropped +DROP FUNCTION plusone(int); +ERROR: cannot drop function plusone(integer) because other objects depend on it +DETAIL: table partitioned depends on function plusone(integer) +HINT: Use DROP ... CASCADE to drop the dependent objects too. 
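+-- Aside (illustrative sketch, not part of the test proper): the blocked
+-- DROP above is enforced through an ordinary pg_depend entry from the
+-- table to the function; it could be inspected with something like:
+--   SELECT objid::regclass AS dependent
+--     FROM pg_depend
+--    WHERE classid = 'pg_class'::regclass
+--      AND refclassid = 'pg_proc'::regclass
+--      AND refobjid = 'plusone'::regproc;
+-- which should return "partitioned".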
+-- partitioned table cannot participate in regular inheritance +CREATE TABLE partitioned2 ( + a int, + b text +) PARTITION BY RANGE ((a+1), substr(b, 1, 5)); +CREATE TABLE fail () INHERITS (partitioned2); +ERROR: cannot inherit from partitioned table "partitioned2" +-- Partition key in describe output +\d partitioned + Partitioned table "public.partitioned" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | + d | text | | | +Partition key: RANGE (a oid_ops, plusone(b), c, d COLLATE "C") +Number of partitions: 0 + +\d+ partitioned2 + Partitioned table "public.partitioned2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + a | integer | | | | plain | | + b | text | | | | extended | | +Partition key: RANGE (((a + 1)), substr(b, 1, 5)) +Number of partitions: 0 + +INSERT INTO partitioned2 VALUES (1, 'hello'); +ERROR: no partition of relation "partitioned2" found for row +DETAIL: Partition key of the failing row contains ((a + 1), substr(b, 1, 5)) = (2, hello). +CREATE TABLE part2_1 PARTITION OF partitioned2 FOR VALUES FROM (-1, 'aaaaa') TO (100, 'ccccc'); +\d+ part2_1 + Table "public.part2_1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + a | integer | | | | plain | | + b | text | | | | extended | | +Partition of: partitioned2 FOR VALUES FROM ('-1', 'aaaaa') TO (100, 'ccccc') +Partition constraint: (((a + 1) IS NOT NULL) AND (substr(b, 1, 5) IS NOT NULL) AND (((a + 1) > '-1'::integer) OR (((a + 1) = '-1'::integer) AND (substr(b, 1, 5) >= 'aaaaa'::text))) AND (((a + 1) < 100) OR (((a + 1) = 100) AND (substr(b, 1, 5) < 'ccccc'::text)))) + +DROP TABLE partitioned, partitioned2; +-- check reference to partitioned table's rowtype in partition descriptor +create table partitioned (a int, b int) + partition by list ((row(a, b)::partitioned)); +create table partitioned1 + partition of partitioned for values in ('(1,2)'::partitioned); +create table partitioned2 + partition of partitioned for values in ('(2,4)'::partitioned); +explain (costs off) +select * from partitioned where row(a,b)::partitioned = '(1,2)'::partitioned; + QUERY PLAN +----------------------------------------------------------- + Seq Scan on partitioned1 partitioned + Filter: (ROW(a, b)::partitioned = '(1,2)'::partitioned) +(2 rows) + +drop table partitioned; +-- whole-row Var in partition key works too +create table partitioned (a int, b int) + partition by list ((partitioned)); +create table partitioned1 + partition of partitioned for values in ('(1,2)'); +create table partitioned2 + partition of partitioned for values in ('(2,4)'); +explain (costs off) +select * from partitioned where partitioned = '(1,2)'::partitioned; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on partitioned1 partitioned + Filter: ((partitioned.*)::partitioned = '(1,2)'::partitioned) +(2 rows) + +\d+ partitioned1 + Table "public.partitioned1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | | | plain | | +Partition of: partitioned FOR VALUES IN ('(1,2)') +Partition 
constraint: (((partitioned1.*)::partitioned IS DISTINCT FROM NULL) AND ((partitioned1.*)::partitioned = '(1,2)'::partitioned)) + +drop table partitioned; +-- check that dependencies of partition columns are handled correctly +create domain intdom1 as int; +create table partitioned ( + a intdom1, + b text +) partition by range (a); +alter table partitioned drop column a; -- fail +ERROR: cannot drop column "a" because it is part of the partition key of relation "partitioned" +drop domain intdom1; -- fail, requires cascade +ERROR: cannot drop type intdom1 because other objects depend on it +DETAIL: table partitioned depends on type intdom1 +HINT: Use DROP ... CASCADE to drop the dependent objects too. +drop domain intdom1 cascade; +NOTICE: drop cascades to table partitioned +table partitioned; -- gone +ERROR: relation "partitioned" does not exist +LINE 1: table partitioned; + ^ +-- likewise for columns used in partition expressions +create domain intdom1 as int; +create table partitioned ( + a intdom1, + b text +) partition by range (plusone(a)); +alter table partitioned drop column a; -- fail +ERROR: cannot drop column "a" because it is part of the partition key of relation "partitioned" +drop domain intdom1; -- fail, requires cascade +ERROR: cannot drop type intdom1 because other objects depend on it +DETAIL: table partitioned depends on type intdom1 +HINT: Use DROP ... CASCADE to drop the dependent objects too. +drop domain intdom1 cascade; +NOTICE: drop cascades to table partitioned +table partitioned; -- gone +ERROR: relation "partitioned" does not exist +LINE 1: table partitioned; + ^ +-- +-- Partitions +-- +-- check partition bound syntax +CREATE TABLE list_parted ( + a int +) PARTITION BY LIST (a); +CREATE TABLE part_p1 PARTITION OF list_parted FOR VALUES IN ('1'); +CREATE TABLE part_p2 PARTITION OF list_parted FOR VALUES IN (2); +CREATE TABLE part_p3 PARTITION OF list_parted FOR VALUES IN ((2+1)); +CREATE TABLE part_null PARTITION OF list_parted FOR VALUES IN (null); +\d+ list_parted + Partitioned table "public.list_parted" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | +Partition key: LIST (a) +Partitions: part_null FOR VALUES IN (NULL), + part_p1 FOR VALUES IN (1), + part_p2 FOR VALUES IN (2), + part_p3 FOR VALUES IN (3) + +-- forbidden expressions for partition bound with list partitioned table +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN (somename); +ERROR: cannot use column reference in partition bound expression +LINE 1: ...expr_fail PARTITION OF list_parted FOR VALUES IN (somename); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN (somename.somename); +ERROR: cannot use column reference in partition bound expression +LINE 1: ...expr_fail PARTITION OF list_parted FOR VALUES IN (somename.s... 
+ ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN (a); +ERROR: cannot use column reference in partition bound expression +LINE 1: ..._bogus_expr_fail PARTITION OF list_parted FOR VALUES IN (a); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN (sum(a)); +ERROR: cannot use column reference in partition bound expression +LINE 1: ...s_expr_fail PARTITION OF list_parted FOR VALUES IN (sum(a)); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN (sum(somename)); +ERROR: cannot use column reference in partition bound expression +LINE 1: ..._fail PARTITION OF list_parted FOR VALUES IN (sum(somename))... + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN (sum(1)); +ERROR: aggregate functions are not allowed in partition bound +LINE 1: ...s_expr_fail PARTITION OF list_parted FOR VALUES IN (sum(1)); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN ((select 1)); +ERROR: cannot use subquery in partition bound +LINE 1: ...expr_fail PARTITION OF list_parted FOR VALUES IN ((select 1)... + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN (generate_series(4, 6)); +ERROR: set-returning functions are not allowed in partition bound +LINE 1: ...expr_fail PARTITION OF list_parted FOR VALUES IN (generate_s... + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF list_parted FOR VALUES IN ((1+1) collate "POSIX"); +ERROR: collations are not supported by type integer +LINE 1: ...ail PARTITION OF list_parted FOR VALUES IN ((1+1) collate "P... + ^ +-- syntax does not allow empty list of values for list partitions +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (); +ERROR: syntax error at or near ")" +LINE 1: ...E TABLE fail_part PARTITION OF list_parted FOR VALUES IN (); + ^ +-- trying to specify range for list partitioned table +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) TO (2); +ERROR: invalid bound specification for a list partition +LINE 1: ...BLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) T... + ^ +-- trying to specify modulus and remainder for list partitioned table +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); +ERROR: invalid bound specification for a list partition +LINE 1: ...BLE fail_part PARTITION OF list_parted FOR VALUES WITH (MODU... 
+ ^ +-- check default partition cannot be created more than once +CREATE TABLE part_default PARTITION OF list_parted DEFAULT; +CREATE TABLE fail_default_part PARTITION OF list_parted DEFAULT; +ERROR: partition "fail_default_part" conflicts with existing default partition "part_default" +LINE 1: ...TE TABLE fail_default_part PARTITION OF list_parted DEFAULT; + ^ +-- specified literal can't be cast to the partition column data type +CREATE TABLE bools ( + a bool +) PARTITION BY LIST (a); +CREATE TABLE bools_true PARTITION OF bools FOR VALUES IN (1); +ERROR: specified value cannot be cast to type boolean for column "a" +LINE 1: ...REATE TABLE bools_true PARTITION OF bools FOR VALUES IN (1); + ^ +DROP TABLE bools; +-- specified literal can be cast, and the cast might not be immutable +CREATE TABLE moneyp ( + a money +) PARTITION BY LIST (a); +CREATE TABLE moneyp_10 PARTITION OF moneyp FOR VALUES IN (10); +CREATE TABLE moneyp_11 PARTITION OF moneyp FOR VALUES IN ('11'); +CREATE TABLE moneyp_12 PARTITION OF moneyp FOR VALUES IN (to_char(12, '99')::int); +DROP TABLE moneyp; +-- cast is immutable +CREATE TABLE bigintp ( + a bigint +) PARTITION BY LIST (a); +CREATE TABLE bigintp_10 PARTITION OF bigintp FOR VALUES IN (10); +-- fails due to overlap: +CREATE TABLE bigintp_10_2 PARTITION OF bigintp FOR VALUES IN ('10'); +ERROR: partition "bigintp_10_2" would overlap partition "bigintp_10" +LINE 1: ...ABLE bigintp_10_2 PARTITION OF bigintp FOR VALUES IN ('10'); + ^ +DROP TABLE bigintp; +CREATE TABLE range_parted ( + a date +) PARTITION BY RANGE (a); +-- forbidden expressions for partition bounds with range partitioned table +CREATE TABLE part_bogus_expr_fail PARTITION OF range_parted + FOR VALUES FROM (somename) TO ('2019-01-01'); +ERROR: cannot use column reference in partition bound expression +LINE 2: FOR VALUES FROM (somename) TO ('2019-01-01'); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF range_parted + FOR VALUES FROM (somename.somename) TO ('2019-01-01'); +ERROR: cannot use column reference in partition bound expression +LINE 2: FOR VALUES FROM (somename.somename) TO ('2019-01-01'); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF range_parted + FOR VALUES FROM (a) TO ('2019-01-01'); +ERROR: cannot use column reference in partition bound expression +LINE 2: FOR VALUES FROM (a) TO ('2019-01-01'); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF range_parted + FOR VALUES FROM (max(a)) TO ('2019-01-01'); +ERROR: cannot use column reference in partition bound expression +LINE 2: FOR VALUES FROM (max(a)) TO ('2019-01-01'); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF range_parted + FOR VALUES FROM (max(somename)) TO ('2019-01-01'); +ERROR: cannot use column reference in partition bound expression +LINE 2: FOR VALUES FROM (max(somename)) TO ('2019-01-01'); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF range_parted + FOR VALUES FROM (max('2019-02-01'::date)) TO ('2019-01-01'); +ERROR: aggregate functions are not allowed in partition bound +LINE 2: FOR VALUES FROM (max('2019-02-01'::date)) TO ('2019-01-01'... 
+ ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF range_parted + FOR VALUES FROM ((select 1)) TO ('2019-01-01'); +ERROR: cannot use subquery in partition bound +LINE 2: FOR VALUES FROM ((select 1)) TO ('2019-01-01'); + ^ +CREATE TABLE part_bogus_expr_fail PARTITION OF range_parted + FOR VALUES FROM (generate_series(1, 3)) TO ('2019-01-01'); +ERROR: set-returning functions are not allowed in partition bound +LINE 2: FOR VALUES FROM (generate_series(1, 3)) TO ('2019-01-01'); + ^ +-- trying to specify list for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES IN ('a'); +ERROR: invalid bound specification for a range partition +LINE 1: ...BLE fail_part PARTITION OF range_parted FOR VALUES IN ('a'); + ^ +-- trying to specify modulus and remainder for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); +ERROR: invalid bound specification for a range partition +LINE 1: ...LE fail_part PARTITION OF range_parted FOR VALUES WITH (MODU... + ^ +-- each of start and end bounds must have same number of values as the +-- length of the partition key +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('z'); +ERROR: FROM must specify exactly one value per partitioning column +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a') TO ('z', 1); +ERROR: TO must specify exactly one value per partitioning column +-- cannot specify null values in range bounds +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM (null) TO (maxvalue); +ERROR: cannot specify NULL in range bound +-- trying to specify modulus and remainder for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); +ERROR: invalid bound specification for a range partition +LINE 1: ...LE fail_part PARTITION OF range_parted FOR VALUES WITH (MODU... + ^ +-- check partition bound syntax for the hash partition +CREATE TABLE hash_parted ( + a int +) PARTITION BY HASH (a); +CREATE TABLE hpart_1 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 10, REMAINDER 0); +CREATE TABLE hpart_2 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 50, REMAINDER 1); +CREATE TABLE hpart_3 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 200, REMAINDER 2); +-- modulus 25 is factor of modulus of 50 but 10 is not a factor of 25. +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES WITH (MODULUS 25, REMAINDER 3); +ERROR: every hash partition modulus must be a factor of the next larger modulus +DETAIL: The new modulus 25 is not divisible by 10, the modulus of existing partition "hpart_1". +-- previous modulus 50 is factor of 150 but this modulus is not a factor of next modulus 200. +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES WITH (MODULUS 150, REMAINDER 3); +ERROR: every hash partition modulus must be a factor of the next larger modulus +DETAIL: The new modulus 150 is not a factor of 200, the modulus of existing partition "hpart_3". +-- trying to specify range for the hash partitioned table +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES FROM ('a', 1) TO ('z'); +ERROR: invalid bound specification for a hash partition +LINE 1: ...BLE fail_part PARTITION OF hash_parted FOR VALUES FROM ('a',... 
+ ^ +-- trying to specify list value for the hash partitioned table +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES IN (1000); +ERROR: invalid bound specification for a hash partition +LINE 1: ...BLE fail_part PARTITION OF hash_parted FOR VALUES IN (1000); + ^ +-- trying to create default partition for the hash partitioned table +CREATE TABLE fail_default_part PARTITION OF hash_parted DEFAULT; +ERROR: a hash-partitioned table may not have a default partition +-- check if compatible with the specified parent +-- cannot create as partition of a non-partitioned table +CREATE TABLE unparted ( + a int +); +CREATE TABLE fail_part PARTITION OF unparted FOR VALUES IN ('a'); +ERROR: "unparted" is not partitioned +CREATE TABLE fail_part PARTITION OF unparted FOR VALUES WITH (MODULUS 2, REMAINDER 1); +ERROR: "unparted" is not partitioned +DROP TABLE unparted; +-- cannot create a permanent rel as partition of a temp rel +CREATE TEMP TABLE temp_parted ( + a int +) PARTITION BY LIST (a); +CREATE TABLE fail_part PARTITION OF temp_parted FOR VALUES IN ('a'); +ERROR: cannot create a permanent relation as partition of temporary relation "temp_parted" +DROP TABLE temp_parted; +-- check for partition bound overlap and other invalid specifications +CREATE TABLE list_parted2 ( + a varchar +) PARTITION BY LIST (a); +CREATE TABLE part_null_z PARTITION OF list_parted2 FOR VALUES IN (null, 'z'); +CREATE TABLE part_ab PARTITION OF list_parted2 FOR VALUES IN ('a', 'b'); +CREATE TABLE list_parted2_def PARTITION OF list_parted2 DEFAULT; +CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN (null); +ERROR: partition "fail_part" would overlap partition "part_null_z" +LINE 1: ...LE fail_part PARTITION OF list_parted2 FOR VALUES IN (null); + ^ +CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN ('b', 'c'); +ERROR: partition "fail_part" would overlap partition "part_ab" +LINE 1: ...ail_part PARTITION OF list_parted2 FOR VALUES IN ('b', 'c'); + ^ +-- check default partition overlap +INSERT INTO list_parted2 VALUES('X'); +CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN ('W', 'X', 'Y'); +ERROR: updated partition constraint for default partition "list_parted2_def" would be violated by some row +CREATE TABLE range_parted2 ( + a int +) PARTITION BY RANGE (a); +-- trying to create range partition with empty range +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (0); +ERROR: empty range bound specified for partition "fail_part" +LINE 1: ..._part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (0); + ^ +DETAIL: Specified lower bound (1) is greater than or equal to upper bound (0). +-- note that the range '[1, 1)' has no elements +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (1); +ERROR: empty range bound specified for partition "fail_part" +LINE 1: ..._part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (1); + ^ +DETAIL: Specified lower bound (1) is greater than or equal to upper bound (1). +CREATE TABLE part0 PARTITION OF range_parted2 FOR VALUES FROM (minvalue) TO (1); +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (minvalue) TO (2); +ERROR: partition "fail_part" would overlap partition "part0" +LINE 1: ..._part PARTITION OF range_parted2 FOR VALUES FROM (minvalue) ... 
+ ^ +CREATE TABLE part1 PARTITION OF range_parted2 FOR VALUES FROM (1) TO (10); +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (-1) TO (1); +ERROR: partition "fail_part" would overlap partition "part0" +LINE 1: ..._part PARTITION OF range_parted2 FOR VALUES FROM (-1) TO (1)... + ^ +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (9) TO (maxvalue); +ERROR: partition "fail_part" would overlap partition "part1" +LINE 1: ..._part PARTITION OF range_parted2 FOR VALUES FROM (9) TO (max... + ^ +CREATE TABLE part2 PARTITION OF range_parted2 FOR VALUES FROM (20) TO (30); +CREATE TABLE part3 PARTITION OF range_parted2 FOR VALUES FROM (30) TO (40); +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (10) TO (30); +ERROR: partition "fail_part" would overlap partition "part2" +LINE 1: ...art PARTITION OF range_parted2 FOR VALUES FROM (10) TO (30); + ^ +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (10) TO (50); +ERROR: partition "fail_part" would overlap partition "part2" +LINE 1: ...art PARTITION OF range_parted2 FOR VALUES FROM (10) TO (50); + ^ +-- Create a default partition for range partitioned table +CREATE TABLE range2_default PARTITION OF range_parted2 DEFAULT; +-- More than one default partition is not allowed, so this should give error +CREATE TABLE fail_default_part PARTITION OF range_parted2 DEFAULT; +ERROR: partition "fail_default_part" conflicts with existing default partition "range2_default" +LINE 1: ... TABLE fail_default_part PARTITION OF range_parted2 DEFAULT; + ^ +-- Check if the range for default partitions overlap +INSERT INTO range_parted2 VALUES (85); +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (80) TO (90); +ERROR: updated partition constraint for default partition "range2_default" would be violated by some row +CREATE TABLE part4 PARTITION OF range_parted2 FOR VALUES FROM (90) TO (100); +-- now check for multi-column range partition key +CREATE TABLE range_parted3 ( + a int, + b int +) PARTITION BY RANGE (a, (b+1)); +CREATE TABLE part00 PARTITION OF range_parted3 FOR VALUES FROM (0, minvalue) TO (0, maxvalue); +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (0, minvalue) TO (0, 1); +ERROR: partition "fail_part" would overlap partition "part00" +LINE 1: ..._part PARTITION OF range_parted3 FOR VALUES FROM (0, minvalu... + ^ +CREATE TABLE part10 PARTITION OF range_parted3 FOR VALUES FROM (1, minvalue) TO (1, 1); +CREATE TABLE part11 PARTITION OF range_parted3 FOR VALUES FROM (1, 1) TO (1, 10); +CREATE TABLE part12 PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, maxvalue); +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, 20); +ERROR: partition "fail_part" would overlap partition "part12" +LINE 1: ...rt PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1,... + ^ +CREATE TABLE range3_default PARTITION OF range_parted3 DEFAULT; +-- cannot create a partition that says column b is allowed to range +-- from -infinity to +infinity, while there exist partitions that have +-- more specific ranges +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, minvalue) TO (1, maxvalue); +ERROR: partition "fail_part" would overlap partition "part10" +LINE 1: ..._part PARTITION OF range_parted3 FOR VALUES FROM (1, minvalu... 
+                                                            ^
+-- check for partition bound overlap and other invalid specifications for the hash partition
+CREATE TABLE hash_parted2 (
+	a varchar
+) PARTITION BY HASH (a);
+CREATE TABLE h2part_1 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 4, REMAINDER 2);
+CREATE TABLE h2part_2 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 0);
+CREATE TABLE h2part_3 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 4);
+CREATE TABLE h2part_4 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 5);
+-- overlap with h2part_4
+CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+ERROR:  partition "fail_part" would overlap partition "h2part_4"
+LINE 1: ...LE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODU...
+                                                                  ^
+-- modulus must be greater than zero
+CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 0, REMAINDER 1);
+ERROR:  modulus for hash partition must be a positive integer
+-- remainder must be greater than or equal to zero and less than modulus
+CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 8);
+ERROR:  remainder for hash partition must be less than modulus
+-- check schema propagation from parent
+CREATE TABLE parted (
+	a text,
+	b int NOT NULL DEFAULT 0,
+	CONSTRAINT check_a CHECK (length(a) > 0)
+) PARTITION BY LIST (a);
+CREATE TABLE part_a PARTITION OF parted FOR VALUES IN ('a');
+-- only inherited attributes (never local ones)
+SELECT attname, attislocal, attinhcount FROM pg_attribute
+  WHERE attrelid = 'part_a'::regclass and attnum > 0
+  ORDER BY attnum;
+ attname | attislocal | attinhcount 
+---------+------------+-------------
+ a       | f          |           1
+ b       | f          |           1
+(2 rows)
+
+-- able to specify column default, column constraint, and table constraint
+-- first check the "column specified more than once" error
+CREATE TABLE part_b PARTITION OF parted (
+	b NOT NULL,
+	b DEFAULT 1,
+	b CHECK (b >= 0),
+	CONSTRAINT check_a CHECK (length(a) > 0)
+) FOR VALUES IN ('b');
+ERROR:  column "b" specified more than once
+CREATE TABLE part_b PARTITION OF parted (
+	b NOT NULL DEFAULT 1,
+	CONSTRAINT check_a CHECK (length(a) > 0),
+	CONSTRAINT check_b CHECK (b >= 0)
+) FOR VALUES IN ('b');
+NOTICE:  merging constraint "check_a" with inherited definition
+-- conislocal should be false for any merged constraints, true otherwise
+SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_b'::regclass ORDER BY conislocal, coninhcount;
+ conislocal | coninhcount 
+------------+-------------
+ f          |           1
+ t          |           0
+(2 rows)
+
+-- Once check_b is added to the parent, it should be made non-local for part_b
+ALTER TABLE parted ADD CONSTRAINT check_b CHECK (b >= 0);
+NOTICE:  merging constraint "check_b" with inherited definition
+SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_b'::regclass;
+ conislocal | coninhcount 
+------------+-------------
+ f          |           1
+ f          |           1
+(2 rows)
+
+-- Neither check_a nor check_b is droppable from part_b
+ALTER TABLE part_b DROP CONSTRAINT check_a;
+ERROR:  cannot drop inherited constraint "check_a" of relation "part_b"
+ALTER TABLE part_b DROP CONSTRAINT check_b;
+ERROR:  cannot drop inherited constraint "check_b" of relation "part_b"
+-- And dropping them from parted should leave no trace of them on part_b,
+-- unlike traditional inheritance, where they would be left behind as local
+-- constraints.
+ALTER TABLE parted DROP CONSTRAINT check_a, DROP CONSTRAINT check_b;
+SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_b'::regclass;
+ conislocal | coninhcount 
+------------+-------------
+(0 rows)
+
+-- specify PARTITION BY for a partition
+CREATE TABLE fail_part_col_not_found PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE (c);
+ERROR:  column "c" named in partition key does not exist
+LINE 1: ...TITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE (c);
+                                                                    ^
+CREATE TABLE part_c PARTITION OF parted (b WITH OPTIONS NOT NULL DEFAULT 0) FOR VALUES IN ('c') PARTITION BY RANGE ((b));
+-- create a level-2 partition
+CREATE TABLE part_c_1_10 PARTITION OF part_c FOR VALUES FROM (1) TO (10);
+-- check that NOT NULL and default value are inherited correctly
+create table parted_notnull_inh_test (a int default 1, b int not null default 0) partition by list (a);
+create table parted_notnull_inh_test1 partition of parted_notnull_inh_test (a not null, b default 1) for values in (1);
+insert into parted_notnull_inh_test (b) values (null);
+ERROR:  null value in column "b" of relation "parted_notnull_inh_test1" violates not-null constraint
+DETAIL:  Failing row contains (1, null).
+-- note that while b's default is overridden, a's default is preserved
+\d parted_notnull_inh_test1
+      Table "public.parted_notnull_inh_test1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           | not null | 1
+ b      | integer |           | not null | 1
+Partition of: parted_notnull_inh_test FOR VALUES IN (1)
+
+drop table parted_notnull_inh_test;
+-- check that collations are assigned in partition bound expressions
+create table parted_boolean_col (a bool, b text) partition by list(a);
+create table parted_boolean_less partition of parted_boolean_col
+  for values in ('foo' < 'bar');
+create table parted_boolean_greater partition of parted_boolean_col
+  for values in ('foo' > 'bar');
+drop table parted_boolean_col;
+-- check for a conflicting COLLATE clause
+create table parted_collate_must_match (a text collate "C", b text collate "C")
+  partition by range (a);
+-- on the partition key
+create table parted_collate_must_match1 partition of parted_collate_must_match
+  (a collate "POSIX") for values from ('a') to ('m');
+-- on another column
+create table parted_collate_must_match2 partition of parted_collate_must_match
+  (b collate "POSIX") for values from ('m') to ('z');
+drop table parted_collate_must_match;
+-- check that non-matching collations for partition bound
+-- expressions are coerced to the right collation
+create table test_part_coll_posix (a text) partition by range (a collate "POSIX");
+-- ok, collation is implicitly coerced
+create table test_part_coll partition of test_part_coll_posix for values from ('a' collate "C") to ('g');
+-- ok
+create table test_part_coll2 partition of test_part_coll_posix for values from ('g') to ('m');
+-- ok, collation is implicitly coerced
+create table test_part_coll_cast partition of test_part_coll_posix for values from (name 'm' collate "C") to ('s');
+-- ok; partition collation silently overrides the default collation of type 'name'
+create table test_part_coll_cast2 partition of test_part_coll_posix for values from (name 's') to ('z');
+drop table test_part_coll_posix;
+-- Partition bound in describe output
+\d+ part_b
+                                  Table "public.part_b"
+ Column |  Type   | Collation | Nullable | Default | Storage  | Stats target | Description 
+--------+---------+-----------+----------+---------+----------+--------------+------------- + a | text | | | | extended | | + b | integer | | not null | 1 | plain | | +Partition of: parted FOR VALUES IN ('b') +Partition constraint: ((a IS NOT NULL) AND (a = 'b'::text)) + +-- Both partition bound and partition key in describe output +\d+ part_c + Partitioned table "public.part_c" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + a | text | | | | extended | | + b | integer | | not null | 0 | plain | | +Partition of: parted FOR VALUES IN ('c') +Partition constraint: ((a IS NOT NULL) AND (a = 'c'::text)) +Partition key: RANGE (b) +Partitions: part_c_1_10 FOR VALUES FROM (1) TO (10) + +-- a level-2 partition's constraint will include the parent's expressions +\d+ part_c_1_10 + Table "public.part_c_1_10" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + a | text | | | | extended | | + b | integer | | not null | 0 | plain | | +Partition of: part_c FOR VALUES FROM (1) TO (10) +Partition constraint: ((a IS NOT NULL) AND (a = 'c'::text) AND (b IS NOT NULL) AND (b >= 1) AND (b < 10)) + +-- Show partition count in the parent's describe output +-- Tempted to include \d+ output listing partitions with bound info but +-- output could vary depending on the order in which partition oids are +-- returned. +\d parted + Partitioned table "public.parted" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | text | | | + b | integer | | not null | 0 +Partition key: LIST (a) +Number of partitions: 3 (Use \d+ to list them.) + +\d hash_parted + Partitioned table "public.hash_parted" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: HASH (a) +Number of partitions: 3 (Use \d+ to list them.) 
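+
+-- Aside (illustrative sketch, not part of the test proper): the partition
+-- counts shown above come from pg_inherits, so the same list is available
+-- directly, e.g.:
+--   SELECT inhrelid::regclass AS partition
+--     FROM pg_inherits
+--    WHERE inhparent = 'parted'::regclass;
+-- or, including nested levels, via pg_partition_tree('parted').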
+ +-- check that we get the expected partition constraints +CREATE TABLE range_parted4 (a int, b int, c int) PARTITION BY RANGE (abs(a), abs(b), c); +CREATE TABLE unbounded_range_part PARTITION OF range_parted4 FOR VALUES FROM (MINVALUE, MINVALUE, MINVALUE) TO (MAXVALUE, MAXVALUE, MAXVALUE); +\d+ unbounded_range_part + Table "public.unbounded_range_part" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | | | plain | | + c | integer | | | | plain | | +Partition of: range_parted4 FOR VALUES FROM (MINVALUE, MINVALUE, MINVALUE) TO (MAXVALUE, MAXVALUE, MAXVALUE) +Partition constraint: ((abs(a) IS NOT NULL) AND (abs(b) IS NOT NULL) AND (c IS NOT NULL)) + +DROP TABLE unbounded_range_part; +CREATE TABLE range_parted4_1 PARTITION OF range_parted4 FOR VALUES FROM (MINVALUE, MINVALUE, MINVALUE) TO (1, MAXVALUE, MAXVALUE); +\d+ range_parted4_1 + Table "public.range_parted4_1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | | | plain | | + c | integer | | | | plain | | +Partition of: range_parted4 FOR VALUES FROM (MINVALUE, MINVALUE, MINVALUE) TO (1, MAXVALUE, MAXVALUE) +Partition constraint: ((abs(a) IS NOT NULL) AND (abs(b) IS NOT NULL) AND (c IS NOT NULL) AND (abs(a) <= 1)) + +CREATE TABLE range_parted4_2 PARTITION OF range_parted4 FOR VALUES FROM (3, 4, 5) TO (6, 7, MAXVALUE); +\d+ range_parted4_2 + Table "public.range_parted4_2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | | | plain | | + c | integer | | | | plain | | +Partition of: range_parted4 FOR VALUES FROM (3, 4, 5) TO (6, 7, MAXVALUE) +Partition constraint: ((abs(a) IS NOT NULL) AND (abs(b) IS NOT NULL) AND (c IS NOT NULL) AND ((abs(a) > 3) OR ((abs(a) = 3) AND (abs(b) > 4)) OR ((abs(a) = 3) AND (abs(b) = 4) AND (c >= 5))) AND ((abs(a) < 6) OR ((abs(a) = 6) AND (abs(b) <= 7)))) + +CREATE TABLE range_parted4_3 PARTITION OF range_parted4 FOR VALUES FROM (6, 8, MINVALUE) TO (9, MAXVALUE, MAXVALUE); +\d+ range_parted4_3 + Table "public.range_parted4_3" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | | | plain | | + c | integer | | | | plain | | +Partition of: range_parted4 FOR VALUES FROM (6, 8, MINVALUE) TO (9, MAXVALUE, MAXVALUE) +Partition constraint: ((abs(a) IS NOT NULL) AND (abs(b) IS NOT NULL) AND (c IS NOT NULL) AND ((abs(a) > 6) OR ((abs(a) = 6) AND (abs(b) >= 8))) AND (abs(a) <= 9)) + +DROP TABLE range_parted4; +-- user-defined operator class in partition key +CREATE FUNCTION my_int4_sort(int4,int4) RETURNS int LANGUAGE sql + AS $$ SELECT CASE WHEN $1 = $2 THEN 0 WHEN $1 > $2 THEN 1 ELSE -1 END; $$; +CREATE OPERATOR CLASS test_int4_ops FOR TYPE int4 USING btree AS + OPERATOR 1 < (int4,int4), OPERATOR 2 <= (int4,int4), + OPERATOR 3 = (int4,int4), OPERATOR 4 >= (int4,int4), + OPERATOR 5 > (int4,int4), FUNCTION 1 my_int4_sort(int4,int4); +CREATE TABLE partkey_t (a int4) PARTITION BY RANGE (a test_int4_ops); +CREATE TABLE 
partkey_t_1 PARTITION OF partkey_t FOR VALUES FROM (0) TO (1000);
+INSERT INTO partkey_t VALUES (100);
+INSERT INTO partkey_t VALUES (200);
+-- cleanup
+DROP TABLE parted, list_parted, range_parted, list_parted2, range_parted2, range_parted3;
+DROP TABLE partkey_t, hash_parted, hash_parted2;
+DROP OPERATOR CLASS test_int4_ops USING btree;
+DROP FUNCTION my_int4_sort(int4,int4);
+-- comments on partitioned tables' columns
+CREATE TABLE parted_col_comment (a int, b text) PARTITION BY LIST (a);
+COMMENT ON TABLE parted_col_comment IS 'Am partitioned table';
+COMMENT ON COLUMN parted_col_comment.a IS 'Partition key';
+SELECT obj_description('parted_col_comment'::regclass);
+   obj_description    
+----------------------
+ Am partitioned table
+(1 row)
+
+\d+ parted_col_comment
+                        Partitioned table "public.parted_col_comment"
+ Column |  Type   | Collation | Nullable | Default | Storage  | Stats target |  Description  
+--------+---------+-----------+----------+---------+----------+--------------+---------------
+ a      | integer |           |          |         | plain    |              | Partition key
+ b      | text    |           |          |         | extended |              | 
+Partition key: LIST (a)
+Number of partitions: 0
+
+DROP TABLE parted_col_comment;
+-- list partitioning on array type column
+CREATE TABLE arrlp (a int[]) PARTITION BY LIST (a);
+CREATE TABLE arrlp12 PARTITION OF arrlp FOR VALUES IN ('{1}', '{2}');
+\d+ arrlp12
+                                   Table "public.arrlp12"
+ Column |   Type    | Collation | Nullable | Default | Storage  | Stats target | Description 
+--------+-----------+-----------+----------+---------+----------+--------------+-------------
+ a      | integer[] |           |          |         | extended |              | 
+Partition of: arrlp FOR VALUES IN ('{1}', '{2}')
+Partition constraint: ((a IS NOT NULL) AND ((a = '{1}'::integer[]) OR (a = '{2}'::integer[])))
+
+DROP TABLE arrlp;
+-- partition on boolean column
+create table boolspart (a bool) partition by list (a);
+create table boolspart_t partition of boolspart for values in (true);
+create table boolspart_f partition of boolspart for values in (false);
+\d+ boolspart
+                           Partitioned table "public.boolspart"
+ Column |  Type   | Collation | Nullable | Default | Storage | Stats target | Description 
+--------+---------+-----------+----------+---------+---------+--------------+-------------
+ a      | boolean |           |          |         | plain   |              | 
+Partition key: LIST (a)
+Partitions: boolspart_f FOR VALUES IN (false),
+            boolspart_t FOR VALUES IN (true)
+
+drop table boolspart;
+-- partitions mixing temporary and permanent relations
+create table perm_parted (a int) partition by list (a);
+create temporary table temp_parted (a int) partition by list (a);
+create table perm_part partition of temp_parted default; -- error
+ERROR:  cannot create a permanent relation as partition of temporary relation "temp_parted"
+create temp table temp_part partition of perm_parted default; -- error
+ERROR:  cannot create a temporary relation as partition of permanent relation "perm_parted"
+create temp table temp_part partition of temp_parted default; -- ok
+drop table perm_parted cascade;
+drop table temp_parted cascade;
+-- check that adding partitions to a table while it is being used is prevented
+create table tab_part_create (a int) partition by list (a);
+create or replace function func_part_create() returns trigger
+  language plpgsql as $$
+  begin
+    execute 'create table tab_part_create_1 partition of tab_part_create for values in (1)';
+    return null;
+  end $$;
+create trigger trig_part_create before insert on tab_part_create
+  for each statement execute procedure func_part_create();
+insert into tab_part_create values (1);
+ERROR:  cannot CREATE TABLE .. PARTITION OF "tab_part_create" because it is being used by active queries in this session
+CONTEXT:  SQL statement "create table tab_part_create_1 partition of tab_part_create for values in (1)"
+PL/pgSQL function func_part_create() line 3 at EXECUTE
+drop table tab_part_create;
+drop function func_part_create();
+-- test using a volatile expression as partition bound
+create table volatile_partbound_test (partkey timestamp) partition by range (partkey);
+create table volatile_partbound_test1 partition of volatile_partbound_test for values from (minvalue) to (current_timestamp);
+create table volatile_partbound_test2 partition of volatile_partbound_test for values from (current_timestamp) to (maxvalue);
+-- this should go into the partition volatile_partbound_test2
+insert into volatile_partbound_test values (current_timestamp);
+select tableoid::regclass from volatile_partbound_test;
+         tableoid         
+--------------------------
+ volatile_partbound_test2
+(1 row)
+
+drop table volatile_partbound_test;
+-- test the case where a check constraint on the default partition makes it
+-- possible to skip scanning it when adding a new partition
+create table defcheck (a int, b int) partition by list (b);
+create table defcheck_def (a int, c int, b int);
+alter table defcheck_def drop c;
+alter table defcheck attach partition defcheck_def default;
+alter table defcheck_def add check (b <= 0 and b is not null);
+create table defcheck_1 partition of defcheck for values in (1, null);
+-- test that complex default partition constraints are enforced correctly
+insert into defcheck_def values (0, 0);
+create table defcheck_0 partition of defcheck for values in (0);
+ERROR:  updated partition constraint for default partition "defcheck_def" would be violated by some row
+drop table defcheck;
+-- tests of column drop with partitioned tables and indexes using
+-- predicates and expressions.
+create table part_column_drop (
+  useless_1 int,
+  id int,
+  useless_2 int,
+  d int,
+  b int,
+  useless_3 int
+) partition by range (id);
+alter table part_column_drop drop column useless_1;
+alter table part_column_drop drop column useless_2;
+alter table part_column_drop drop column useless_3;
+create index part_column_drop_b_pred on part_column_drop(b) where b = 1;
+create index part_column_drop_b_expr on part_column_drop((b = 1));
+create index part_column_drop_d_pred on part_column_drop(d) where d = 2;
+create index part_column_drop_d_expr on part_column_drop((d = 2));
+create table part_column_drop_1_10 partition of
+  part_column_drop for values from (1) to (10);
+\d part_column_drop
+    Partitioned table "public.part_column_drop"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ id     | integer |           |          | 
+ d      | integer |           |          | 
+ b      | integer |           |          | 
+Partition key: RANGE (id)
+Indexes:
+    "part_column_drop_b_expr" btree ((b = 1))
+    "part_column_drop_b_pred" btree (b) WHERE b = 1
+    "part_column_drop_d_expr" btree ((d = 2))
+    "part_column_drop_d_pred" btree (d) WHERE d = 2
+Number of partitions: 1 (Use \d+ to list them.)
+ +\d part_column_drop_1_10 + Table "public.part_column_drop_1_10" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + id | integer | | | + d | integer | | | + b | integer | | | +Partition of: part_column_drop FOR VALUES FROM (1) TO (10) +Indexes: + "part_column_drop_1_10_b_idx" btree (b) WHERE b = 1 + "part_column_drop_1_10_d_idx" btree (d) WHERE d = 2 + "part_column_drop_1_10_expr_idx" btree ((b = 1)) + "part_column_drop_1_10_expr_idx1" btree ((d = 2)) + +drop table part_column_drop; diff --git a/src/test/regress/expected/sequence_1.out b/src/test/regress/expected/sequence_1.out new file mode 100644 index 00000000000..462e3f3caa4 --- /dev/null +++ b/src/test/regress/expected/sequence_1.out @@ -0,0 +1,824 @@ +-- +-- CREATE SEQUENCE +-- +-- various error cases +CREATE UNLOGGED SEQUENCE sequence_testx; +ERROR: unlogged sequences are not supported +CREATE SEQUENCE sequence_testx INCREMENT BY 0; +ERROR: INCREMENT must not be zero +CREATE SEQUENCE sequence_testx INCREMENT BY -1 MINVALUE 20; +ERROR: MINVALUE (20) must be less than MAXVALUE (-1) +CREATE SEQUENCE sequence_testx INCREMENT BY 1 MAXVALUE -20; +ERROR: MINVALUE (1) must be less than MAXVALUE (-20) +CREATE SEQUENCE sequence_testx INCREMENT BY -1 START 10; +ERROR: START value (10) cannot be greater than MAXVALUE (-1) +CREATE SEQUENCE sequence_testx INCREMENT BY 1 START -10; +ERROR: START value (-10) cannot be less than MINVALUE (1) +CREATE SEQUENCE sequence_testx CACHE 0; +ERROR: CACHE (0) must be greater than zero +-- OWNED BY errors +CREATE SEQUENCE sequence_testx OWNED BY nobody; -- nonsense word +ERROR: invalid OWNED BY option +HINT: Specify OWNED BY table.column or OWNED BY NONE. +CREATE SEQUENCE sequence_testx OWNED BY pg_class_oid_index.oid; -- not a table +ERROR: referenced relation "pg_class_oid_index" is not a table or foreign table +CREATE SEQUENCE sequence_testx OWNED BY pg_class.relname; -- not same schema +ERROR: sequence must be in same schema as table it is linked to +CREATE TABLE sequence_test_table (a int); +CREATE SEQUENCE sequence_testx OWNED BY sequence_test_table.b; -- wrong column +ERROR: column "b" of relation "sequence_test_table" does not exist +DROP TABLE sequence_test_table; +-- sequence data types +CREATE SEQUENCE sequence_test5 AS integer; +CREATE SEQUENCE sequence_test6 AS smallint; +CREATE SEQUENCE sequence_test7 AS bigint; +CREATE SEQUENCE sequence_test8 AS integer MAXVALUE 100000; +CREATE SEQUENCE sequence_test9 AS integer INCREMENT BY -1; +CREATE SEQUENCE sequence_test10 AS integer MINVALUE -100000 START 1; +CREATE SEQUENCE sequence_test11 AS smallint; +CREATE SEQUENCE sequence_test12 AS smallint INCREMENT -1; +CREATE SEQUENCE sequence_test13 AS smallint MINVALUE -32768; +CREATE SEQUENCE sequence_test14 AS smallint MAXVALUE 32767 INCREMENT -1; +CREATE SEQUENCE sequence_testx AS text; +ERROR: sequence type must be smallint, integer, or bigint +CREATE SEQUENCE sequence_testx AS nosuchtype; +ERROR: type "nosuchtype" does not exist +LINE 1: CREATE SEQUENCE sequence_testx AS nosuchtype; + ^ +CREATE SEQUENCE sequence_testx AS smallint MAXVALUE 100000; +ERROR: MAXVALUE (100000) is out of range for sequence data type smallint +CREATE SEQUENCE sequence_testx AS smallint MINVALUE -100000; +ERROR: MINVALUE (-100000) is out of range for sequence data type smallint +ALTER SEQUENCE sequence_test5 AS smallint; -- success, max will be adjusted +ALTER SEQUENCE sequence_test8 AS smallint; -- fail, max has to be adjusted +ERROR: MAXVALUE (100000) is out of range for 
sequence data type smallint +ALTER SEQUENCE sequence_test8 AS smallint MAXVALUE 20000; -- ok now +ALTER SEQUENCE sequence_test9 AS smallint; -- success, min will be adjusted +ALTER SEQUENCE sequence_test10 AS smallint; -- fail, min has to be adjusted +ERROR: MINVALUE (-100000) is out of range for sequence data type smallint +ALTER SEQUENCE sequence_test10 AS smallint MINVALUE -20000; -- ok now +ALTER SEQUENCE sequence_test11 AS int; -- max will be adjusted +ALTER SEQUENCE sequence_test12 AS int; -- min will be adjusted +ALTER SEQUENCE sequence_test13 AS int; -- min and max will be adjusted +ALTER SEQUENCE sequence_test14 AS int; -- min and max will be adjusted +--- +--- test creation of SERIAL column +--- +CREATE TABLE serialTest1 (f1 text, f2 serial); +INSERT INTO serialTest1 VALUES ('foo'); +INSERT INTO serialTest1 VALUES ('bar'); +INSERT INTO serialTest1 VALUES ('force', 100); +INSERT INTO serialTest1 VALUES ('wrong', NULL); +ERROR: null value in column "f2" of relation "serialtest1" violates not-null constraint +DETAIL: Failing row contains (wrong, null). +SELECT * FROM serialTest1; + f1 | f2 +-------+----- + foo | 1 + bar | 2 + force | 100 +(3 rows) + +SELECT pg_get_serial_sequence('serialTest1', 'f2'); + pg_get_serial_sequence +--------------------------- + public.serialtest1_f2_seq +(1 row) + +-- test smallserial / bigserial +CREATE TABLE serialTest2 (f1 text, f2 serial, f3 smallserial, f4 serial2, + f5 bigserial, f6 serial8); +INSERT INTO serialTest2 (f1) + VALUES ('test_defaults'); +INSERT INTO serialTest2 (f1, f2, f3, f4, f5, f6) + VALUES ('test_max_vals', 2147483647, 32767, 32767, 9223372036854775807, + 9223372036854775807), + ('test_min_vals', -2147483648, -32768, -32768, -9223372036854775808, + -9223372036854775808); +-- All these INSERTs should fail: +INSERT INTO serialTest2 (f1, f3) + VALUES ('bogus', -32769); +ERROR: smallint out of range +INSERT INTO serialTest2 (f1, f4) + VALUES ('bogus', -32769); +ERROR: smallint out of range +INSERT INTO serialTest2 (f1, f3) + VALUES ('bogus', 32768); +ERROR: smallint out of range +INSERT INTO serialTest2 (f1, f4) + VALUES ('bogus', 32768); +ERROR: smallint out of range +INSERT INTO serialTest2 (f1, f5) + VALUES ('bogus', -9223372036854775809); +ERROR: bigint out of range +INSERT INTO serialTest2 (f1, f6) + VALUES ('bogus', -9223372036854775809); +ERROR: bigint out of range +INSERT INTO serialTest2 (f1, f5) + VALUES ('bogus', 9223372036854775808); +ERROR: bigint out of range +INSERT INTO serialTest2 (f1, f6) + VALUES ('bogus', 9223372036854775808); +ERROR: bigint out of range +SELECT * FROM serialTest2 ORDER BY f2 ASC; + f1 | f2 | f3 | f4 | f5 | f6 +---------------+-------------+--------+--------+----------------------+---------------------- + test_min_vals | -2147483648 | -32768 | -32768 | -9223372036854775808 | -9223372036854775808 + test_defaults | 1 | 1 | 1 | 1 | 1 + test_max_vals | 2147483647 | 32767 | 32767 | 9223372036854775807 | 9223372036854775807 +(3 rows) + +SELECT nextval('serialTest2_f2_seq'); + nextval +--------- + 2 +(1 row) + +SELECT nextval('serialTest2_f3_seq'); + nextval +--------- + 2 +(1 row) + +SELECT nextval('serialTest2_f4_seq'); + nextval +--------- + 2 +(1 row) + +SELECT nextval('serialTest2_f5_seq'); + nextval +--------- + 2 +(1 row) + +SELECT nextval('serialTest2_f6_seq'); + nextval +--------- + 2 +(1 row) + +-- basic sequence operations using both text and oid references +CREATE SEQUENCE sequence_test; +CREATE SEQUENCE IF NOT EXISTS sequence_test; +NOTICE: relation "sequence_test" already exists, skipping 
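+-- Aside (explanatory note, not part of the test proper): the two argument
+-- styles below differ in binding time.  A regclass argument is resolved to
+-- the sequence's OID (so it keeps working across a rename), while a text
+-- argument is looked up by name on every call; both paths are exercised
+-- here.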
+SELECT nextval('sequence_test'::text); + nextval +--------- + 1 +(1 row) + +SELECT nextval('sequence_test'::regclass); + nextval +--------- + 2 +(1 row) + +SELECT currval('sequence_test'::text); + currval +--------- + 2 +(1 row) + +SELECT currval('sequence_test'::regclass); + currval +--------- + 2 +(1 row) + +SELECT setval('sequence_test'::text, 32); + setval +-------- + 32 +(1 row) + +SELECT nextval('sequence_test'::regclass); + nextval +--------- + 33 +(1 row) + +SELECT setval('sequence_test'::text, 99, false); + setval +-------- + 99 +(1 row) + +SELECT nextval('sequence_test'::regclass); + nextval +--------- + 99 +(1 row) + +SELECT setval('sequence_test'::regclass, 32); + setval +-------- + 32 +(1 row) + +SELECT nextval('sequence_test'::text); + nextval +--------- + 33 +(1 row) + +SELECT setval('sequence_test'::regclass, 99, false); + setval +-------- + 99 +(1 row) + +SELECT nextval('sequence_test'::text); + nextval +--------- + 99 +(1 row) + +DISCARD SEQUENCES; +SELECT currval('sequence_test'::regclass); +ERROR: currval of sequence "sequence_test" is not yet defined in this session +DROP SEQUENCE sequence_test; +-- renaming sequences +CREATE SEQUENCE foo_seq; +ALTER TABLE foo_seq RENAME TO foo_seq_new; +SELECT * FROM foo_seq_new; + last_value | log_cnt | is_called +------------+---------+----------- + 1 | 0 | f +(1 row) + +SELECT nextval('foo_seq_new'); + nextval +--------- + 1 +(1 row) + +SELECT nextval('foo_seq_new'); + nextval +--------- + 2 +(1 row) + +-- log_cnt can be higher if there is a checkpoint just at the right +-- time, so just test for the expected range +SELECT last_value, log_cnt IN (31, 32) AS log_cnt_ok, is_called FROM foo_seq_new; + last_value | log_cnt_ok | is_called +------------+------------+----------- + 2 | f | t +(1 row) + +DROP SEQUENCE foo_seq_new; +-- renaming serial sequences +ALTER TABLE serialtest1_f2_seq RENAME TO serialtest1_f2_foo; +INSERT INTO serialTest1 VALUES ('more'); +SELECT * FROM serialTest1; + f1 | f2 +-------+----- + foo | 1 + bar | 2 + force | 100 + more | 3 +(4 rows) + +-- +-- Check dependencies of serial and ordinary sequences +-- +CREATE TEMP SEQUENCE myseq2; +CREATE TEMP SEQUENCE myseq3; +CREATE TEMP TABLE t1 ( + f1 serial, + f2 int DEFAULT nextval('myseq2'), + f3 int DEFAULT nextval('myseq3'::text) +); +-- Both drops should fail, but with different error messages: +DROP SEQUENCE t1_f1_seq; +ERROR: cannot drop sequence t1_f1_seq because other objects depend on it +DETAIL: default value for column f1 of table t1 depends on sequence t1_f1_seq +HINT: Use DROP ... CASCADE to drop the dependent objects too. +DROP SEQUENCE myseq2; +ERROR: cannot drop sequence myseq2 because other objects depend on it +DETAIL: default value for column f2 of table t1 depends on sequence myseq2 +HINT: Use DROP ... CASCADE to drop the dependent objects too. 
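-- A sketch, not part of the expected output, of how to inspect the pg_depend
-- entries behind the two DROP errors above; it assumes the temp table t1 from
-- this test still exists in the current session.
-- The serial sequence is auto-owned by its column (deptype 'a'):
SELECT refobjid::regclass AS owning_table, refobjsubid AS owning_column, deptype
  FROM pg_depend
 WHERE objid = 't1_f1_seq'::regclass AND refobjsubid > 0;
-- The ordinary sequence is referenced by the column default (a pg_attrdef
-- row with deptype 'n'), which is what blocks the plain DROP:
SELECT classid::regclass AS dependent_catalog, deptype
  FROM pg_depend
 WHERE refobjid = 'myseq2'::regclass;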
+-- This however will work: +DROP SEQUENCE myseq3; +DROP TABLE t1; +-- Fails because no longer existent: +DROP SEQUENCE t1_f1_seq; +ERROR: sequence "t1_f1_seq" does not exist +-- Now OK: +DROP SEQUENCE myseq2; +-- +-- Alter sequence +-- +ALTER SEQUENCE IF EXISTS sequence_test2 RESTART WITH 24 + INCREMENT BY 4 MAXVALUE 36 MINVALUE 5 CYCLE; +NOTICE: relation "sequence_test2" does not exist, skipping +ALTER SEQUENCE serialTest1 CYCLE; -- error, not a sequence +ERROR: "serialtest1" is not a sequence +CREATE SEQUENCE sequence_test2 START WITH 32; +CREATE SEQUENCE sequence_test4 INCREMENT BY -1; +SELECT nextval('sequence_test2'); + nextval +--------- + 32 +(1 row) + +SELECT nextval('sequence_test4'); + nextval +--------- + -1 +(1 row) + +ALTER SEQUENCE sequence_test2 RESTART; +SELECT nextval('sequence_test2'); + nextval +--------- + 32 +(1 row) + +ALTER SEQUENCE sequence_test2 RESTART WITH 0; -- error +ERROR: RESTART value (0) cannot be less than MINVALUE (1) +ALTER SEQUENCE sequence_test4 RESTART WITH 40; -- error +ERROR: RESTART value (40) cannot be greater than MAXVALUE (-1) +-- test CYCLE and NO CYCLE +ALTER SEQUENCE sequence_test2 RESTART WITH 24 + INCREMENT BY 4 MAXVALUE 36 MINVALUE 5 CYCLE; +SELECT nextval('sequence_test2'); + nextval +--------- + 24 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + 28 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + 32 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + 36 +(1 row) + +SELECT nextval('sequence_test2'); -- cycled + nextval +--------- + 5 +(1 row) + +ALTER SEQUENCE sequence_test2 RESTART WITH 24 + NO CYCLE; +SELECT nextval('sequence_test2'); + nextval +--------- + 24 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + 28 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + 32 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + 36 +(1 row) + +SELECT nextval('sequence_test2'); -- error +ERROR: nextval: reached maximum value of sequence "sequence_test2" (36) +ALTER SEQUENCE sequence_test2 RESTART WITH -24 START WITH -24 + INCREMENT BY -4 MINVALUE -36 MAXVALUE -5 CYCLE; +SELECT nextval('sequence_test2'); + nextval +--------- + -24 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + -28 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + -32 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + -36 +(1 row) + +SELECT nextval('sequence_test2'); -- cycled + nextval +--------- + -5 +(1 row) + +ALTER SEQUENCE sequence_test2 RESTART WITH -24 + NO CYCLE; +SELECT nextval('sequence_test2'); + nextval +--------- + -24 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + -28 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + -32 +(1 row) + +SELECT nextval('sequence_test2'); + nextval +--------- + -36 +(1 row) + +SELECT nextval('sequence_test2'); -- error +ERROR: nextval: reached minimum value of sequence "sequence_test2" (-36) +-- reset +ALTER SEQUENCE IF EXISTS sequence_test2 RESTART WITH 32 START WITH 32 + INCREMENT BY 4 MAXVALUE 36 MINVALUE 5 CYCLE; +SELECT setval('sequence_test2', -100); -- error +ERROR: setval: value -100 is out of bounds for sequence "sequence_test2" (5..36) +SELECT setval('sequence_test2', 100); -- error +ERROR: setval: value 100 is out of bounds for sequence "sequence_test2" (5..36) +SELECT setval('sequence_test2', 5); + setval +-------- + 5 +(1 row) + +CREATE SEQUENCE sequence_test3; -- not read from, to test is_called +-- Information schema 
+SELECT * FROM information_schema.sequences + WHERE sequence_name ~ ANY(ARRAY['sequence_test', 'serialtest']) + ORDER BY sequence_name ASC; + sequence_catalog | sequence_schema | sequence_name | data_type | numeric_precision | numeric_precision_radix | numeric_scale | start_value | minimum_value | maximum_value | increment | cycle_option +------------------+-----------------+--------------------+-----------+-------------------+-------------------------+---------------+-------------+----------------------+---------------------+-----------+-------------- + regression | public | sequence_test10 | smallint | 16 | 2 | 0 | 1 | -20000 | 32767 | 1 | NO + regression | public | sequence_test11 | integer | 32 | 2 | 0 | 1 | 1 | 2147483647 | 1 | NO + regression | public | sequence_test12 | integer | 32 | 2 | 0 | -1 | -2147483648 | -1 | -1 | NO + regression | public | sequence_test13 | integer | 32 | 2 | 0 | -32768 | -2147483648 | 2147483647 | 1 | NO + regression | public | sequence_test14 | integer | 32 | 2 | 0 | 32767 | -2147483648 | 2147483647 | -1 | NO + regression | public | sequence_test2 | bigint | 64 | 2 | 0 | 32 | 5 | 36 | 4 | YES + regression | public | sequence_test3 | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO + regression | public | sequence_test4 | bigint | 64 | 2 | 0 | -1 | -9223372036854775808 | -1 | -1 | NO + regression | public | sequence_test5 | smallint | 16 | 2 | 0 | 1 | 1 | 32767 | 1 | NO + regression | public | sequence_test6 | smallint | 16 | 2 | 0 | 1 | 1 | 32767 | 1 | NO + regression | public | sequence_test7 | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO + regression | public | sequence_test8 | smallint | 16 | 2 | 0 | 1 | 1 | 20000 | 1 | NO + regression | public | sequence_test9 | smallint | 16 | 2 | 0 | -1 | -32768 | -1 | -1 | NO + regression | public | serialtest1_f2_foo | integer | 32 | 2 | 0 | 1 | 1 | 2147483647 | 1 | NO + regression | public | serialtest2_f2_seq | integer | 32 | 2 | 0 | 1 | 1 | 2147483647 | 1 | NO + regression | public | serialtest2_f3_seq | smallint | 16 | 2 | 0 | 1 | 1 | 32767 | 1 | NO + regression | public | serialtest2_f4_seq | smallint | 16 | 2 | 0 | 1 | 1 | 32767 | 1 | NO + regression | public | serialtest2_f5_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO + regression | public | serialtest2_f6_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO +(19 rows) + +SELECT schemaname, sequencename, start_value, min_value, max_value, increment_by, cycle, cache_size, last_value +FROM pg_sequences +WHERE sequencename ~ ANY(ARRAY['sequence_test', 'serialtest']) + ORDER BY sequencename ASC; + schemaname | sequencename | start_value | min_value | max_value | increment_by | cycle | cache_size | last_value +------------+--------------------+-------------+----------------------+---------------------+--------------+-------+------------+------------ + public | sequence_test10 | 1 | -20000 | 32767 | 1 | f | 1 | + public | sequence_test11 | 1 | 1 | 2147483647 | 1 | f | 1 | + public | sequence_test12 | -1 | -2147483648 | -1 | -1 | f | 1 | + public | sequence_test13 | -32768 | -2147483648 | 2147483647 | 1 | f | 1 | + public | sequence_test14 | 32767 | -2147483648 | 2147483647 | -1 | f | 1 | + public | sequence_test2 | 32 | 5 | 36 | 4 | t | 1 | 5 + public | sequence_test3 | 1 | 1 | 9223372036854775807 | 1 | f | 1 | + public | sequence_test4 | -1 | -9223372036854775808 | -1 | -1 | f | 1 | -1 + public | sequence_test5 | 1 | 1 | 32767 | 1 | f | 1 | + public | sequence_test6 | 1 | 1 | 32767 | 1 | f | 1 | + public | 
sequence_test7 | 1 | 1 | 9223372036854775807 | 1 | f | 1 | + public | sequence_test8 | 1 | 1 | 20000 | 1 | f | 1 | + public | sequence_test9 | -1 | -32768 | -1 | -1 | f | 1 | + public | serialtest1_f2_foo | 1 | 1 | 2147483647 | 1 | f | 1 | 3 + public | serialtest2_f2_seq | 1 | 1 | 2147483647 | 1 | f | 1 | 2 + public | serialtest2_f3_seq | 1 | 1 | 32767 | 1 | f | 1 | 2 + public | serialtest2_f4_seq | 1 | 1 | 32767 | 1 | f | 1 | 2 + public | serialtest2_f5_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2 + public | serialtest2_f6_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2 +(19 rows) + +SELECT * FROM pg_sequence_parameters('sequence_test4'::regclass); + start_value | minimum_value | maximum_value | increment | cycle_option | cache_size | data_type +-------------+----------------------+---------------+-----------+--------------+------------+----------- + -1 | -9223372036854775808 | -1 | -1 | f | 1 | 20 +(1 row) + +\d sequence_test4 + Sequence "public.sequence_test4" + Type | Start | Minimum | Maximum | Increment | Cycles? | Cache +--------+-------+----------------------+---------+-----------+---------+------- + bigint | -1 | -9223372036854775808 | -1 | -1 | no | 1 + +\d serialtest2_f2_seq + Sequence "public.serialtest2_f2_seq" + Type | Start | Minimum | Maximum | Increment | Cycles? | Cache +---------+-------+---------+------------+-----------+---------+------- + integer | 1 | 1 | 2147483647 | 1 | no | 1 +Owned by: public.serialtest2.f2 + +-- Test comments +COMMENT ON SEQUENCE asdf IS 'won''t work'; +ERROR: relation "asdf" does not exist +COMMENT ON SEQUENCE sequence_test2 IS 'will work'; +COMMENT ON SEQUENCE sequence_test2 IS NULL; +-- Test lastval() +CREATE SEQUENCE seq; +SELECT nextval('seq'); + nextval +--------- + 1 +(1 row) + +SELECT lastval(); + lastval +--------- + 1 +(1 row) + +SELECT setval('seq', 99); + setval +-------- + 99 +(1 row) + +SELECT lastval(); + lastval +--------- + 99 +(1 row) + +DISCARD SEQUENCES; +SELECT lastval(); +ERROR: lastval is not yet defined in this session +CREATE SEQUENCE seq2; +SELECT nextval('seq2'); + nextval +--------- + 1 +(1 row) + +SELECT lastval(); + lastval +--------- + 1 +(1 row) + +DROP SEQUENCE seq2; +-- should fail +SELECT lastval(); +ERROR: lastval is not yet defined in this session +-- Test sequences in read-only transactions +CREATE TEMPORARY SEQUENCE sequence_test_temp1; +START TRANSACTION READ ONLY; +SELECT nextval('sequence_test_temp1'); -- ok + nextval +--------- + 1 +(1 row) + +SELECT nextval('sequence_test2'); -- error +ERROR: cannot execute nextval() in a read-only transaction +ROLLBACK; +START TRANSACTION READ ONLY; +SELECT setval('sequence_test_temp1', 1); -- ok + setval +-------- + 1 +(1 row) + +SELECT setval('sequence_test2', 1); -- error +ERROR: cannot execute setval() in a read-only transaction +ROLLBACK; +-- privileges tests +CREATE USER regress_seq_user; +-- nextval +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT SELECT ON seq3 TO regress_seq_user; +SELECT nextval('seq3'); +ERROR: permission denied for sequence seq3 +ROLLBACK; +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT UPDATE ON seq3 TO regress_seq_user; +SELECT nextval('seq3'); + nextval +--------- + 1 +(1 row) + +ROLLBACK; +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT USAGE ON seq3 TO regress_seq_user; +SELECT 
nextval('seq3'); + nextval +--------- + 1 +(1 row) + +ROLLBACK; +-- currval +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +SELECT nextval('seq3'); + nextval +--------- + 1 +(1 row) + +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT SELECT ON seq3 TO regress_seq_user; +SELECT currval('seq3'); + currval +--------- + 1 +(1 row) + +ROLLBACK; +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +SELECT nextval('seq3'); + nextval +--------- + 1 +(1 row) + +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT UPDATE ON seq3 TO regress_seq_user; +SELECT currval('seq3'); +ERROR: permission denied for sequence seq3 +ROLLBACK; +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +SELECT nextval('seq3'); + nextval +--------- + 1 +(1 row) + +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT USAGE ON seq3 TO regress_seq_user; +SELECT currval('seq3'); + currval +--------- + 1 +(1 row) + +ROLLBACK; +-- lastval +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +SELECT nextval('seq3'); + nextval +--------- + 1 +(1 row) + +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT SELECT ON seq3 TO regress_seq_user; +SELECT lastval(); + lastval +--------- + 1 +(1 row) + +ROLLBACK; +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +SELECT nextval('seq3'); + nextval +--------- + 1 +(1 row) + +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT UPDATE ON seq3 TO regress_seq_user; +SELECT lastval(); +ERROR: permission denied for sequence seq3 +ROLLBACK; +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +SELECT nextval('seq3'); + nextval +--------- + 1 +(1 row) + +REVOKE ALL ON seq3 FROM regress_seq_user; +GRANT USAGE ON seq3 TO regress_seq_user; +SELECT lastval(); + lastval +--------- + 1 +(1 row) + +ROLLBACK; +-- setval +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +CREATE SEQUENCE seq3; +REVOKE ALL ON seq3 FROM regress_seq_user; +SAVEPOINT save; +SELECT setval('seq3', 5); +ERROR: permission denied for sequence seq3 +ROLLBACK TO save; +GRANT UPDATE ON seq3 TO regress_seq_user; +SELECT setval('seq3', 5); + setval +-------- + 5 +(1 row) + +SELECT nextval('seq3'); + nextval +--------- + 6 +(1 row) + +ROLLBACK; +-- ALTER SEQUENCE +BEGIN; +SET LOCAL SESSION AUTHORIZATION regress_seq_user; +ALTER SEQUENCE sequence_test2 START WITH 1; +ERROR: must be owner of sequence sequence_test2 +ROLLBACK; +-- Sequences should get wiped out as well: +DROP TABLE serialTest1, serialTest2; +-- Make sure sequences are gone: +SELECT * FROM information_schema.sequences WHERE sequence_name IN + ('sequence_test2', 'serialtest2_f2_seq', 'serialtest2_f3_seq', + 'serialtest2_f4_seq', 'serialtest2_f5_seq', 'serialtest2_f6_seq') + ORDER BY sequence_name ASC; + sequence_catalog | sequence_schema | sequence_name | data_type | numeric_precision | numeric_precision_radix | numeric_scale | start_value | minimum_value | maximum_value | increment | cycle_option +------------------+-----------------+----------------+-----------+-------------------+-------------------------+---------------+-------------+---------------+---------------+-----------+-------------- + regression | public | sequence_test2 | bigint | 64 | 2 | 0 | 32 | 5 | 36 | 4 | YES +(1 row) + +DROP USER regress_seq_user; +DROP SEQUENCE seq; +-- cache tests +CREATE SEQUENCE test_seq1 CACHE 10; +SELECT nextval('test_seq1'); + nextval +--------- + 1 +(1 row) + +SELECT nextval('test_seq1'); + nextval +--------- 
+ 2 +(1 row) + +SELECT nextval('test_seq1'); + nextval +--------- + 3 +(1 row) + +DROP SEQUENCE test_seq1; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 2088857615a..4453fcaa517 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -120,9 +120,10 @@ select name, setting from pg_settings where name like 'enable%'; enable_partitionwise_aggregate | off enable_partitionwise_join | off enable_seqscan | on + enable_seqscan_prefetch | on enable_sort | on enable_tidscan | on -(20 rows) +(21 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/output/tablespace_1.source b/src/test/regress/output/tablespace_1.source new file mode 100644 index 00000000000..1c3b75cb6d1 --- /dev/null +++ b/src/test/regress/output/tablespace_1.source @@ -0,0 +1,941 @@ +-- create a tablespace using WITH clause +CREATE TABLESPACE regress_tblspacewith LOCATION '@testtablespace@' WITH (some_nonexistent_parameter = true); -- fail +ERROR: unrecognized parameter "some_nonexistent_parameter" +CREATE TABLESPACE regress_tblspacewith LOCATION '@testtablespace@' WITH (random_page_cost = 3.0); -- ok +-- check to see the parameter was used +SELECT spcoptions FROM pg_tablespace WHERE spcname = 'regress_tblspacewith'; + spcoptions +------------------------ + {random_page_cost=3.0} +(1 row) + +-- drop the tablespace so we can re-use the location +DROP TABLESPACE regress_tblspacewith; +-- create a tablespace we can use +CREATE TABLESPACE regress_tblspace LOCATION '@testtablespace@'; +-- try setting and resetting some properties for the new tablespace +ALTER TABLESPACE regress_tblspace SET (random_page_cost = 1.0, seq_page_cost = 1.1); +ALTER TABLESPACE regress_tblspace SET (some_nonexistent_parameter = true); -- fail +ERROR: unrecognized parameter "some_nonexistent_parameter" +ALTER TABLESPACE regress_tblspace RESET (random_page_cost = 2.0); -- fail +ERROR: RESET must not include values for parameters +ALTER TABLESPACE regress_tblspace RESET (random_page_cost, effective_io_concurrency); -- ok +-- REINDEX (TABLESPACE) +-- catalogs and system tablespaces +-- system catalog, fail +REINDEX (TABLESPACE regress_tblspace) TABLE pg_am; +ERROR: cannot move system relation "pg_am_name_index" +REINDEX (TABLESPACE regress_tblspace) TABLE CONCURRENTLY pg_am; +ERROR: cannot reindex system catalogs concurrently +-- shared catalog, fail +REINDEX (TABLESPACE regress_tblspace) TABLE pg_authid; +ERROR: cannot move system relation "pg_authid_rolname_index" +REINDEX (TABLESPACE regress_tblspace) TABLE CONCURRENTLY pg_authid; +ERROR: cannot reindex system catalogs concurrently +-- toast relations, fail +REINDEX (TABLESPACE regress_tblspace) INDEX pg_toast.pg_toast_1260_index; +ERROR: cannot move system relation "pg_toast_1260_index" +REINDEX (TABLESPACE regress_tblspace) INDEX CONCURRENTLY pg_toast.pg_toast_1260_index; +ERROR: cannot reindex system catalogs concurrently +REINDEX (TABLESPACE regress_tblspace) TABLE pg_toast.pg_toast_1260; +ERROR: cannot move system relation "pg_toast_1260_index" +REINDEX (TABLESPACE regress_tblspace) TABLE CONCURRENTLY pg_toast.pg_toast_1260; +ERROR: cannot reindex system catalogs concurrently +-- system catalog, fail +REINDEX (TABLESPACE pg_global) TABLE pg_authid; +ERROR: cannot move system relation "pg_authid_rolname_index" +REINDEX (TABLESPACE pg_global) TABLE CONCURRENTLY pg_authid; +ERROR: cannot 
reindex system catalogs concurrently +-- table with toast relation +CREATE TABLE regress_tblspace_test_tbl (num1 bigint, num2 double precision, t text); +INSERT INTO regress_tblspace_test_tbl (num1, num2, t) + SELECT round(random()*100), random(), 'text' + FROM generate_series(1, 10) s(i); +CREATE INDEX regress_tblspace_test_tbl_idx ON regress_tblspace_test_tbl (num1); +-- move to global tablespace, fail +REINDEX (TABLESPACE pg_global) INDEX regress_tblspace_test_tbl_idx; +ERROR: only shared relations can be placed in pg_global tablespace +REINDEX (TABLESPACE pg_global) INDEX CONCURRENTLY regress_tblspace_test_tbl_idx; +ERROR: cannot move non-shared relation to tablespace "pg_global" +-- check transactional behavior of REINDEX (TABLESPACE) +BEGIN; +REINDEX (TABLESPACE regress_tblspace) INDEX regress_tblspace_test_tbl_idx; +REINDEX (TABLESPACE regress_tblspace) TABLE regress_tblspace_test_tbl; +ROLLBACK; +-- no relation moved to the new tablespace +SELECT c.relname FROM pg_class c, pg_tablespace s + WHERE c.reltablespace = s.oid AND s.spcname = 'regress_tblspace'; + relname +--------- +(0 rows) + +-- check that all indexes are moved to a new tablespace with different +-- relfilenode. +-- Save first the existing relfilenode for the toast and main relations. +SELECT relfilenode as main_filenode FROM pg_class + WHERE relname = 'regress_tblspace_test_tbl_idx' \gset +SELECT relfilenode as toast_filenode FROM pg_class + WHERE oid = + (SELECT i.indexrelid + FROM pg_class c, + pg_index i + WHERE i.indrelid = c.reltoastrelid AND + c.relname = 'regress_tblspace_test_tbl') \gset +REINDEX (TABLESPACE regress_tblspace) TABLE regress_tblspace_test_tbl; +SELECT c.relname FROM pg_class c, pg_tablespace s + WHERE c.reltablespace = s.oid AND s.spcname = 'regress_tblspace' + ORDER BY c.relname; + relname +------------------------------- + regress_tblspace_test_tbl_idx +(1 row) + +ALTER TABLE regress_tblspace_test_tbl SET TABLESPACE regress_tblspace; +ALTER TABLE regress_tblspace_test_tbl SET TABLESPACE pg_default; +SELECT c.relname FROM pg_class c, pg_tablespace s + WHERE c.reltablespace = s.oid AND s.spcname = 'regress_tblspace' + ORDER BY c.relname; + relname +------------------------------- + regress_tblspace_test_tbl_idx +(1 row) + +-- Move back to the default tablespace. +ALTER INDEX regress_tblspace_test_tbl_idx SET TABLESPACE pg_default; +SELECT c.relname FROM pg_class c, pg_tablespace s + WHERE c.reltablespace = s.oid AND s.spcname = 'regress_tblspace' + ORDER BY c.relname; + relname +--------- +(0 rows) + +REINDEX (TABLESPACE regress_tblspace, CONCURRENTLY) TABLE regress_tblspace_test_tbl; +SELECT c.relname FROM pg_class c, pg_tablespace s + WHERE c.reltablespace = s.oid AND s.spcname = 'regress_tblspace' + ORDER BY c.relname; + relname +------------------------------- + regress_tblspace_test_tbl_idx +(1 row) + +SELECT relfilenode = :main_filenode AS main_same FROM pg_class + WHERE relname = 'regress_tblspace_test_tbl_idx'; + main_same +----------- + f +(1 row) + +SELECT relfilenode = :toast_filenode as toast_same FROM pg_class + WHERE oid = + (SELECT i.indexrelid + FROM pg_class c, + pg_index i + WHERE i.indrelid = c.reltoastrelid AND + c.relname = 'regress_tblspace_test_tbl'); + toast_same +------------ + f +(1 row) + +DROP TABLE regress_tblspace_test_tbl; +-- REINDEX (TABLESPACE) with partitions +-- Create a partition tree and check the set of relations reindexed +-- with their new tablespace. 
+CREATE TABLE tbspace_reindex_part (c1 int, c2 int) PARTITION BY RANGE (c1); +CREATE TABLE tbspace_reindex_part_0 PARTITION OF tbspace_reindex_part + FOR VALUES FROM (0) TO (10) PARTITION BY list (c2); +CREATE TABLE tbspace_reindex_part_0_1 PARTITION OF tbspace_reindex_part_0 + FOR VALUES IN (1); +CREATE TABLE tbspace_reindex_part_0_2 PARTITION OF tbspace_reindex_part_0 + FOR VALUES IN (2); +-- This partitioned table will have no partitions. +CREATE TABLE tbspace_reindex_part_10 PARTITION OF tbspace_reindex_part + FOR VALUES FROM (10) TO (20) PARTITION BY list (c2); +-- Create some partitioned indexes +CREATE INDEX tbspace_reindex_part_index ON ONLY tbspace_reindex_part (c1); +CREATE INDEX tbspace_reindex_part_index_0 ON ONLY tbspace_reindex_part_0 (c1); +ALTER INDEX tbspace_reindex_part_index ATTACH PARTITION tbspace_reindex_part_index_0; +-- This partitioned index will have no partitions. +CREATE INDEX tbspace_reindex_part_index_10 ON ONLY tbspace_reindex_part_10 (c1); +ALTER INDEX tbspace_reindex_part_index ATTACH PARTITION tbspace_reindex_part_index_10; +CREATE INDEX tbspace_reindex_part_index_0_1 ON ONLY tbspace_reindex_part_0_1 (c1); +ALTER INDEX tbspace_reindex_part_index_0 ATTACH PARTITION tbspace_reindex_part_index_0_1; +CREATE INDEX tbspace_reindex_part_index_0_2 ON ONLY tbspace_reindex_part_0_2 (c1); +ALTER INDEX tbspace_reindex_part_index_0 ATTACH PARTITION tbspace_reindex_part_index_0_2; +SELECT relid, parentrelid, level FROM pg_partition_tree('tbspace_reindex_part_index') + ORDER BY relid, level; + relid | parentrelid | level +--------------------------------+------------------------------+------- + tbspace_reindex_part_index | | 0 + tbspace_reindex_part_index_0 | tbspace_reindex_part_index | 1 + tbspace_reindex_part_index_10 | tbspace_reindex_part_index | 1 + tbspace_reindex_part_index_0_1 | tbspace_reindex_part_index_0 | 2 + tbspace_reindex_part_index_0_2 | tbspace_reindex_part_index_0 | 2 +(5 rows) + +-- Track the original tablespace, relfilenode and OID of each index +-- in the tree. +CREATE TEMP TABLE reindex_temp_before AS + SELECT oid, relname, relfilenode, reltablespace + FROM pg_class + WHERE relname ~ 'tbspace_reindex_part_index'; +REINDEX (TABLESPACE regress_tblspace, CONCURRENTLY) TABLE tbspace_reindex_part; +-- REINDEX CONCURRENTLY changes the OID of the old relation, hence a check +-- based on the relation name below. 
+SELECT b.relname, + CASE WHEN a.relfilenode = b.relfilenode THEN 'relfilenode is unchanged' + ELSE 'relfilenode has changed' END AS filenode, + CASE WHEN a.reltablespace = b.reltablespace THEN 'reltablespace is unchanged' + ELSE 'reltablespace has changed' END AS tbspace + FROM reindex_temp_before b JOIN pg_class a ON b.relname = a.relname + ORDER BY 1; + relname | filenode | tbspace +--------------------------------+--------------------------+---------------------------- + tbspace_reindex_part_index | relfilenode is unchanged | reltablespace is unchanged + tbspace_reindex_part_index_0 | relfilenode is unchanged | reltablespace is unchanged + tbspace_reindex_part_index_0_1 | relfilenode has changed | reltablespace has changed + tbspace_reindex_part_index_0_2 | relfilenode has changed | reltablespace has changed + tbspace_reindex_part_index_10 | relfilenode is unchanged | reltablespace is unchanged +(5 rows) + +DROP TABLE tbspace_reindex_part; +-- create a schema we can use +CREATE SCHEMA testschema; +-- try a table +CREATE TABLE testschema.foo (i int) TABLESPACE regress_tblspace; +SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c + where c.reltablespace = t.oid AND c.relname = 'foo'; + relname | spcname +---------+------------------ + foo | regress_tblspace +(1 row) + +INSERT INTO testschema.foo VALUES(1); +INSERT INTO testschema.foo VALUES(2); +-- tables from dynamic sources +CREATE TABLE testschema.asselect TABLESPACE regress_tblspace AS SELECT 1; +SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c + where c.reltablespace = t.oid AND c.relname = 'asselect'; + relname | spcname +----------+------------------ + asselect | regress_tblspace +(1 row) + +PREPARE selectsource(int) AS SELECT $1; +CREATE TABLE testschema.asexecute TABLESPACE regress_tblspace + AS EXECUTE selectsource(2); +SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c + where c.reltablespace = t.oid AND c.relname = 'asexecute'; + relname | spcname +-----------+------------------ + asexecute | regress_tblspace +(1 row) + +-- index +CREATE INDEX foo_idx on testschema.foo(i) TABLESPACE regress_tblspace; +SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c + where c.reltablespace = t.oid AND c.relname = 'foo_idx'; + relname | spcname +---------+------------------ + foo_idx | regress_tblspace +(1 row) + +-- check \d output +\d testschema.foo + Table "testschema.foo" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + i | integer | | | +Indexes: + "foo_idx" btree (i), tablespace "regress_tblspace" +Tablespace: "regress_tblspace" + +\d testschema.foo_idx + Index "testschema.foo_idx" + Column | Type | Key? 
| Definition +--------+---------+------+------------ + i | integer | yes | i +btree, for table "testschema.foo" +Tablespace: "regress_tblspace" + +-- +-- partitioned table +-- +CREATE TABLE testschema.part (a int) PARTITION BY LIST (a); +SET default_tablespace TO pg_global; +CREATE TABLE testschema.part_1 PARTITION OF testschema.part FOR VALUES IN (1); +ERROR: only shared relations can be placed in pg_global tablespace +RESET default_tablespace; +CREATE TABLE testschema.part_1 PARTITION OF testschema.part FOR VALUES IN (1); +SET default_tablespace TO regress_tblspace; +CREATE TABLE testschema.part_2 PARTITION OF testschema.part FOR VALUES IN (2); +SET default_tablespace TO pg_global; +CREATE TABLE testschema.part_3 PARTITION OF testschema.part FOR VALUES IN (3); +ERROR: only shared relations can be placed in pg_global tablespace +ALTER TABLE testschema.part SET TABLESPACE regress_tblspace; +CREATE TABLE testschema.part_3 PARTITION OF testschema.part FOR VALUES IN (3); +CREATE TABLE testschema.part_4 PARTITION OF testschema.part FOR VALUES IN (4) + TABLESPACE pg_default; +CREATE TABLE testschema.part_56 PARTITION OF testschema.part FOR VALUES IN (5, 6) + PARTITION BY LIST (a); +ALTER TABLE testschema.part SET TABLESPACE pg_default; +CREATE TABLE testschema.part_78 PARTITION OF testschema.part FOR VALUES IN (7, 8) + PARTITION BY LIST (a); +ERROR: only shared relations can be placed in pg_global tablespace +CREATE TABLE testschema.part_910 PARTITION OF testschema.part FOR VALUES IN (9, 10) + PARTITION BY LIST (a) TABLESPACE regress_tblspace; +RESET default_tablespace; +CREATE TABLE testschema.part_78 PARTITION OF testschema.part FOR VALUES IN (7, 8) + PARTITION BY LIST (a); +SELECT relname, spcname FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON (c.relnamespace = n.oid) + LEFT JOIN pg_catalog.pg_tablespace t ON c.reltablespace = t.oid + where c.relname LIKE 'part%' AND n.nspname = 'testschema' order by relname; + relname | spcname +----------+------------------ + part | + part_1 | + part_2 | regress_tblspace + part_3 | regress_tblspace + part_4 | + part_56 | regress_tblspace + part_78 | + part_910 | regress_tblspace +(8 rows) + +RESET default_tablespace; +DROP TABLE testschema.part; +-- partitioned index +CREATE TABLE testschema.part (a int) PARTITION BY LIST (a); +CREATE TABLE testschema.part1 PARTITION OF testschema.part FOR VALUES IN (1); +CREATE INDEX part_a_idx ON testschema.part (a) TABLESPACE regress_tblspace; +CREATE TABLE testschema.part2 PARTITION OF testschema.part FOR VALUES IN (2); +SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c + where c.reltablespace = t.oid AND c.relname LIKE 'part%_idx'; + relname | spcname +-------------+------------------ + part1_a_idx | regress_tblspace + part2_a_idx | regress_tblspace + part_a_idx | regress_tblspace +(3 rows) + +\d testschema.part + Partitioned table "testschema.part" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: LIST (a) +Indexes: + "part_a_idx" btree (a), tablespace "regress_tblspace" +Number of partitions: 2 (Use \d+ to list them.) 
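-- A sketch, not part of the expected output, of the inheritance rule these
-- checks exercise: a partition created without an explicit TABLESPACE clause
-- inherits the partitioned parent's tablespace. Assumes a scratch database
-- where a tablespace demo_ts already exists; all names are illustrative.
CREATE TABLE demo_part (a int) PARTITION BY LIST (a) TABLESPACE demo_ts;
CREATE TABLE demo_part_1 PARTITION OF demo_part FOR VALUES IN (1);
SELECT c.relname, t.spcname
  FROM pg_class c LEFT JOIN pg_tablespace t ON c.reltablespace = t.oid
 WHERE c.relname IN ('demo_part', 'demo_part_1');  -- both report demo_ts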
+ +\d+ testschema.part + Partitioned table "testschema.part" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | +Partition key: LIST (a) +Indexes: + "part_a_idx" btree (a), tablespace "regress_tblspace" +Partitions: testschema.part1 FOR VALUES IN (1), + testschema.part2 FOR VALUES IN (2) + +\d testschema.part1 + Table "testschema.part1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: testschema.part FOR VALUES IN (1) +Indexes: + "part1_a_idx" btree (a), tablespace "regress_tblspace" + +\d+ testschema.part1 + Table "testschema.part1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | +Partition of: testschema.part FOR VALUES IN (1) +Partition constraint: ((a IS NOT NULL) AND (a = 1)) +Indexes: + "part1_a_idx" btree (a), tablespace "regress_tblspace" + +\d testschema.part_a_idx +Partitioned index "testschema.part_a_idx" + Column | Type | Key? | Definition +--------+---------+------+------------ + a | integer | yes | a +btree, for table "testschema.part" +Number of partitions: 2 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +\d+ testschema.part_a_idx + Partitioned index "testschema.part_a_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + a | integer | yes | a | plain | +btree, for table "testschema.part" +Partitions: testschema.part1_a_idx, + testschema.part2_a_idx +Tablespace: "regress_tblspace" + +-- partitioned rels cannot specify the default tablespace. 
These fail: +CREATE TABLE testschema.dflt (a int PRIMARY KEY) PARTITION BY LIST (a) TABLESPACE pg_default; +ERROR: cannot specify default tablespace for partitioned relations +CREATE TABLE testschema.dflt (a int PRIMARY KEY USING INDEX TABLESPACE pg_default) PARTITION BY LIST (a); +ERROR: cannot specify default tablespace for partitioned relations +SET default_tablespace TO 'pg_default'; +CREATE TABLE testschema.dflt (a int PRIMARY KEY) PARTITION BY LIST (a) TABLESPACE regress_tblspace; +ERROR: cannot specify default tablespace for partitioned relations +CREATE TABLE testschema.dflt (a int PRIMARY KEY USING INDEX TABLESPACE regress_tblspace) PARTITION BY LIST (a); +ERROR: cannot specify default tablespace for partitioned relations +-- but these work: +CREATE TABLE testschema.dflt (a int PRIMARY KEY USING INDEX TABLESPACE regress_tblspace) PARTITION BY LIST (a) TABLESPACE regress_tblspace; +SET default_tablespace TO ''; +CREATE TABLE testschema.dflt2 (a int PRIMARY KEY) PARTITION BY LIST (a); +DROP TABLE testschema.dflt, testschema.dflt2; +-- check that default_tablespace doesn't affect ALTER TABLE index rebuilds +CREATE TABLE testschema.test_default_tab(id bigint) TABLESPACE regress_tblspace; +INSERT INTO testschema.test_default_tab VALUES (1); +CREATE INDEX test_index1 on testschema.test_default_tab (id); +CREATE INDEX test_index2 on testschema.test_default_tab (id) TABLESPACE regress_tblspace; +ALTER TABLE testschema.test_default_tab ADD CONSTRAINT test_index3 PRIMARY KEY (id); +ALTER TABLE testschema.test_default_tab ADD CONSTRAINT test_index4 UNIQUE (id) USING INDEX TABLESPACE regress_tblspace; +\d testschema.test_index1 + Index "testschema.test_index1" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +btree, for table "testschema.test_default_tab" + +\d testschema.test_index2 + Index "testschema.test_index2" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_index3 + Index "testschema.test_index3" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +primary key, btree, for table "testschema.test_default_tab" + +\d testschema.test_index4 + Index "testschema.test_index4" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +unique, btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +-- use a custom tablespace for default_tablespace +SET default_tablespace TO regress_tblspace; +-- tablespace should not change if no rewrite +ALTER TABLE testschema.test_default_tab ALTER id TYPE bigint; +\d testschema.test_index1 + Index "testschema.test_index1" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +btree, for table "testschema.test_default_tab" + +\d testschema.test_index2 + Index "testschema.test_index2" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_index3 + Index "testschema.test_index3" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +primary key, btree, for table "testschema.test_default_tab" + +\d testschema.test_index4 + Index "testschema.test_index4" + Column | Type | Key? 
| Definition +--------+--------+------+------------ + id | bigint | yes | id +unique, btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +SELECT * FROM testschema.test_default_tab; + id +---- + 1 +(1 row) + +-- tablespace should not change even if there is an index rewrite +ALTER TABLE testschema.test_default_tab ALTER id TYPE int; +\d testschema.test_index1 + Index "testschema.test_index1" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +btree, for table "testschema.test_default_tab" + +\d testschema.test_index2 + Index "testschema.test_index2" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_index3 + Index "testschema.test_index3" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +primary key, btree, for table "testschema.test_default_tab" + +\d testschema.test_index4 + Index "testschema.test_index4" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +unique, btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +SELECT * FROM testschema.test_default_tab; + id +---- + 1 +(1 row) + +-- now use the default tablespace for default_tablespace +SET default_tablespace TO ''; +-- tablespace should not change if no rewrite +ALTER TABLE testschema.test_default_tab ALTER id TYPE int; +\d testschema.test_index1 + Index "testschema.test_index1" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +btree, for table "testschema.test_default_tab" + +\d testschema.test_index2 + Index "testschema.test_index2" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_index3 + Index "testschema.test_index3" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +primary key, btree, for table "testschema.test_default_tab" + +\d testschema.test_index4 + Index "testschema.test_index4" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +unique, btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +-- tablespace should not change even if there is an index rewrite +ALTER TABLE testschema.test_default_tab ALTER id TYPE bigint; +\d testschema.test_index1 + Index "testschema.test_index1" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +btree, for table "testschema.test_default_tab" + +\d testschema.test_index2 + Index "testschema.test_index2" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_index3 + Index "testschema.test_index3" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +primary key, btree, for table "testschema.test_default_tab" + +\d testschema.test_index4 + Index "testschema.test_index4" + Column | Type | Key? 
| Definition +--------+--------+------+------------ + id | bigint | yes | id +unique, btree, for table "testschema.test_default_tab" +Tablespace: "regress_tblspace" + +DROP TABLE testschema.test_default_tab; +-- check that default_tablespace doesn't affect ALTER TABLE index rebuilds +-- (this time with a partitioned table) +CREATE TABLE testschema.test_default_tab_p(id bigint, val bigint) + PARTITION BY LIST (id) TABLESPACE regress_tblspace; +CREATE TABLE testschema.test_default_tab_p1 PARTITION OF testschema.test_default_tab_p + FOR VALUES IN (1); +INSERT INTO testschema.test_default_tab_p VALUES (1); +CREATE INDEX test_index1 on testschema.test_default_tab_p (val); +CREATE INDEX test_index2 on testschema.test_default_tab_p (val) TABLESPACE regress_tblspace; +ALTER TABLE testschema.test_default_tab_p ADD CONSTRAINT test_index3 PRIMARY KEY (id); +ALTER TABLE testschema.test_default_tab_p ADD CONSTRAINT test_index4 UNIQUE (id) USING INDEX TABLESPACE regress_tblspace; +\d testschema.test_index1 +Partitioned index "testschema.test_index1" + Column | Type | Key? | Definition +--------+--------+------+------------ + val | bigint | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index2 +Partitioned index "testschema.test_index2" + Column | Type | Key? | Definition +--------+--------+------+------------ + val | bigint | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +\d testschema.test_index3 +Partitioned index "testschema.test_index3" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +primary key, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index4 +Partitioned index "testschema.test_index4" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +unique, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +-- use a custom tablespace for default_tablespace +SET default_tablespace TO regress_tblspace; +-- tablespace should not change if no rewrite +ALTER TABLE testschema.test_default_tab_p ALTER val TYPE bigint; +\d testschema.test_index1 +Partitioned index "testschema.test_index1" + Column | Type | Key? | Definition +--------+--------+------+------------ + val | bigint | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index2 +Partitioned index "testschema.test_index2" + Column | Type | Key? | Definition +--------+--------+------+------------ + val | bigint | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +\d testschema.test_index3 +Partitioned index "testschema.test_index3" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +primary key, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index4 +Partitioned index "testschema.test_index4" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +unique, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) 
+Tablespace: "regress_tblspace" + +SELECT * FROM testschema.test_default_tab_p; + id | val +----+----- + 1 | +(1 row) + +-- tablespace should not change even if there is an index rewrite +ALTER TABLE testschema.test_default_tab_p ALTER val TYPE int; +\d testschema.test_index1 +Partitioned index "testschema.test_index1" + Column | Type | Key? | Definition +--------+---------+------+------------ + val | integer | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index2 +Partitioned index "testschema.test_index2" + Column | Type | Key? | Definition +--------+---------+------+------------ + val | integer | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +\d testschema.test_index3 +Partitioned index "testschema.test_index3" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +primary key, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index4 +Partitioned index "testschema.test_index4" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +unique, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +SELECT * FROM testschema.test_default_tab_p; + id | val +----+----- + 1 | +(1 row) + +-- now use the default tablespace for default_tablespace +SET default_tablespace TO ''; +-- tablespace should not change if no rewrite +ALTER TABLE testschema.test_default_tab_p ALTER val TYPE int; +\d testschema.test_index1 +Partitioned index "testschema.test_index1" + Column | Type | Key? | Definition +--------+---------+------+------------ + val | integer | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index2 +Partitioned index "testschema.test_index2" + Column | Type | Key? | Definition +--------+---------+------+------------ + val | integer | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +\d testschema.test_index3 +Partitioned index "testschema.test_index3" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +primary key, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index4 +Partitioned index "testschema.test_index4" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +unique, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +-- tablespace should not change even if there is an index rewrite +ALTER TABLE testschema.test_default_tab_p ALTER val TYPE bigint; +\d testschema.test_index1 +Partitioned index "testschema.test_index1" + Column | Type | Key? | Definition +--------+--------+------+------------ + val | bigint | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index2 +Partitioned index "testschema.test_index2" + Column | Type | Key? 
| Definition +--------+--------+------+------------ + val | bigint | yes | val +btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +\d testschema.test_index3 +Partitioned index "testschema.test_index3" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +primary key, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) + +\d testschema.test_index4 +Partitioned index "testschema.test_index4" + Column | Type | Key? | Definition +--------+--------+------+------------ + id | bigint | yes | id +unique, btree, for table "testschema.test_default_tab_p" +Number of partitions: 1 (Use \d+ to list them.) +Tablespace: "regress_tblspace" + +DROP TABLE testschema.test_default_tab_p; +-- check that default_tablespace affects index additions in ALTER TABLE +CREATE TABLE testschema.test_tab(id int) TABLESPACE regress_tblspace; +INSERT INTO testschema.test_tab VALUES (1); +SET default_tablespace TO regress_tblspace; +ALTER TABLE testschema.test_tab ADD CONSTRAINT test_tab_unique UNIQUE (id); +SET default_tablespace TO ''; +ALTER TABLE testschema.test_tab ADD CONSTRAINT test_tab_pkey PRIMARY KEY (id); +\d testschema.test_tab_unique + Index "testschema.test_tab_unique" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +unique, btree, for table "testschema.test_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_tab_pkey + Index "testschema.test_tab_pkey" + Column | Type | Key? | Definition +--------+---------+------+------------ + id | integer | yes | id +primary key, btree, for table "testschema.test_tab" + +SELECT * FROM testschema.test_tab; + id +---- + 1 +(1 row) + +DROP TABLE testschema.test_tab; +-- check that default_tablespace is handled correctly by multi-command +-- ALTER TABLE that includes a tablespace-preserving rewrite +CREATE TABLE testschema.test_tab(a int, b int, c int); +SET default_tablespace TO regress_tblspace; +ALTER TABLE testschema.test_tab ADD CONSTRAINT test_tab_unique UNIQUE (a); +CREATE INDEX test_tab_a_idx ON testschema.test_tab (a); +SET default_tablespace TO ''; +CREATE INDEX test_tab_b_idx ON testschema.test_tab (b); +\d testschema.test_tab_unique + Index "testschema.test_tab_unique" + Column | Type | Key? | Definition +--------+---------+------+------------ + a | integer | yes | a +unique, btree, for table "testschema.test_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_tab_a_idx + Index "testschema.test_tab_a_idx" + Column | Type | Key? | Definition +--------+---------+------+------------ + a | integer | yes | a +btree, for table "testschema.test_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_tab_b_idx + Index "testschema.test_tab_b_idx" + Column | Type | Key? | Definition +--------+---------+------+------------ + b | integer | yes | b +btree, for table "testschema.test_tab" + +ALTER TABLE testschema.test_tab ALTER b TYPE bigint, ADD UNIQUE (c); +\d testschema.test_tab_unique + Index "testschema.test_tab_unique" + Column | Type | Key? | Definition +--------+---------+------+------------ + a | integer | yes | a +unique, btree, for table "testschema.test_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_tab_a_idx + Index "testschema.test_tab_a_idx" + Column | Type | Key? 
| Definition +--------+---------+------+------------ + a | integer | yes | a +btree, for table "testschema.test_tab" +Tablespace: "regress_tblspace" + +\d testschema.test_tab_b_idx + Index "testschema.test_tab_b_idx" + Column | Type | Key? | Definition +--------+--------+------+------------ + b | bigint | yes | b +btree, for table "testschema.test_tab" + +DROP TABLE testschema.test_tab; +-- let's try moving a table from one place to another +CREATE TABLE testschema.atable AS VALUES (1), (2); +CREATE UNIQUE INDEX anindex ON testschema.atable(column1); +ALTER TABLE testschema.atable SET TABLESPACE regress_tblspace; +ALTER INDEX testschema.anindex SET TABLESPACE regress_tblspace; +ALTER INDEX testschema.part_a_idx SET TABLESPACE pg_global; +ERROR: only shared relations can be placed in pg_global tablespace +ALTER INDEX testschema.part_a_idx SET TABLESPACE pg_default; +ALTER INDEX testschema.part_a_idx SET TABLESPACE regress_tblspace; +INSERT INTO testschema.atable VALUES(3); -- ok +INSERT INTO testschema.atable VALUES(1); -- fail (checks index) +ERROR: duplicate key value violates unique constraint "anindex" +DETAIL: Key (column1)=(1) already exists. +SELECT COUNT(*) FROM testschema.atable; -- checks heap + count +------- + 3 +(1 row) + +-- Will fail with bad path +CREATE TABLESPACE regress_badspace LOCATION '/no/such/location'; +ERROR: directory "/no/such/location" does not exist +-- No such tablespace +CREATE TABLE bar (i int) TABLESPACE regress_nosuchspace; +ERROR: tablespace "regress_nosuchspace" does not exist +-- Fail, in use for some partitioned object +DROP TABLESPACE regress_tblspace; +ERROR: tablespace "regress_tblspace" cannot be dropped because some objects depend on it +DETAIL: tablespace for index testschema.part_a_idx +ALTER INDEX testschema.part_a_idx SET TABLESPACE pg_default; +-- Fail, not empty +DROP TABLESPACE regress_tblspace; +CREATE ROLE regress_tablespace_user1 login; +CREATE ROLE regress_tablespace_user2 login; +GRANT USAGE ON SCHEMA testschema TO regress_tablespace_user2; +ALTER TABLESPACE regress_tblspace OWNER TO regress_tablespace_user1; +ERROR: tablespace "regress_tblspace" does not exist +CREATE TABLE testschema.tablespace_acl (c int); +-- new owner lacks permission to create this index from scratch +CREATE INDEX k ON testschema.tablespace_acl (c) TABLESPACE regress_tblspace; +ERROR: tablespace "regress_tblspace" does not exist +ALTER TABLE testschema.tablespace_acl OWNER TO regress_tablespace_user2; +SET SESSION ROLE regress_tablespace_user2; +CREATE TABLE tablespace_table (i int) TABLESPACE regress_tblspace; -- fail +ERROR: tablespace "regress_tblspace" does not exist +ALTER TABLE testschema.tablespace_acl ALTER c TYPE bigint; +REINDEX (TABLESPACE regress_tblspace) TABLE tablespace_table; -- fail +ERROR: tablespace "regress_tblspace" does not exist +REINDEX (TABLESPACE regress_tblspace, CONCURRENTLY) TABLE tablespace_table; -- fail +ERROR: tablespace "regress_tblspace" does not exist +RESET ROLE; +ALTER TABLESPACE regress_tblspace RENAME TO regress_tblspace_renamed; +ERROR: tablespace "regress_tblspace" does not exist +ALTER TABLE ALL IN TABLESPACE regress_tblspace_renamed SET TABLESPACE pg_default; +ERROR: tablespace "regress_tblspace_renamed" does not exist +ALTER INDEX ALL IN TABLESPACE regress_tblspace_renamed SET TABLESPACE pg_default; +ERROR: tablespace "regress_tblspace_renamed" does not exist +-- Should show notice that nothing was done +ALTER TABLE ALL IN TABLESPACE regress_tblspace_renamed SET TABLESPACE pg_default; +ERROR: tablespace 
"regress_tblspace_renamed" does not exist +-- Should succeed +DROP TABLESPACE regress_tblspace_renamed; +ERROR: tablespace "regress_tblspace_renamed" does not exist +DROP SCHEMA testschema CASCADE; +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table testschema.foo +drop cascades to table testschema.asselect +drop cascades to table testschema.asexecute +drop cascades to table testschema.part +drop cascades to table testschema.atable +drop cascades to table testschema.tablespace_acl +DROP ROLE regress_tablespace_user1; +DROP ROLE regress_tablespace_user2; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index d71d1adbecd..760f3842121 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2159,6 +2159,8 @@ RelMapFile RelMapping RelOptInfo RelOptKind +RelSizeEntry +RelTag RelToCheck RelToCluster RelabelType @@ -2846,6 +2848,8 @@ WaitEventTimeout WaitPMResult WalCloseMethod WalLevel +Safekeeper +WalMessage WalRcvData WalRcvExecResult WalRcvExecStatus @@ -2949,6 +2953,17 @@ XmlTableBuilderData YYLTYPE YYSTYPE YY_BUFFER_STATE +ZenithErrorResponse +ZenithExistsRequest +ZenithExistsResponse +ZenithGetPageRequest +ZenithGetPageResponse +ZenithMessage +ZenithMessageTag +ZenithNblocksRequest +ZenithNblocksResponse +ZenithRequest +ZenithResponse _SPI_connection _SPI_plan __AssignProcessToJobObject