From 8987ef3e80279c12992d1c6c6b89f48d04e1d11d Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 29 Aug 2022 23:25:09 +0300 Subject: [PATCH] Merge last written cache lsn with new main branch --- src/backend/access/gin/gininsert.c | 3 +- src/backend/access/gist/gistbuild.c | 10 +- src/backend/access/spgist/spginsert.c | 4 +- src/backend/access/transam/xlog.c | 199 +++++++++++++++++++++-- src/backend/commands/dbcommands.c | 5 +- src/backend/storage/lmgr/lwlocknames.txt | 1 + src/backend/utils/misc/guc.c | 10 ++ src/include/access/xlog.h | 13 +- 8 files changed, 217 insertions(+), 28 deletions(-) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index dfad28d1f61..77af193cfbb 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -421,8 +421,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) log_newpage_range(index, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index), true); + SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index)); + SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM); } - SetLastWrittenPageLSN(XactLastRecEnd); smgr_end_unlogged_build(index->rd_smgr); diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 8fb778012d1..cef605175ff 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -335,9 +335,11 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) log_newpage_range(index, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index), true); + SetLastWrittenLSNForBlockRange(XactLastRecEnd, + index->rd_smgr->smgr_rnode.node, + MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index)); + SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM); } - SetLastWrittenPageLSN(XactLastRecEnd); - smgr_end_unlogged_build(index->rd_smgr); } @@ -469,7 +471,9 @@ gist_indexsortbuild(GISTBuildState *state) lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO, pagestate->page, true); - SetLastWrittenPageLSN(lsn); + SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node, + MAIN_FORKNUM, GIST_ROOT_BLKNO); + SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM); } pfree(pagestate->page); diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index d85dd54e4df..e83595c80bb 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -143,8 +143,10 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) log_newpage_range(index, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index), true); + SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, + MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index)); + SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM); } - SetLastWrittenPageLSN(XactLastRecEnd); smgr_end_unlogged_build(index->rd_smgr); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 9e0c77bedf8..88ea165cdca 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -61,6 +61,7 @@ #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" +#include "storage/buf_internals.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/large_object.h" @@ -113,6 +114,7 @@ int wal_retrieve_retry_interval = 5000; int max_slot_wal_keep_size_mb = -1; bool track_wal_io_timing = false; uint64 predefined_sysidentifier; +int lastWrittenLsnCacheSize; #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -182,6 +184,26 @@ const struct config_enum_entry recovery_target_action_options[] = { {NULL, 0, false} }; + +typedef struct LastWrittenLsnCacheEntry +{ + BufferTag key; + XLogRecPtr lsn; + /* double linked list for LRU replacement algorithm */ + dlist_node lru_node; +} LastWrittenLsnCacheEntry; + + +/* + * Cache of last written LSN for each relation chunk (hash bucket). + * Also to provide request LSN for smgrnblocks, smgrexists there is pseudokey=InvalidBlockId which stores LSN of last + * relation metadata update. + * Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"), + * pages are replaced using LRU algorithm, based on L2-list. + * Access to this cache is protected by 'LastWrittenLsnLock'. + */ +static HTAB *lastWrittenLsnCache; + /* * Statistics for current checkpoint are collected in this global struct. * Because only the checkpointer or a stand-alone backend can perform @@ -749,7 +771,17 @@ typedef struct XLogCtlData * XLOG_FPW_CHANGE record that instructs full_page_writes is disabled. */ XLogRecPtr lastFpwDisableRecPtr; - XLogRecPtr lastWrittenPageLSN; + + /* + * Maximal last written LSN for pages not present in lastWrittenLsnCache + */ + XLogRecPtr maxLastWrittenLsn; + + /* + * Double linked list to implement LRU replacement policy for last written LSN cache. + * Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'. + */ + dlist_head lastWrittenLsnLRU; /* neon: copy of startup's RedoStartLSN for walproposer's use */ XLogRecPtr RedoStartLSN; @@ -772,6 +804,8 @@ static WALInsertLockPadded *WALInsertLocks = NULL; */ static ControlFileData *ControlFile = NULL; +#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */ + /* * Calculate the amount of space left on the page after 'endptr'. Beware * multiple evaluation! @@ -5135,11 +5169,8 @@ LocalProcessControlFile(bool reset) ReadControlFile(); } -/* - * Initialization of shared memory for XLOG - */ -Size -XLOGShmemSize(void) +static Size +XLOGCtlShmemSize(void) { Size size; @@ -5179,6 +5210,16 @@ XLOGShmemSize(void) return size; } +/* + * Initialization of shared memory for XLOG + */ +Size +XLOGShmemSize(void) +{ + return XLOGCtlShmemSize() + + hash_estimate_size(lastWrittenLsnCacheSize, sizeof(LastWrittenLsnCacheEntry)); +} + void XLOGShmemInit(void) { @@ -5208,6 +5249,15 @@ XLOGShmemInit(void) XLogCtl = (XLogCtlData *) ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog); + { + static HASHCTL info; + info.keysize = sizeof(BufferTag); + info.entrysize = sizeof(LastWrittenLsnCacheEntry); + lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache", + lastWrittenLsnCacheSize, lastWrittenLsnCacheSize, + &info, + HASH_ELEM | HASH_BLOBS); + } localControlFile = ControlFile; ControlFile = (ControlFileData *) ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile); @@ -8098,7 +8148,8 @@ StartupXLOG(void) XLogCtl->LogwrtRqst.Write = EndOfLog; XLogCtl->LogwrtRqst.Flush = EndOfLog; - XLogCtl->lastWrittenPageLSN = EndOfLog; + XLogCtl->maxLastWrittenLsn = EndOfLog; + dlist_init(&XLogCtl->lastWrittenLsnLRU); LocalSetXLogInsertAllowed(); @@ -8870,29 +8921,141 @@ GetInsertRecPtr(void) } /* - * GetLastWrittenPageLSN -- Returns maximal LSN of written page + * GetLastWrittenLSN -- Returns maximal LSN of written page. + * It returns an upper bound for the last written LSN of a given page, + * either from a cached last written LSN or a global maximum last written LSN. + * If rnode is InvalidOid then we calculate maximum among all cached LSN and maxLastWrittenLsn. + * If cache is large enough ,iterting through all hash items may be rather expensive. + * But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical. */ XLogRecPtr -GetLastWrittenPageLSN(void) +GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno) { XLogRecPtr lsn; - SpinLockAcquire(&XLogCtl->info_lck); - lsn = XLogCtl->lastWrittenPageLSN; - SpinLockRelease(&XLogCtl->info_lck); + LastWrittenLsnCacheEntry* entry; + + LWLockAcquire(LastWrittenLsnLock, LW_SHARED); + + /* Maximal last written LSN among all non-cached pages */ + lsn = XLogCtl->maxLastWrittenLsn; + + if (rnode.relNode != InvalidOid) + { + BufferTag key; + key.rnode = rnode; + key.forkNum = forknum; + key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET; + entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL); + if (entry != NULL) + lsn = entry->lsn; + } + else + { + HASH_SEQ_STATUS seq; + /* Find maximum of all cached LSNs */ + hash_seq_init(&seq, lastWrittenLsnCache); + while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL) + { + if (entry->lsn > lsn) + lsn = entry->lsn; + } + } + LWLockRelease(LastWrittenLsnLock); return lsn; } /* - * SetLastWrittenPageLSN -- Set maximal LSN of written page + * SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range. + * We maintain cache of last written LSNs with limited size and LRU replacement + * policy. To reduce cache size we store max LSN not for each page, but for + * bucket (1024 blocks). This cache allows to use old LSN when + * requesting pages of unchanged or appended relations. + * + * rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated. + * SetLastWrittenLsn with dummy rnode is used by createdb and dbase_redo functions. */ void -SetLastWrittenPageLSN(XLogRecPtr lsn) +SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber till) { - SpinLockAcquire(&XLogCtl->info_lck); - if (lsn > XLogCtl->lastWrittenPageLSN) - XLogCtl->lastWrittenPageLSN = lsn; - SpinLockRelease(&XLogCtl->info_lck); + if (lsn == InvalidXLogRecPtr) + return; + + LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE); + if (rnode.relNode == InvalidOid) + { + if (lsn > XLogCtl->maxLastWrittenLsn) + XLogCtl->maxLastWrittenLsn = lsn; + } + else + { + LastWrittenLsnCacheEntry* entry; + BufferTag key; + bool found; + BlockNumber bucket; + + key.rnode = rnode; + key.forkNum = forknum; + for (bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET; + bucket <= till / LAST_WRITTEN_LSN_CACHE_BUCKET; + bucket++) + { + key.blockNum = bucket; + entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found); + if (found) + { + if (lsn > entry->lsn) + entry->lsn = lsn; + /* Unlink from LRU list */ + dlist_delete(&entry->lru_node); + } + else + { + entry->lsn = lsn; + if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize) + { + /* Replace least recently used entry */ + LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU)); + /* Adjust max LSN for not cached relations/chunks if needed */ + if (victim->lsn > XLogCtl->maxLastWrittenLsn) + XLogCtl->maxLastWrittenLsn = victim->lsn; + + hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL); + } + } + /* Link to the end of LRU list */ + dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node); + } + } + LWLockRelease(LastWrittenLsnLock); +} + +/* + * SetLastWrittenLSNForBlock -- Set maximal LSN for block + */ +void +SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno) +{ + SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, blkno); +} + +/* + * SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata + */ +void +SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum) +{ + SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO); +} + +/* + * SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database + */ +void +SetLastWrittenLSNForDatabase(XLogRecPtr lsn) +{ + RelFileNode dummyNode = {InvalidOid, InvalidOid, InvalidOid}; + SetLastWrittenLSNForBlock(lsn, dummyNode, MAIN_FORKNUM, 0); } /* diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 090bcf817d8..680bed295fd 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -675,7 +675,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); - SetLastWrittenPageLSN(lsn); + SetLastWrittenLSNForDatabase(lsn); } } table_endscan(scan); @@ -2293,8 +2293,7 @@ dbase_redo(XLogReaderState *record) */ { XLogRecPtr lsn = record->EndRecPtr; - - SetLastWrittenPageLSN(lsn); + SetLastWrittenLSNForDatabase(lsn); } } else if (info == XLOG_DBASE_DROP) diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index 6c7cf6c2956..b4652c33ff6 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -53,3 +53,4 @@ XactTruncationLock 44 # 45 was XactTruncationLock until removal of BackendRandomLock WrapLimitsVacuumLock 46 NotifyQueueTailLock 47 +LastWrittenLsnLock 48 diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index d19c90bd155..54601e60d3b 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2357,6 +2357,16 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"lsn_cache_size", PGC_POSTMASTER, UNGROUPED, + gettext_noop("Size of las written LSN cache used by Neon."), + NULL + }, + &lastWrittenLsnCacheSize, + 1024, 10, 1000000, /* 1024 is enough to hold 10GB database with 8Mb bucket */ + NULL, NULL, NULL + }, + { {"temp_buffers", PGC_USERSET, RESOURCES_MEM, gettext_noop("Sets the maximum number of temporary buffers used by each session."), diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 66fe9dfcd9e..182f6c6449f 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -31,6 +31,11 @@ extern int sync_method; extern PGDLLIMPORT TimeLineID ThisTimeLineID; /* current TLI */ +/* + * Pseudo block number used to associate LSN with relation metadata (relation size) + */ +#define REL_METADATA_PSEUDO_BLOCKNO InvalidBlockNumber + /* * Prior to 8.4, all activity during recovery was carried out by the startup * process. This local variable continues to be used in many parts of the @@ -132,6 +137,7 @@ extern char *PrimaryConnInfo; extern char *PrimarySlotName; extern bool wal_receiver_create_temp_slot; extern bool track_wal_io_timing; +extern int lastWrittenLsnCacheSize; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; @@ -351,8 +357,11 @@ extern XLogRecPtr GetFlushRecPtr(void); extern XLogRecPtr GetLastImportantRecPtr(void); extern void RemovePromoteSignalFiles(void); -extern void SetLastWrittenPageLSN(XLogRecPtr lsn); -extern XLogRecPtr GetLastWrittenPageLSN(void); +extern void SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode relfilenode, ForkNumber forknum, BlockNumber blkno); +extern void SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode relfilenode, ForkNumber forknum, BlockNumber from, BlockNumber till); +extern void SetLastWrittenLSNForDatabase(XLogRecPtr lsn); +extern void SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode relfilenode, ForkNumber forknum); +extern XLogRecPtr GetLastWrittenLSN(RelFileNode relfilenode, ForkNumber forknum, BlockNumber blkno); extern XLogRecPtr GetRedoStartLsn(void);