Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge last written cache lsn with new main branch #201

Merged
merged 1 commit into from
Sep 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/backend/access/gin/gininsert.c
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
log_newpage_range(index, MAIN_FORKNUM,
0, RelationGetNumberOfBlocks(index),
true);
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
}
SetLastWrittenPageLSN(XactLastRecEnd);

smgr_end_unlogged_build(index->rd_smgr);

Expand Down
10 changes: 7 additions & 3 deletions src/backend/access/gist/gistbuild.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,9 +335,11 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
log_newpage_range(index, MAIN_FORKNUM,
0, RelationGetNumberOfBlocks(index),
true);
SetLastWrittenLSNForBlockRange(XactLastRecEnd,
index->rd_smgr->smgr_rnode.node,
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
}
SetLastWrittenPageLSN(XactLastRecEnd);

smgr_end_unlogged_build(index->rd_smgr);
}

Expand Down Expand Up @@ -469,7 +471,9 @@ gist_indexsortbuild(GISTBuildState *state)

lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
pagestate->page, true);
SetLastWrittenPageLSN(lsn);
SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node,
MAIN_FORKNUM, GIST_ROOT_BLKNO);
SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
}

pfree(pagestate->page);
Expand Down
4 changes: 3 additions & 1 deletion src/backend/access/spgist/spginsert.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,10 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
log_newpage_range(index, MAIN_FORKNUM,
0, RelationGetNumberOfBlocks(index),
true);
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node,
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
}
SetLastWrittenPageLSN(XactLastRecEnd);

smgr_end_unlogged_build(index->rd_smgr);

Expand Down
199 changes: 181 additions & 18 deletions src/backend/access/transam/xlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/buf_internals.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
Expand Down Expand Up @@ -113,6 +114,7 @@ int wal_retrieve_retry_interval = 5000;
int max_slot_wal_keep_size_mb = -1;
bool track_wal_io_timing = false;
uint64 predefined_sysidentifier;
int lastWrittenLsnCacheSize;

#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
Expand Down Expand Up @@ -182,6 +184,26 @@ const struct config_enum_entry recovery_target_action_options[] = {
{NULL, 0, false}
};


/*
 * One entry of the last written LSN cache (lastWrittenLsnCache).
 *
 * NOTE: 'key' must stay the first member: the eviction code passes a pointer
 * to the entry itself as the key pointer when removing it from the hash table.
 */
typedef struct LastWrittenLsnCacheEntry
{
/*
 * Hash key. 'blockNum' does not hold a plain block number here: lookups and
 * updates store the bucket index (block number / LAST_WRITTEN_LSN_CACHE_BUCKET).
 */
BufferTag key;
/* Maximal LSN recorded so far for this key */
XLogRecPtr lsn;
/* double linked list for LRU replacement algorithm */
dlist_node lru_node;
} LastWrittenLsnCacheEntry;


/*
 * Cache of the last written LSN for each relation chunk (hash bucket).
 * In addition, to provide a request LSN for smgrnblocks and smgrexists, there is
 * a pseudokey=InvalidBlockId which stores the LSN of the last relation metadata update.
 * The size of the cache is limited by the GUC variable lastWrittenLsnCacheSize ("lsn_cache_size");
 * entries are replaced using an LRU algorithm based on a doubly linked list.
 * Access to this cache is protected by 'LastWrittenLsnLock'.
 */
static HTAB *lastWrittenLsnCache;

/*
* Statistics for current checkpoint are collected in this global struct.
* Because only the checkpointer or a stand-alone backend can perform
Expand Down Expand Up @@ -749,7 +771,17 @@ typedef struct XLogCtlData
* XLOG_FPW_CHANGE record that instructs full_page_writes is disabled.
*/
XLogRecPtr lastFpwDisableRecPtr;
XLogRecPtr lastWrittenPageLSN;

/*
* Maximal last written LSN for pages not present in lastWrittenLsnCache
*/
XLogRecPtr maxLastWrittenLsn;

/*
* Double linked list to implement LRU replacement policy for last written LSN cache.
* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.
*/
dlist_head lastWrittenLsnLRU;

/* neon: copy of startup's RedoStartLSN for walproposer's use */
XLogRecPtr RedoStartLSN;
Expand All @@ -772,6 +804,8 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
*/
static ControlFileData *ControlFile = NULL;

/*
 * Number of consecutive blocks that share one entry of the last written LSN
 * cache: block numbers are divided by this value to obtain the cache key.
 */
#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8MB (presumably with the default 8kB block size) */

/*
* Calculate the amount of space left on the page after 'endptr'. Beware
* multiple evaluation!
Expand Down Expand Up @@ -5135,11 +5169,8 @@ LocalProcessControlFile(bool reset)
ReadControlFile();
}

/*
* Initialization of shared memory for XLOG
*/
Size
XLOGShmemSize(void)
static Size
XLOGCtlShmemSize(void)
{
Size size;

Expand Down Expand Up @@ -5179,6 +5210,16 @@ XLOGShmemSize(void)
return size;
}

/*
 * Report the amount of shared memory needed by XLOG: the space computed by
 * XLOGCtlShmemSize() plus the estimated size of the last written LSN hash
 * table holding lastWrittenLsnCacheSize entries.
 */
Size
XLOGShmemSize(void)
{
	Size		ctlSize = XLOGCtlShmemSize();
	Size		lsnCacheSize = hash_estimate_size(lastWrittenLsnCacheSize,
												  sizeof(LastWrittenLsnCacheEntry));

	return ctlSize + lsnCacheSize;
}

void
XLOGShmemInit(void)
{
Expand Down Expand Up @@ -5208,6 +5249,15 @@ XLOGShmemInit(void)
XLogCtl = (XLogCtlData *)
ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);

{
static HASHCTL info;
info.keysize = sizeof(BufferTag);
info.entrysize = sizeof(LastWrittenLsnCacheEntry);
lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
lastWrittenLsnCacheSize, lastWrittenLsnCacheSize,
&info,
HASH_ELEM | HASH_BLOBS);
}
localControlFile = ControlFile;
ControlFile = (ControlFileData *)
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
Expand Down Expand Up @@ -8098,7 +8148,8 @@ StartupXLOG(void)

XLogCtl->LogwrtRqst.Write = EndOfLog;
XLogCtl->LogwrtRqst.Flush = EndOfLog;
XLogCtl->lastWrittenPageLSN = EndOfLog;
XLogCtl->maxLastWrittenLsn = EndOfLog;
dlist_init(&XLogCtl->lastWrittenLsnLRU);

LocalSetXLogInsertAllowed();

Expand Down Expand Up @@ -8870,29 +8921,141 @@ GetInsertRecPtr(void)
}

/*
* GetLastWrittenPageLSN -- Returns maximal LSN of written page
* GetLastWrittenLSN -- Returns maximal LSN of written page.
* It returns an upper bound for the last written LSN of a given page,
* either from a cached last written LSN or a global maximum last written LSN.
* If rnode.relNode is InvalidOid then we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
* If the cache is large, iterating through all hash items may be rather expensive.
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize, which is not performance critical.
*/
XLogRecPtr
GetLastWrittenPageLSN(void)
GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
{
XLogRecPtr lsn;
SpinLockAcquire(&XLogCtl->info_lck);
lsn = XLogCtl->lastWrittenPageLSN;
SpinLockRelease(&XLogCtl->info_lck);
LastWrittenLsnCacheEntry* entry;

LWLockAcquire(LastWrittenLsnLock, LW_SHARED);

/* Maximal last written LSN among all non-cached pages */
lsn = XLogCtl->maxLastWrittenLsn;

if (rnode.relNode != InvalidOid)
{
BufferTag key;
key.rnode = rnode;
key.forkNum = forknum;
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
if (entry != NULL)
lsn = entry->lsn;
}
else
{
HASH_SEQ_STATUS seq;
/* Find maximum of all cached LSNs */
hash_seq_init(&seq, lastWrittenLsnCache);
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
{
if (entry->lsn > lsn)
lsn = entry->lsn;
}
}
LWLockRelease(LastWrittenLsnLock);

return lsn;
}

/*
* SetLastWrittenPageLSN -- Set maximal LSN of written page
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
* We maintain cache of last written LSNs with limited size and LRU replacement
* policy. To reduce cache size we store max LSN not for each page, but for
* bucket (1024 blocks). This cache allows to use old LSN when
* requesting pages of unchanged or appended relations.
*
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
* SetLastWrittenLsn with dummy rnode is used by createdb and dbase_redo functions.
*/
void
SetLastWrittenPageLSN(XLogRecPtr lsn)
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber till)
{
SpinLockAcquire(&XLogCtl->info_lck);
if (lsn > XLogCtl->lastWrittenPageLSN)
XLogCtl->lastWrittenPageLSN = lsn;
SpinLockRelease(&XLogCtl->info_lck);
if (lsn == InvalidXLogRecPtr)
return;

LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
if (rnode.relNode == InvalidOid)
{
if (lsn > XLogCtl->maxLastWrittenLsn)
XLogCtl->maxLastWrittenLsn = lsn;
}
else
{
LastWrittenLsnCacheEntry* entry;
BufferTag key;
bool found;
BlockNumber bucket;

key.rnode = rnode;
key.forkNum = forknum;
for (bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
bucket <= till / LAST_WRITTEN_LSN_CACHE_BUCKET;
bucket++)
{
key.blockNum = bucket;
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
if (found)
{
if (lsn > entry->lsn)
entry->lsn = lsn;
/* Unlink from LRU list */
dlist_delete(&entry->lru_node);
}
else
{
entry->lsn = lsn;
if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize)
{
/* Replace least recently used entry */
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU));
/* Adjust max LSN for not cached relations/chunks if needed */
if (victim->lsn > XLogCtl->maxLastWrittenLsn)
XLogCtl->maxLastWrittenLsn = victim->lsn;

hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
}
}
/* Link to the end of LRU list */
dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node);
}
}
LWLockRelease(LastWrittenLsnLock);
}

/*
 * SetLastWrittenLSNForBlock -- Set maximal LSN for block
 *
 * Convenience wrapper around SetLastWrittenLSNForBlockRange(): the range
 * passed down consists of the single block 'blkno' (from == till).
 */
void
SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
{
SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, blkno);
}

/*
 * SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata
 *
 * The LSN is recorded for the given relation fork under the pseudo block
 * number REL_METADATA_PSEUDO_BLOCKNO, via SetLastWrittenLSNForBlock().
 */
void
SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum)
{
SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO);
}

/*
 * SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database
 *
 * An all-InvalidOid relfilenode is handed down; the block-range setter treats
 * an invalid relNode as a request to advance the global maxLastWrittenLsn
 * rather than a per-relation cache entry.
 */
void
SetLastWrittenLSNForDatabase(XLogRecPtr lsn)
{
	RelFileNode	wholeDatabase = {InvalidOid, InvalidOid, InvalidOid};

	/* Single-block range [0, 0]; the block numbers are unused for this node */
	SetLastWrittenLSNForBlockRange(lsn, wholeDatabase, MAIN_FORKNUM, 0, 0);
}

/*
Expand Down
5 changes: 2 additions & 3 deletions src/backend/commands/dbcommands.c
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)

lsn = XLogInsert(RM_DBASE_ID,
XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
SetLastWrittenPageLSN(lsn);
SetLastWrittenLSNForDatabase(lsn);
}
}
table_endscan(scan);
Expand Down Expand Up @@ -2293,8 +2293,7 @@ dbase_redo(XLogReaderState *record)
*/
{
XLogRecPtr lsn = record->EndRecPtr;

SetLastWrittenPageLSN(lsn);
SetLastWrittenLSNForDatabase(lsn);
}
}
else if (info == XLOG_DBASE_DROP)
Expand Down
1 change: 1 addition & 0 deletions src/backend/storage/lmgr/lwlocknames.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@ XactTruncationLock 44
# 45 was XactTruncationLock until removal of BackendRandomLock
WrapLimitsVacuumLock 46
NotifyQueueTailLock 47
LastWrittenLsnLock 48
10 changes: 10 additions & 0 deletions src/backend/utils/misc/guc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2357,6 +2357,16 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},

/*
 * Neon: maximal number of entries in the last written LSN cache.
 * The value is used to size the shared hash table (see XLOGShmemSize and
 * XLOGShmemInit), one entry per LAST_WRITTEN_LSN_CACHE_BUCKET blocks.
 */
{
	{"lsn_cache_size", PGC_POSTMASTER, UNGROUPED,
		gettext_noop("Size of last written LSN cache used by Neon."),
		NULL
	},
	&lastWrittenLsnCacheSize,
	1024, 10, 1000000, /* 1024 buckets x 8MB each cover about 8GB of relation data */
	NULL, NULL, NULL
},

{
{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
gettext_noop("Sets the maximum number of temporary buffers used by each session."),
Expand Down
Loading