Skip to content

Commit 5e2df82

Browse files
knizhniktristan957
authored and committed
Merge last written cache lsn with new main branch (#201)
1 parent 561daab commit 5e2df82

File tree

8 files changed

+217
-28
lines changed

8 files changed

+217
-28
lines changed

src/backend/access/gin/gininsert.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
421421
log_newpage_range(index, MAIN_FORKNUM,
422422
0, RelationGetNumberOfBlocks(index),
423423
true);
424+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
425+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
424426
}
425-
SetLastWrittenPageLSN(XactLastRecEnd);
426427

427428
smgr_end_unlogged_build(index->rd_smgr);
428429

src/backend/access/gist/gistbuild.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,11 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
335335
log_newpage_range(index, MAIN_FORKNUM,
336336
0, RelationGetNumberOfBlocks(index),
337337
true);
338+
SetLastWrittenLSNForBlockRange(XactLastRecEnd,
339+
index->rd_smgr->smgr_rnode.node,
340+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
341+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
338342
}
339-
SetLastWrittenPageLSN(XactLastRecEnd);
340-
341343
smgr_end_unlogged_build(index->rd_smgr);
342344
}
343345

@@ -467,7 +469,9 @@ gist_indexsortbuild(GISTBuildState *state)
467469

468470
lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
469471
pagestate->page, true);
470-
SetLastWrittenPageLSN(lsn);
472+
SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node,
473+
MAIN_FORKNUM, GIST_ROOT_BLKNO);
474+
SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
471475
}
472476

473477
pfree(pagestate->page);

src/backend/access/spgist/spginsert.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,10 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
143143
log_newpage_range(index, MAIN_FORKNUM,
144144
0, RelationGetNumberOfBlocks(index),
145145
true);
146+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node,
147+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
148+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
146149
}
147-
SetLastWrittenPageLSN(XactLastRecEnd);
148150

149151
smgr_end_unlogged_build(index->rd_smgr);
150152

src/backend/access/transam/xlog.c

+181-18
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "replication/walreceiver.h"
6262
#include "replication/walsender.h"
6363
#include "storage/bufmgr.h"
64+
#include "storage/buf_internals.h"
6465
#include "storage/fd.h"
6566
#include "storage/ipc.h"
6667
#include "storage/large_object.h"
@@ -113,6 +114,7 @@ int wal_retrieve_retry_interval = 5000;
113114
int max_slot_wal_keep_size_mb = -1;
114115
bool track_wal_io_timing = false;
115116
uint64 predefined_sysidentifier;
117+
int lastWrittenLsnCacheSize;
116118

117119
#ifdef WAL_DEBUG
118120
bool XLOG_DEBUG = false;
@@ -182,6 +184,26 @@ const struct config_enum_entry recovery_target_action_options[] = {
182184
{NULL, 0, false}
183185
};
184186

187+
188+
typedef struct LastWrittenLsnCacheEntry
189+
{
190+
BufferTag key;
191+
XLogRecPtr lsn;
192+
/* doubly linked list node for LRU replacement algorithm */
193+
dlist_node lru_node;
194+
} LastWrittenLsnCacheEntry;
195+
196+
197+
/*
198+
* Cache of last written LSN for each relation chunk (hash bucket).
199+
* Also, to provide a request LSN for smgrnblocks and smgrexists, there is a pseudokey=InvalidBlockId entry which stores the LSN of the last
200+
* relation metadata update.
201+
* Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"),
202+
* entries are replaced using an LRU algorithm based on a doubly linked list (L2-list).
203+
* Access to this cache is protected by 'LastWrittenLsnLock'.
204+
*/
205+
static HTAB *lastWrittenLsnCache;
206+
185207
/*
186208
* Statistics for current checkpoint are collected in this global struct.
187209
* Because only the checkpointer or a stand-alone backend can perform
@@ -749,7 +771,17 @@ typedef struct XLogCtlData
749771
* XLOG_FPW_CHANGE record that instructs full_page_writes is disabled.
750772
*/
751773
XLogRecPtr lastFpwDisableRecPtr;
752-
XLogRecPtr lastWrittenPageLSN;
774+
775+
/*
776+
* Maximal last written LSN for pages not present in lastWrittenLsnCache
777+
*/
778+
XLogRecPtr maxLastWrittenLsn;
779+
780+
/*
781+
* Doubly linked list to implement the LRU replacement policy for the last written LSN cache.
782+
* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.
783+
*/
784+
dlist_head lastWrittenLsnLRU;
753785

754786
/* neon: copy of startup's RedoStartLSN for walproposer's use */
755787
XLogRecPtr RedoStartLSN;
@@ -772,6 +804,8 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
772804
*/
773805
static ControlFileData *ControlFile = NULL;
774806

807+
#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */
808+
775809
/*
776810
* Calculate the amount of space left on the page after 'endptr'. Beware
777811
* multiple evaluation!
@@ -5143,11 +5177,8 @@ LocalProcessControlFile(bool reset)
51435177
ReadControlFile();
51445178
}
51455179

5146-
/*
5147-
* Initialization of shared memory for XLOG
5148-
*/
5149-
Size
5150-
XLOGShmemSize(void)
5180+
static Size
5181+
XLOGCtlShmemSize(void)
51515182
{
51525183
Size size;
51535184

@@ -5187,6 +5218,16 @@ XLOGShmemSize(void)
51875218
return size;
51885219
}
51895220

5221+
/*
5222+
* Initialization of shared memory for XLOG
5223+
*/
5224+
Size
5225+
XLOGShmemSize(void)
5226+
{
5227+
return XLOGCtlShmemSize() +
5228+
hash_estimate_size(lastWrittenLsnCacheSize, sizeof(LastWrittenLsnCacheEntry));
5229+
}
5230+
51905231
void
51915232
XLOGShmemInit(void)
51925233
{
@@ -5216,6 +5257,15 @@ XLOGShmemInit(void)
52165257
XLogCtl = (XLogCtlData *)
52175258
ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
52185259

5260+
{
5261+
static HASHCTL info;
5262+
info.keysize = sizeof(BufferTag);
5263+
info.entrysize = sizeof(LastWrittenLsnCacheEntry);
5264+
lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
5265+
lastWrittenLsnCacheSize, lastWrittenLsnCacheSize,
5266+
&info,
5267+
HASH_ELEM | HASH_BLOBS);
5268+
}
52195269
localControlFile = ControlFile;
52205270
ControlFile = (ControlFileData *)
52215271
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
@@ -8119,7 +8169,8 @@ StartupXLOG(void)
81198169

81208170
XLogCtl->LogwrtRqst.Write = EndOfLog;
81218171
XLogCtl->LogwrtRqst.Flush = EndOfLog;
8122-
XLogCtl->lastWrittenPageLSN = EndOfLog;
8172+
XLogCtl->maxLastWrittenLsn = EndOfLog;
8173+
dlist_init(&XLogCtl->lastWrittenLsnLRU);
81238174

81248175
LocalSetXLogInsertAllowed();
81258176

@@ -8895,29 +8946,141 @@ GetInsertRecPtr(void)
88958946
}
88968947

88978948
/*
8898-
* GetLastWrittenPageLSN -- Returns maximal LSN of written page
8949+
* GetLastWrittenLSN -- Returns maximal LSN of written page.
8950+
* It returns an upper bound for the last written LSN of a given page,
8951+
* either from a cached last written LSN or a global maximum last written LSN.
8952+
* If rnode.relNode is InvalidOid then we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
8953+
* If the cache is large enough, iterating through all hash items may be rather expensive.
8954+
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
88998955
*/
89008956
XLogRecPtr
8901-
GetLastWrittenPageLSN(void)
8957+
GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
89028958
{
89038959
XLogRecPtr lsn;
8904-
SpinLockAcquire(&XLogCtl->info_lck);
8905-
lsn = XLogCtl->lastWrittenPageLSN;
8906-
SpinLockRelease(&XLogCtl->info_lck);
8960+
LastWrittenLsnCacheEntry* entry;
8961+
8962+
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
8963+
8964+
/* Maximal last written LSN among all non-cached pages */
8965+
lsn = XLogCtl->maxLastWrittenLsn;
8966+
8967+
if (rnode.relNode != InvalidOid)
8968+
{
8969+
BufferTag key;
8970+
key.rnode = rnode;
8971+
key.forkNum = forknum;
8972+
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
8973+
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
8974+
if (entry != NULL)
8975+
lsn = entry->lsn;
8976+
}
8977+
else
8978+
{
8979+
HASH_SEQ_STATUS seq;
8980+
/* Find maximum of all cached LSNs */
8981+
hash_seq_init(&seq, lastWrittenLsnCache);
8982+
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
8983+
{
8984+
if (entry->lsn > lsn)
8985+
lsn = entry->lsn;
8986+
}
8987+
}
8988+
LWLockRelease(LastWrittenLsnLock);
89078989

89088990
return lsn;
89098991
}
89108992

89118993
/*
8912-
* SetLastWrittenPageLSN -- Set maximal LSN of written page
8994+
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
8995+
* We maintain cache of last written LSNs with limited size and LRU replacement
8996+
* policy. To reduce cache size we store max LSN not for each page, but for
8997+
* bucket (1024 blocks). This cache allows us to use an old LSN when
8998+
* requesting pages of unchanged or appended relations.
8999+
*
9000+
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
9001+
* SetLastWrittenLSNForDatabase passes such a dummy rnode; it is used by the createdb and dbase_redo functions.
89139002
*/
89149003
void
8915-
SetLastWrittenPageLSN(XLogRecPtr lsn)
9004+
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber till)
89169005
{
8917-
SpinLockAcquire(&XLogCtl->info_lck);
8918-
if (lsn > XLogCtl->lastWrittenPageLSN)
8919-
XLogCtl->lastWrittenPageLSN = lsn;
8920-
SpinLockRelease(&XLogCtl->info_lck);
9006+
if (lsn == InvalidXLogRecPtr)
9007+
return;
9008+
9009+
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
9010+
if (rnode.relNode == InvalidOid)
9011+
{
9012+
if (lsn > XLogCtl->maxLastWrittenLsn)
9013+
XLogCtl->maxLastWrittenLsn = lsn;
9014+
}
9015+
else
9016+
{
9017+
LastWrittenLsnCacheEntry* entry;
9018+
BufferTag key;
9019+
bool found;
9020+
BlockNumber bucket;
9021+
9022+
key.rnode = rnode;
9023+
key.forkNum = forknum;
9024+
for (bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
9025+
bucket <= till / LAST_WRITTEN_LSN_CACHE_BUCKET;
9026+
bucket++)
9027+
{
9028+
key.blockNum = bucket;
9029+
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
9030+
if (found)
9031+
{
9032+
if (lsn > entry->lsn)
9033+
entry->lsn = lsn;
9034+
/* Unlink from LRU list */
9035+
dlist_delete(&entry->lru_node);
9036+
}
9037+
else
9038+
{
9039+
entry->lsn = lsn;
9040+
if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize)
9041+
{
9042+
/* Replace least recently used entry */
9043+
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU));
9044+
/* Adjust max LSN for not cached relations/chunks if needed */
9045+
if (victim->lsn > XLogCtl->maxLastWrittenLsn)
9046+
XLogCtl->maxLastWrittenLsn = victim->lsn;
9047+
9048+
hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
9049+
}
9050+
}
9051+
/* Link to the end of LRU list */
9052+
dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node);
9053+
}
9054+
}
9055+
LWLockRelease(LastWrittenLsnLock);
9056+
}
9057+
9058+
/*
9059+
* SetLastWrittenLSNForBlock -- Set maximal LSN for block
9060+
*/
9061+
void
9062+
SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
9063+
{
9064+
SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, blkno);
9065+
}
9066+
9067+
/*
9068+
* SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata
9069+
*/
9070+
void
9071+
SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum)
9072+
{
9073+
SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO);
9074+
}
9075+
9076+
/*
9077+
* SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database
9078+
*/
9079+
void
9080+
SetLastWrittenLSNForDatabase(XLogRecPtr lsn)
9081+
{
9082+
RelFileNode dummyNode = {InvalidOid, InvalidOid, InvalidOid};
9083+
SetLastWrittenLSNForBlock(lsn, dummyNode, MAIN_FORKNUM, 0);
89219084
}
89229085

89239086
/*

src/backend/commands/dbcommands.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
685685

686686
lsn = XLogInsert(RM_DBASE_ID,
687687
XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
688-
SetLastWrittenPageLSN(lsn);
688+
SetLastWrittenLSNForDatabase(lsn);
689689
}
690690
}
691691
table_endscan(scan);
@@ -2363,8 +2363,7 @@ dbase_redo(XLogReaderState *record)
23632363
*/
23642364
{
23652365
XLogRecPtr lsn = record->EndRecPtr;
2366-
2367-
SetLastWrittenPageLSN(lsn);
2366+
SetLastWrittenLSNForDatabase(lsn);
23682367
}
23692368
}
23702369
else if (info == XLOG_DBASE_DROP)

src/backend/storage/lmgr/lwlocknames.txt

+1
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,4 @@ XactTruncationLock 44
5353
# 45 was XactTruncationLock until removal of BackendRandomLock
5454
WrapLimitsVacuumLock 46
5555
NotifyQueueTailLock 47
56+
LastWrittenLsnLock 48

src/backend/utils/misc/guc.c

+10
Original file line numberDiff line numberDiff line change
@@ -2357,6 +2357,16 @@ static struct config_int ConfigureNamesInt[] =
23572357
NULL, NULL, NULL
23582358
},
23592359

2360+
{
2361+
{"lsn_cache_size", PGC_POSTMASTER, UNGROUPED,
2362+
gettext_noop("Size of las written LSN cache used by Neon."),
2363+
NULL
2364+
},
2365+
&lastWrittenLsnCacheSize,
2366+
1024, 10, 1000000, /* 1024 is enough to hold 10GB database with 8Mb bucket */
2367+
NULL, NULL, NULL
2368+
},
2369+
23602370
{
23612371
{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
23622372
gettext_noop("Sets the maximum number of temporary buffers used by each session."),

0 commit comments

Comments
 (0)