Skip to content

Commit a4963aa

Browse files
authored
Merge last written cache lsn with new main branch (#201)
1 parent bbd2ab1 commit a4963aa

File tree

8 files changed

+217
-28
lines changed

8 files changed

+217
-28
lines changed

src/backend/access/gin/gininsert.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
421421
log_newpage_range(index, MAIN_FORKNUM,
422422
0, RelationGetNumberOfBlocks(index),
423423
true);
424+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
425+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
424426
}
425-
SetLastWrittenPageLSN(XactLastRecEnd);
426427

427428
smgr_end_unlogged_build(index->rd_smgr);
428429

src/backend/access/gist/gistbuild.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,11 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
335335
log_newpage_range(index, MAIN_FORKNUM,
336336
0, RelationGetNumberOfBlocks(index),
337337
true);
338+
SetLastWrittenLSNForBlockRange(XactLastRecEnd,
339+
index->rd_smgr->smgr_rnode.node,
340+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
341+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
338342
}
339-
SetLastWrittenPageLSN(XactLastRecEnd);
340-
341343
smgr_end_unlogged_build(index->rd_smgr);
342344
}
343345

@@ -469,7 +471,9 @@ gist_indexsortbuild(GISTBuildState *state)
469471

470472
lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
471473
pagestate->page, true);
472-
SetLastWrittenPageLSN(lsn);
474+
SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node,
475+
MAIN_FORKNUM, GIST_ROOT_BLKNO);
476+
SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
473477
}
474478

475479
pfree(pagestate->page);

src/backend/access/spgist/spginsert.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,10 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
143143
log_newpage_range(index, MAIN_FORKNUM,
144144
0, RelationGetNumberOfBlocks(index),
145145
true);
146+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node,
147+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
148+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
146149
}
147-
SetLastWrittenPageLSN(XactLastRecEnd);
148150

149151
smgr_end_unlogged_build(index->rd_smgr);
150152

src/backend/access/transam/xlog.c

+181-18
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "replication/walreceiver.h"
6262
#include "replication/walsender.h"
6363
#include "storage/bufmgr.h"
64+
#include "storage/buf_internals.h"
6465
#include "storage/fd.h"
6566
#include "storage/ipc.h"
6667
#include "storage/large_object.h"
@@ -113,6 +114,7 @@ int wal_retrieve_retry_interval = 5000;
113114
int max_slot_wal_keep_size_mb = -1;
114115
bool track_wal_io_timing = false;
115116
uint64 predefined_sysidentifier;
117+
int lastWrittenLsnCacheSize;
116118

117119
#ifdef WAL_DEBUG
118120
bool XLOG_DEBUG = false;
@@ -182,6 +184,26 @@ const struct config_enum_entry recovery_target_action_options[] = {
182184
{NULL, 0, false}
183185
};
184186

187+
188+
typedef struct LastWrittenLsnCacheEntry
189+
{
190+
BufferTag key;
191+
XLogRecPtr lsn;
192+
/* doubly linked list node for the LRU replacement algorithm */
193+
dlist_node lru_node;
194+
} LastWrittenLsnCacheEntry;
195+
196+
197+
/*
198+
* Cache of last written LSN for each relation chunk (hash bucket).
199+
* Also, to provide a request LSN for smgrnblocks and smgrexists, there is a pseudokey=InvalidBlockId which stores the LSN of the last
200+
* relation metadata update.
201+
* Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"),
202+
* pages are replaced using the LRU algorithm, based on a doubly linked list.
203+
* Access to this cache is protected by 'LastWrittenLsnLock'.
204+
*/
205+
static HTAB *lastWrittenLsnCache;
206+
185207
/*
186208
* Statistics for current checkpoint are collected in this global struct.
187209
* Because only the checkpointer or a stand-alone backend can perform
@@ -749,7 +771,17 @@ typedef struct XLogCtlData
749771
* XLOG_FPW_CHANGE record that instructs full_page_writes is disabled.
750772
*/
751773
XLogRecPtr lastFpwDisableRecPtr;
752-
XLogRecPtr lastWrittenPageLSN;
774+
775+
/*
776+
* Maximal last written LSN for pages not present in lastWrittenLsnCache
777+
*/
778+
XLogRecPtr maxLastWrittenLsn;
779+
780+
/*
781+
* Doubly linked list to implement the LRU replacement policy for the last written LSN cache.
782+
* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.
783+
*/
784+
dlist_head lastWrittenLsnLRU;
753785

754786
/* neon: copy of startup's RedoStartLSN for walproposer's use */
755787
XLogRecPtr RedoStartLSN;
@@ -772,6 +804,8 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
772804
*/
773805
static ControlFileData *ControlFile = NULL;
774806

807+
#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */
808+
775809
/*
776810
* Calculate the amount of space left on the page after 'endptr'. Beware
777811
* multiple evaluation!
@@ -5135,11 +5169,8 @@ LocalProcessControlFile(bool reset)
51355169
ReadControlFile();
51365170
}
51375171

5138-
/*
5139-
* Initialization of shared memory for XLOG
5140-
*/
5141-
Size
5142-
XLOGShmemSize(void)
5172+
static Size
5173+
XLOGCtlShmemSize(void)
51435174
{
51445175
Size size;
51455176

@@ -5179,6 +5210,16 @@ XLOGShmemSize(void)
51795210
return size;
51805211
}
51815212

5213+
/*
5214+
* Initialization of shared memory for XLOG
5215+
*/
5216+
Size
5217+
XLOGShmemSize(void)
5218+
{
5219+
return XLOGCtlShmemSize() +
5220+
hash_estimate_size(lastWrittenLsnCacheSize, sizeof(LastWrittenLsnCacheEntry));
5221+
}
5222+
51825223
void
51835224
XLOGShmemInit(void)
51845225
{
@@ -5208,6 +5249,15 @@ XLOGShmemInit(void)
52085249
XLogCtl = (XLogCtlData *)
52095250
ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
52105251

5252+
{
5253+
static HASHCTL info;
5254+
info.keysize = sizeof(BufferTag);
5255+
info.entrysize = sizeof(LastWrittenLsnCacheEntry);
5256+
lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
5257+
lastWrittenLsnCacheSize, lastWrittenLsnCacheSize,
5258+
&info,
5259+
HASH_ELEM | HASH_BLOBS);
5260+
}
52115261
localControlFile = ControlFile;
52125262
ControlFile = (ControlFileData *)
52135263
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
@@ -8098,7 +8148,8 @@ StartupXLOG(void)
80988148

80998149
XLogCtl->LogwrtRqst.Write = EndOfLog;
81008150
XLogCtl->LogwrtRqst.Flush = EndOfLog;
8101-
XLogCtl->lastWrittenPageLSN = EndOfLog;
8151+
XLogCtl->maxLastWrittenLsn = EndOfLog;
8152+
dlist_init(&XLogCtl->lastWrittenLsnLRU);
81028153

81038154
LocalSetXLogInsertAllowed();
81048155

@@ -8870,29 +8921,141 @@ GetInsertRecPtr(void)
88708921
}
88718922

88728923
/*
8873-
* GetLastWrittenPageLSN -- Returns maximal LSN of written page
8924+
* GetLastWrittenLSN -- Returns maximal LSN of written page.
8925+
* It returns an upper bound for the last written LSN of a given page,
8926+
* either from a cached last written LSN or a global maximum last written LSN.
8927+
* If rnode.relNode is InvalidOid then we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
8928+
* If the cache is large enough, iterating through all hash items may be rather expensive.
8929+
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
88748930
*/
88758931
XLogRecPtr
8876-
GetLastWrittenPageLSN(void)
8932+
GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
88778933
{
88788934
XLogRecPtr lsn;
8879-
SpinLockAcquire(&XLogCtl->info_lck);
8880-
lsn = XLogCtl->lastWrittenPageLSN;
8881-
SpinLockRelease(&XLogCtl->info_lck);
8935+
LastWrittenLsnCacheEntry* entry;
8936+
8937+
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
8938+
8939+
/* Maximal last written LSN among all non-cached pages */
8940+
lsn = XLogCtl->maxLastWrittenLsn;
8941+
8942+
if (rnode.relNode != InvalidOid)
8943+
{
8944+
BufferTag key;
8945+
key.rnode = rnode;
8946+
key.forkNum = forknum;
8947+
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
8948+
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
8949+
if (entry != NULL)
8950+
lsn = entry->lsn;
8951+
}
8952+
else
8953+
{
8954+
HASH_SEQ_STATUS seq;
8955+
/* Find maximum of all cached LSNs */
8956+
hash_seq_init(&seq, lastWrittenLsnCache);
8957+
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
8958+
{
8959+
if (entry->lsn > lsn)
8960+
lsn = entry->lsn;
8961+
}
8962+
}
8963+
LWLockRelease(LastWrittenLsnLock);
88828964

88838965
return lsn;
88848966
}
88858967

88868968
/*
8887-
* SetLastWrittenPageLSN -- Set maximal LSN of written page
8969+
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
8970+
* We maintain cache of last written LSNs with limited size and LRU replacement
8971+
* policy. To reduce cache size we store max LSN not for each page, but for
8972+
* bucket (1024 blocks). This cache allows using an old LSN when
8973+
* requesting pages of unchanged or appended relations.
8974+
*
8975+
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
8976+
* SetLastWrittenLSNForDatabase, which passes a dummy rnode, is used by the createdb and dbase_redo functions.
88888977
*/
88898978
void
8890-
SetLastWrittenPageLSN(XLogRecPtr lsn)
8979+
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber till)
88918980
{
8892-
SpinLockAcquire(&XLogCtl->info_lck);
8893-
if (lsn > XLogCtl->lastWrittenPageLSN)
8894-
XLogCtl->lastWrittenPageLSN = lsn;
8895-
SpinLockRelease(&XLogCtl->info_lck);
8981+
if (lsn == InvalidXLogRecPtr)
8982+
return;
8983+
8984+
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
8985+
if (rnode.relNode == InvalidOid)
8986+
{
8987+
if (lsn > XLogCtl->maxLastWrittenLsn)
8988+
XLogCtl->maxLastWrittenLsn = lsn;
8989+
}
8990+
else
8991+
{
8992+
LastWrittenLsnCacheEntry* entry;
8993+
BufferTag key;
8994+
bool found;
8995+
BlockNumber bucket;
8996+
8997+
key.rnode = rnode;
8998+
key.forkNum = forknum;
8999+
for (bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
9000+
bucket <= till / LAST_WRITTEN_LSN_CACHE_BUCKET;
9001+
bucket++)
9002+
{
9003+
key.blockNum = bucket;
9004+
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
9005+
if (found)
9006+
{
9007+
if (lsn > entry->lsn)
9008+
entry->lsn = lsn;
9009+
/* Unlink from LRU list */
9010+
dlist_delete(&entry->lru_node);
9011+
}
9012+
else
9013+
{
9014+
entry->lsn = lsn;
9015+
if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize)
9016+
{
9017+
/* Replace least recently used entry */
9018+
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU));
9019+
/* Adjust max LSN for not cached relations/chunks if needed */
9020+
if (victim->lsn > XLogCtl->maxLastWrittenLsn)
9021+
XLogCtl->maxLastWrittenLsn = victim->lsn;
9022+
9023+
hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
9024+
}
9025+
}
9026+
/* Link to the end of LRU list */
9027+
dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node);
9028+
}
9029+
}
9030+
LWLockRelease(LastWrittenLsnLock);
9031+
}
9032+
9033+
/*
9034+
* SetLastWrittenLSNForBlock -- Set maximal LSN for block
9035+
*/
9036+
void
9037+
SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
9038+
{
9039+
SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, blkno);
9040+
}
9041+
9042+
/*
9043+
* SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata
9044+
*/
9045+
void
9046+
SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum)
9047+
{
9048+
SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO);
9049+
}
9050+
9051+
/*
9052+
* SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database
9053+
*/
9054+
void
9055+
SetLastWrittenLSNForDatabase(XLogRecPtr lsn)
9056+
{
9057+
RelFileNode dummyNode = {InvalidOid, InvalidOid, InvalidOid};
9058+
SetLastWrittenLSNForBlock(lsn, dummyNode, MAIN_FORKNUM, 0);
88969059
}
88979060

88989061
/*

src/backend/commands/dbcommands.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
675675

676676
lsn = XLogInsert(RM_DBASE_ID,
677677
XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
678-
SetLastWrittenPageLSN(lsn);
678+
SetLastWrittenLSNForDatabase(lsn);
679679
}
680680
}
681681
table_endscan(scan);
@@ -2293,8 +2293,7 @@ dbase_redo(XLogReaderState *record)
22932293
*/
22942294
{
22952295
XLogRecPtr lsn = record->EndRecPtr;
2296-
2297-
SetLastWrittenPageLSN(lsn);
2296+
SetLastWrittenLSNForDatabase(lsn);
22982297
}
22992298
}
23002299
else if (info == XLOG_DBASE_DROP)

src/backend/storage/lmgr/lwlocknames.txt

+1
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,4 @@ XactTruncationLock 44
5353
# 45 was XactTruncationLock until removal of BackendRandomLock
5454
WrapLimitsVacuumLock 46
5555
NotifyQueueTailLock 47
56+
LastWrittenLsnLock 48

src/backend/utils/misc/guc.c

+10
Original file line numberDiff line numberDiff line change
@@ -2357,6 +2357,16 @@ static struct config_int ConfigureNamesInt[] =
23572357
NULL, NULL, NULL
23582358
},
23592359

2360+
{
2361+
{"lsn_cache_size", PGC_POSTMASTER, UNGROUPED,
2362+
gettext_noop("Size of las written LSN cache used by Neon."),
2363+
NULL
2364+
},
2365+
&lastWrittenLsnCacheSize,
2366+
1024, 10, 1000000, /* 1024 is enough to hold 10GB database with 8Mb bucket */
2367+
NULL, NULL, NULL
2368+
},
2369+
23602370
{
23612371
{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
23622372
gettext_noop("Sets the maximum number of temporary buffers used by each session."),

0 commit comments

Comments
 (0)