Skip to content

Commit a173fac

Browse files
knizhnikMMeent
authored and committed
Merge last written cache lsn with new main branch (#201)
1 parent 2e63703 commit a173fac

File tree

8 files changed

+217
-28
lines changed

8 files changed

+217
-28
lines changed

src/backend/access/gin/gininsert.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
421421
log_newpage_range(index, MAIN_FORKNUM,
422422
0, RelationGetNumberOfBlocks(index),
423423
true);
424+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
425+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
424426
}
425-
SetLastWrittenPageLSN(XactLastRecEnd);
426427

427428
smgr_end_unlogged_build(index->rd_smgr);
428429

src/backend/access/gist/gistbuild.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,11 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
335335
log_newpage_range(index, MAIN_FORKNUM,
336336
0, RelationGetNumberOfBlocks(index),
337337
true);
338+
SetLastWrittenLSNForBlockRange(XactLastRecEnd,
339+
index->rd_smgr->smgr_rnode.node,
340+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
341+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
338342
}
339-
SetLastWrittenPageLSN(XactLastRecEnd);
340-
341343
smgr_end_unlogged_build(index->rd_smgr);
342344
}
343345

@@ -467,7 +469,9 @@ gist_indexsortbuild(GISTBuildState *state)
467469

468470
lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
469471
pagestate->page, true);
470-
SetLastWrittenPageLSN(lsn);
472+
SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node,
473+
MAIN_FORKNUM, GIST_ROOT_BLKNO);
474+
SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
471475
}
472476

473477
pfree(pagestate->page);

src/backend/access/spgist/spginsert.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,10 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
143143
log_newpage_range(index, MAIN_FORKNUM,
144144
0, RelationGetNumberOfBlocks(index),
145145
true);
146+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node,
147+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
148+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
146149
}
147-
SetLastWrittenPageLSN(XactLastRecEnd);
148150

149151
smgr_end_unlogged_build(index->rd_smgr);
150152

src/backend/access/transam/xlog.c

+181-18
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "replication/walreceiver.h"
6262
#include "replication/walsender.h"
6363
#include "storage/bufmgr.h"
64+
#include "storage/buf_internals.h"
6465
#include "storage/fd.h"
6566
#include "storage/ipc.h"
6667
#include "storage/large_object.h"
@@ -113,6 +114,7 @@ int wal_retrieve_retry_interval = 5000;
113114
int max_slot_wal_keep_size_mb = -1;
114115
bool track_wal_io_timing = false;
115116
uint64 predefined_sysidentifier;
117+
int lastWrittenLsnCacheSize;
116118

117119
#ifdef WAL_DEBUG
118120
bool XLOG_DEBUG = false;
@@ -182,6 +184,26 @@ const struct config_enum_entry recovery_target_action_options[] = {
182184
{NULL, 0, false}
183185
};
184186

187+
188+
typedef struct LastWrittenLsnCacheEntry
189+
{
190+
BufferTag key;
191+
XLogRecPtr lsn;
192+
/* doubly linked list for LRU replacement algorithm */
193+
dlist_node lru_node;
194+
} LastWrittenLsnCacheEntry;
195+
196+
197+
/*
198+
* Cache of last written LSN for each relation chunk (hash bucket).
199+
* Also, to provide a request LSN for smgrnblocks and smgrexists, there is a pseudokey=InvalidBlockId which stores the LSN of the last
200+
* relation metadata update.
201+
* Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"),
202+
* entries are replaced using an LRU algorithm based on a doubly linked list.
203+
* Access to this cache is protected by 'LastWrittenLsnLock'.
204+
*/
205+
static HTAB *lastWrittenLsnCache;
206+
185207
/*
186208
* Statistics for current checkpoint are collected in this global struct.
187209
* Because only the checkpointer or a stand-alone backend can perform
@@ -749,7 +771,17 @@ typedef struct XLogCtlData
749771
* XLOG_FPW_CHANGE record that instructs full_page_writes is disabled.
750772
*/
751773
XLogRecPtr lastFpwDisableRecPtr;
752-
XLogRecPtr lastWrittenPageLSN;
774+
775+
/*
776+
* Maximal last written LSN for pages not present in lastWrittenLsnCache
777+
*/
778+
XLogRecPtr maxLastWrittenLsn;
779+
780+
/*
781+
* Doubly linked list to implement LRU replacement policy for last written LSN cache.
782+
* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.
783+
*/
784+
dlist_head lastWrittenLsnLRU;
753785

754786
/* neon: copy of startup's RedoStartLSN for walproposer's use */
755787
XLogRecPtr RedoStartLSN;
@@ -772,6 +804,8 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
772804
*/
773805
static ControlFileData *ControlFile = NULL;
774806

807+
#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */
808+
775809
/*
776810
* Calculate the amount of space left on the page after 'endptr'. Beware
777811
* multiple evaluation!
@@ -5141,11 +5175,8 @@ LocalProcessControlFile(bool reset)
51415175
ReadControlFile();
51425176
}
51435177

5144-
/*
5145-
* Initialization of shared memory for XLOG
5146-
*/
5147-
Size
5148-
XLOGShmemSize(void)
5178+
static Size
5179+
XLOGCtlShmemSize(void)
51495180
{
51505181
Size size;
51515182

@@ -5185,6 +5216,16 @@ XLOGShmemSize(void)
51855216
return size;
51865217
}
51875218

5219+
/*
5220+
* Initialization of shared memory for XLOG
5221+
*/
5222+
Size
5223+
XLOGShmemSize(void)
5224+
{
5225+
return XLOGCtlShmemSize() +
5226+
hash_estimate_size(lastWrittenLsnCacheSize, sizeof(LastWrittenLsnCacheEntry));
5227+
}
5228+
51885229
void
51895230
XLOGShmemInit(void)
51905231
{
@@ -5214,6 +5255,15 @@ XLOGShmemInit(void)
52145255
XLogCtl = (XLogCtlData *)
52155256
ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
52165257

5258+
{
5259+
static HASHCTL info;
5260+
info.keysize = sizeof(BufferTag);
5261+
info.entrysize = sizeof(LastWrittenLsnCacheEntry);
5262+
lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
5263+
lastWrittenLsnCacheSize, lastWrittenLsnCacheSize,
5264+
&info,
5265+
HASH_ELEM | HASH_BLOBS);
5266+
}
52175267
localControlFile = ControlFile;
52185268
ControlFile = (ControlFileData *)
52195269
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
@@ -8117,7 +8167,8 @@ StartupXLOG(void)
81178167

81188168
XLogCtl->LogwrtRqst.Write = EndOfLog;
81198169
XLogCtl->LogwrtRqst.Flush = EndOfLog;
8120-
XLogCtl->lastWrittenPageLSN = EndOfLog;
8170+
XLogCtl->maxLastWrittenLsn = EndOfLog;
8171+
dlist_init(&XLogCtl->lastWrittenLsnLRU);
81218172

81228173
LocalSetXLogInsertAllowed();
81238174

@@ -8889,29 +8940,141 @@ GetInsertRecPtr(void)
88898940
}
88908941

88918942
/*
8892-
* GetLastWrittenPageLSN -- Returns maximal LSN of written page
8943+
* GetLastWrittenLSN -- Returns maximal LSN of written page.
8944+
* It returns an upper bound for the last written LSN of a given page,
8945+
* either from a cached last written LSN or a global maximum last written LSN.
8946+
* If rnode.relNode is InvalidOid then we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
8947+
* If the cache is large enough, iterating through all hash items may be rather expensive.
8948+
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
88938949
*/
88948950
XLogRecPtr
8895-
GetLastWrittenPageLSN(void)
8951+
GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
88968952
{
88978953
XLogRecPtr lsn;
8898-
SpinLockAcquire(&XLogCtl->info_lck);
8899-
lsn = XLogCtl->lastWrittenPageLSN;
8900-
SpinLockRelease(&XLogCtl->info_lck);
8954+
LastWrittenLsnCacheEntry* entry;
8955+
8956+
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
8957+
8958+
/* Maximal last written LSN among all non-cached pages */
8959+
lsn = XLogCtl->maxLastWrittenLsn;
8960+
8961+
if (rnode.relNode != InvalidOid)
8962+
{
8963+
BufferTag key;
8964+
key.rnode = rnode;
8965+
key.forkNum = forknum;
8966+
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
8967+
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
8968+
if (entry != NULL)
8969+
lsn = entry->lsn;
8970+
}
8971+
else
8972+
{
8973+
HASH_SEQ_STATUS seq;
8974+
/* Find maximum of all cached LSNs */
8975+
hash_seq_init(&seq, lastWrittenLsnCache);
8976+
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
8977+
{
8978+
if (entry->lsn > lsn)
8979+
lsn = entry->lsn;
8980+
}
8981+
}
8982+
LWLockRelease(LastWrittenLsnLock);
89018983

89028984
return lsn;
89038985
}
89048986

89058987
/*
8906-
* SetLastWrittenPageLSN -- Set maximal LSN of written page
8988+
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
8989+
* We maintain cache of last written LSNs with limited size and LRU replacement
8990+
* policy. To reduce cache size we store max LSN not for each page, but for
8991+
* bucket (1024 blocks). This cache allows an old LSN to be used when
8992+
* requesting pages of unchanged or appended relations.
8993+
*
8994+
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
8995+
* SetLastWrittenLSNForDatabase passes such a dummy rnode; it is used by the createdb and dbase_redo functions.
89078996
*/
89088997
void
8909-
SetLastWrittenPageLSN(XLogRecPtr lsn)
8998+
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber till)
89108999
{
8911-
SpinLockAcquire(&XLogCtl->info_lck);
8912-
if (lsn > XLogCtl->lastWrittenPageLSN)
8913-
XLogCtl->lastWrittenPageLSN = lsn;
8914-
SpinLockRelease(&XLogCtl->info_lck);
9000+
if (lsn == InvalidXLogRecPtr)
9001+
return;
9002+
9003+
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
9004+
if (rnode.relNode == InvalidOid)
9005+
{
9006+
if (lsn > XLogCtl->maxLastWrittenLsn)
9007+
XLogCtl->maxLastWrittenLsn = lsn;
9008+
}
9009+
else
9010+
{
9011+
LastWrittenLsnCacheEntry* entry;
9012+
BufferTag key;
9013+
bool found;
9014+
BlockNumber bucket;
9015+
9016+
key.rnode = rnode;
9017+
key.forkNum = forknum;
9018+
for (bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
9019+
bucket <= till / LAST_WRITTEN_LSN_CACHE_BUCKET;
9020+
bucket++)
9021+
{
9022+
key.blockNum = bucket;
9023+
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
9024+
if (found)
9025+
{
9026+
if (lsn > entry->lsn)
9027+
entry->lsn = lsn;
9028+
/* Unlink from LRU list */
9029+
dlist_delete(&entry->lru_node);
9030+
}
9031+
else
9032+
{
9033+
entry->lsn = lsn;
9034+
if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize)
9035+
{
9036+
/* Replace least recently used entry */
9037+
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU));
9038+
/* Adjust max LSN for not cached relations/chunks if needed */
9039+
if (victim->lsn > XLogCtl->maxLastWrittenLsn)
9040+
XLogCtl->maxLastWrittenLsn = victim->lsn;
9041+
9042+
hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
9043+
}
9044+
}
9045+
/* Link to the end of LRU list */
9046+
dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node);
9047+
}
9048+
}
9049+
LWLockRelease(LastWrittenLsnLock);
9050+
}
9051+
9052+
/*
9053+
* SetLastWrittenLSNForBlock -- Set maximal LSN for block
9054+
*/
9055+
void
9056+
SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
9057+
{
9058+
SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, blkno);
9059+
}
9060+
9061+
/*
9062+
* SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata
9063+
*/
9064+
void
9065+
SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum)
9066+
{
9067+
SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO);
9068+
}
9069+
9070+
/*
9071+
* SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database
9072+
*/
9073+
void
9074+
SetLastWrittenLSNForDatabase(XLogRecPtr lsn)
9075+
{
9076+
RelFileNode dummyNode = {InvalidOid, InvalidOid, InvalidOid};
9077+
SetLastWrittenLSNForBlock(lsn, dummyNode, MAIN_FORKNUM, 0);
89159078
}
89169079

89179080
/*

src/backend/commands/dbcommands.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
675675

676676
lsn = XLogInsert(RM_DBASE_ID,
677677
XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
678-
SetLastWrittenPageLSN(lsn);
678+
SetLastWrittenLSNForDatabase(lsn);
679679
}
680680
}
681681
table_endscan(scan);
@@ -2293,8 +2293,7 @@ dbase_redo(XLogReaderState *record)
22932293
*/
22942294
{
22952295
XLogRecPtr lsn = record->EndRecPtr;
2296-
2297-
SetLastWrittenPageLSN(lsn);
2296+
SetLastWrittenLSNForDatabase(lsn);
22982297
}
22992298
}
23002299
else if (info == XLOG_DBASE_DROP)

src/backend/storage/lmgr/lwlocknames.txt

+1
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,4 @@ XactTruncationLock 44
5353
# 45 was XactTruncationLock until removal of BackendRandomLock
5454
WrapLimitsVacuumLock 46
5555
NotifyQueueTailLock 47
56+
LastWrittenLsnLock 48

src/backend/utils/misc/guc.c

+10
Original file line numberDiff line numberDiff line change
@@ -2357,6 +2357,16 @@ static struct config_int ConfigureNamesInt[] =
23572357
NULL, NULL, NULL
23582358
},
23592359

2360+
{
2361+
{"lsn_cache_size", PGC_POSTMASTER, UNGROUPED,
2362+
gettext_noop("Size of las written LSN cache used by Neon."),
2363+
NULL
2364+
},
2365+
&lastWrittenLsnCacheSize,
2366+
1024, 10, 1000000, /* 1024 entries with 8MB buckets cover about 8GB of relation data */
2367+
NULL, NULL, NULL
2368+
},
2369+
23602370
{
23612371
{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
23622372
gettext_noop("Sets the maximum number of temporary buffers used by each session."),

0 commit comments

Comments
 (0)