Skip to content

Commit a298a8c

Browse files
knizhniktristan957
authored and committed
Merge last written cache lsn with new main branch (#201)
1 parent aa02529 commit a298a8c

File tree

8 files changed

+246
-29
lines changed

8 files changed

+246
-29
lines changed

src/backend/access/gin/gininsert.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
421421
log_newpage_range(index, MAIN_FORKNUM,
422422
0, RelationGetNumberOfBlocks(index),
423423
true);
424+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
425+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
424426
}
425-
SetLastWrittenPageLSN(XactLastRecEnd);
426427

427428
smgr_end_unlogged_build(index->rd_smgr);
428429

src/backend/access/gist/gistbuild.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,11 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
342342
log_newpage_range(index, MAIN_FORKNUM,
343343
0, RelationGetNumberOfBlocks(index),
344344
true);
345+
SetLastWrittenLSNForBlockRange(XactLastRecEnd,
346+
index->rd_smgr->smgr_rnode.node,
347+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
348+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
345349
}
346-
SetLastWrittenPageLSN(XactLastRecEnd);
347-
348350
smgr_end_unlogged_build(index->rd_smgr);
349351
}
350352

@@ -475,7 +477,9 @@ gist_indexsortbuild(GISTBuildState *state)
475477

476478
lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
477479
levelstate->pages[0], true);
478-
SetLastWrittenPageLSN(lsn);
480+
SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node,
481+
MAIN_FORKNUM, GIST_ROOT_BLKNO);
482+
SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
479483
}
480484

481485
pfree(levelstate->pages[0]);

src/backend/access/spgist/spginsert.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,10 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
143143
log_newpage_range(index, MAIN_FORKNUM,
144144
0, RelationGetNumberOfBlocks(index),
145145
true);
146+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node,
147+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
148+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
146149
}
147-
SetLastWrittenPageLSN(XactLastRecEnd);
148150

149151
smgr_end_unlogged_build(index->rd_smgr);
150152

src/backend/access/transam/xlog.c

+181-19
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
#include "replication/walreceiver.h"
8686
#include "replication/walsender.h"
8787
#include "storage/bufmgr.h"
88+
#include "storage/buf_internals.h"
8889
#include "storage/fd.h"
8990
#include "storage/ipc.h"
9091
#include "storage/large_object.h"
@@ -137,6 +138,7 @@ int max_slot_wal_keep_size_mb = -1;
137138
int wal_decode_buffer_size = 512 * 1024;
138139
bool track_wal_io_timing = false;
139140
uint64 predefined_sysidentifier;
141+
int lastWrittenLsnCacheSize;
140142

141143
#ifdef WAL_DEBUG
142144
bool XLOG_DEBUG = false;
@@ -199,6 +201,25 @@ const struct config_enum_entry archive_mode_options[] = {
199201
{NULL, 0, false}
200202
};
201203

204+
typedef struct LastWrittenLsnCacheEntry
205+
{
206+
BufferTag key;
207+
XLogRecPtr lsn;
208+
/* doubly linked list node for the LRU replacement algorithm */
209+
dlist_node lru_node;
210+
} LastWrittenLsnCacheEntry;
211+
212+
213+
/*
214+
* Cache of last written LSN for each relation chunk (hash bucket).
215+
* To provide a request LSN for smgrnblocks and smgrexists, there is also a pseudokey=InvalidBlockId entry which stores the LSN of the last
216+
* relation metadata update.
217+
* Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"),
218+
* entries are replaced using an LRU algorithm based on a doubly linked list.
219+
* Access to this cache is protected by 'LastWrittenLsnLock'.
220+
*/
221+
static HTAB *lastWrittenLsnCache;
222+
202223
/*
203224
* Statistics for current checkpoint are collected in this global struct.
204225
* Because only the checkpointer or a stand-alone backend can perform
@@ -552,7 +573,17 @@ typedef struct XLogCtlData
552573
* XLOG_FPW_CHANGE record that instructs full_page_writes is disabled.
553574
*/
554575
XLogRecPtr lastFpwDisableRecPtr;
555-
XLogRecPtr lastWrittenPageLSN;
576+
577+
/*
578+
* Maximal last written LSN for pages not present in lastWrittenLsnCache
579+
*/
580+
XLogRecPtr maxLastWrittenLsn;
581+
582+
/*
583+
* Doubly linked list implementing the LRU replacement policy for the last written LSN cache.
584+
* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.
585+
*/
586+
dlist_head lastWrittenLsnLRU;
556587

557588
/* neon: copy of startup's RedoStartLSN for walproposer's use */
558589
XLogRecPtr RedoStartLSN;
@@ -575,6 +606,8 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
575606
*/
576607
static ControlFileData *ControlFile = NULL;
577608

609+
#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */
610+
578611
/*
579612
* Calculate the amount of space left on the page after 'endptr'. Beware
580613
* multiple evaluation!
@@ -4357,11 +4390,8 @@ LocalProcessControlFile(bool reset)
43574390
ReadControlFile();
43584391
}
43594392

4360-
/*
4361-
* Initialization of shared memory for XLOG
4362-
*/
4363-
Size
4364-
XLOGShmemSize(void)
4393+
static Size
4394+
XLOGCtlShmemSize(void)
43654395
{
43664396
Size size;
43674397

@@ -4410,6 +4440,16 @@ XLOGShmemSize(void)
44104440
return size;
44114441
}
44124442

4443+
/*
4444+
* Initialization of shared memory for XLOG
4445+
*/
4446+
Size
4447+
XLOGShmemSize(void)
4448+
{
4449+
return XLOGCtlShmemSize() +
4450+
hash_estimate_size(lastWrittenLsnCacheSize, sizeof(LastWrittenLsnCacheEntry));
4451+
}
4452+
44134453
void
44144454
XLOGShmemInit(void)
44154455
{
@@ -4439,6 +4479,15 @@ XLOGShmemInit(void)
44394479
XLogCtl = (XLogCtlData *)
44404480
ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
44414481

4482+
{
4483+
static HASHCTL info;
4484+
info.keysize = sizeof(BufferTag);
4485+
info.entrysize = sizeof(LastWrittenLsnCacheEntry);
4486+
lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
4487+
lastWrittenLsnCacheSize, lastWrittenLsnCacheSize,
4488+
&info,
4489+
HASH_ELEM | HASH_BLOBS);
4490+
}
44424491
localControlFile = ControlFile;
44434492
ControlFile = (ControlFileData *)
44444493
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
@@ -5625,7 +5674,8 @@ StartupXLOG(void)
56255674

56265675
XLogCtl->LogwrtRqst.Write = EndOfLog;
56275676
XLogCtl->LogwrtRqst.Flush = EndOfLog;
5628-
XLogCtl->lastWrittenPageLSN = EndOfLog;
5677+
XLogCtl->maxLastWrittenLsn = EndOfLog;
5678+
dlist_init(&XLogCtl->lastWrittenLsnLRU);
56295679

56305680
/*
56315681
* Preallocate additional log files, if wanted.
@@ -6053,29 +6103,141 @@ GetInsertRecPtr(void)
60536103
}
60546104

60556105
/*
6056-
* GetLastWrittenPageLSN -- Returns maximal LSN of written page
6106+
* GetLastWrittenLSN -- Returns maximal LSN of written page.
6107+
* It returns an upper bound for the last written LSN of a given page,
6108+
* either from a cached last written LSN or a global maximum last written LSN.
6109+
* If rnode.relNode is InvalidOid then we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
6110+
* If the cache is large, iterating through all hash items may be rather expensive.
6111+
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
60576112
*/
60586113
XLogRecPtr
6059-
GetLastWrittenPageLSN(void)
6114+
GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
60606115
{
60616116
XLogRecPtr lsn;
6062-
SpinLockAcquire(&XLogCtl->info_lck);
6063-
lsn = XLogCtl->lastWrittenPageLSN;
6064-
SpinLockRelease(&XLogCtl->info_lck);
6117+
LastWrittenLsnCacheEntry* entry;
6118+
6119+
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
6120+
6121+
/* Maximal last written LSN among all non-cached pages */
6122+
lsn = XLogCtl->maxLastWrittenLsn;
6123+
6124+
if (rnode.relNode != InvalidOid)
6125+
{
6126+
BufferTag key;
6127+
key.rnode = rnode;
6128+
key.forkNum = forknum;
6129+
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
6130+
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
6131+
if (entry != NULL)
6132+
lsn = entry->lsn;
6133+
}
6134+
else
6135+
{
6136+
HASH_SEQ_STATUS seq;
6137+
/* Find maximum of all cached LSNs */
6138+
hash_seq_init(&seq, lastWrittenLsnCache);
6139+
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
6140+
{
6141+
if (entry->lsn > lsn)
6142+
lsn = entry->lsn;
6143+
}
6144+
}
6145+
LWLockRelease(LastWrittenLsnLock);
60656146

60666147
return lsn;
60676148
}
60686149

60696150
/*
6070-
* SetLastWrittenPageLSN -- Set maximal LSN of written page
6151+
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
6152+
* We maintain cache of last written LSNs with limited size and LRU replacement
6153+
* policy. To reduce cache size we store max LSN not for each page, but for
6154+
* bucket (1024 blocks). This cache allows using an old LSN when
6155+
* requesting pages of unchanged or appended relations.
6156+
*
6157+
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
6158+
* SetLastWrittenLSNForDatabase, which passes a dummy rnode, is used by createdb and dbase_redo functions.
60716159
*/
60726160
void
6073-
SetLastWrittenPageLSN(XLogRecPtr lsn)
6161+
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber till)
60746162
{
6075-
SpinLockAcquire(&XLogCtl->info_lck);
6076-
if (lsn > XLogCtl->lastWrittenPageLSN)
6077-
XLogCtl->lastWrittenPageLSN = lsn;
6078-
SpinLockRelease(&XLogCtl->info_lck);
6163+
if (lsn == InvalidXLogRecPtr)
6164+
return;
6165+
6166+
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
6167+
if (rnode.relNode == InvalidOid)
6168+
{
6169+
if (lsn > XLogCtl->maxLastWrittenLsn)
6170+
XLogCtl->maxLastWrittenLsn = lsn;
6171+
}
6172+
else
6173+
{
6174+
LastWrittenLsnCacheEntry* entry;
6175+
BufferTag key;
6176+
bool found;
6177+
BlockNumber bucket;
6178+
6179+
key.rnode = rnode;
6180+
key.forkNum = forknum;
6181+
for (bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
6182+
bucket <= till / LAST_WRITTEN_LSN_CACHE_BUCKET;
6183+
bucket++)
6184+
{
6185+
key.blockNum = bucket;
6186+
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
6187+
if (found)
6188+
{
6189+
if (lsn > entry->lsn)
6190+
entry->lsn = lsn;
6191+
/* Unlink from LRU list */
6192+
dlist_delete(&entry->lru_node);
6193+
}
6194+
else
6195+
{
6196+
entry->lsn = lsn;
6197+
if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize)
6198+
{
6199+
/* Replace least recently used entry */
6200+
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU));
6201+
/* Adjust max LSN for not cached relations/chunks if needed */
6202+
if (victim->lsn > XLogCtl->maxLastWrittenLsn)
6203+
XLogCtl->maxLastWrittenLsn = victim->lsn;
6204+
6205+
hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
6206+
}
6207+
}
6208+
/* Link to the end of LRU list */
6209+
dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node);
6210+
}
6211+
}
6212+
LWLockRelease(LastWrittenLsnLock);
6213+
}
6214+
6215+
/*
6216+
* SetLastWrittenLSNForBlock -- Set maximal LSN for block
6217+
*/
6218+
void
6219+
SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
6220+
{
6221+
SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, blkno);
6222+
}
6223+
6224+
/*
6225+
* SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata
6226+
*/
6227+
void
6228+
SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum)
6229+
{
6230+
SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO);
6231+
}
6232+
6233+
/*
6234+
* SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database
6235+
*/
6236+
void
6237+
SetLastWrittenLSNForDatabase(XLogRecPtr lsn)
6238+
{
6239+
RelFileNode dummyNode = {InvalidOid, InvalidOid, InvalidOid};
6240+
SetLastWrittenLSNForBlock(lsn, dummyNode, MAIN_FORKNUM, 0);
60796241
}
60806242

60816243
void
@@ -6326,7 +6488,7 @@ LogCheckpointEnd(bool restartpoint)
63266488
average_sync_time = 0;
63276489
if (CheckpointStats.ckpt_sync_rels > 0)
63286490
average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6329-
CheckpointStats.ckpt_sync_rels;
6491+
CheckpointStats.ckpt_sync_rels;
63306492
average_msecs = (long) ((average_sync_time + 999) / 1000);
63316493

63326494
if (restartpoint)

0 commit comments

Comments
 (0)