Skip to content

Commit 3bdb9bb

Browse files
knizhniktristan957
authored and committed
Merge last written cache lsn with new main branch (#201)
1 parent 65fdd36 commit 3bdb9bb

File tree

8 files changed

+244
-28
lines changed

8 files changed

+244
-28
lines changed

src/backend/access/gin/gininsert.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
421421
log_newpage_range(index, MAIN_FORKNUM,
422422
0, RelationGetNumberOfBlocks(index),
423423
true);
424+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
425+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
424426
}
425-
SetLastWrittenPageLSN(XactLastRecEnd);
426427

427428
smgr_end_unlogged_build(index->rd_smgr);
428429

src/backend/access/gist/gistbuild.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,11 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
342342
log_newpage_range(index, MAIN_FORKNUM,
343343
0, RelationGetNumberOfBlocks(index),
344344
true);
345+
SetLastWrittenLSNForBlockRange(XactLastRecEnd,
346+
index->rd_smgr->smgr_rnode.node,
347+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
348+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
345349
}
346-
SetLastWrittenPageLSN(XactLastRecEnd);
347-
348350
smgr_end_unlogged_build(index->rd_smgr);
349351
}
350352

@@ -475,7 +477,9 @@ gist_indexsortbuild(GISTBuildState *state)
475477

476478
lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
477479
levelstate->pages[0], true);
478-
SetLastWrittenPageLSN(lsn);
480+
SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node,
481+
MAIN_FORKNUM, GIST_ROOT_BLKNO);
482+
SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
479483
}
480484

481485
pfree(levelstate->pages[0]);

src/backend/access/spgist/spginsert.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,10 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
143143
log_newpage_range(index, MAIN_FORKNUM,
144144
0, RelationGetNumberOfBlocks(index),
145145
true);
146+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node,
147+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
148+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
146149
}
147-
SetLastWrittenPageLSN(XactLastRecEnd);
148150

149151
smgr_end_unlogged_build(index->rd_smgr);
150152

src/backend/access/transam/xlog.c

+181-19
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
#include "replication/walreceiver.h"
8686
#include "replication/walsender.h"
8787
#include "storage/bufmgr.h"
88+
#include "storage/buf_internals.h"
8889
#include "storage/fd.h"
8990
#include "storage/ipc.h"
9091
#include "storage/large_object.h"
@@ -137,6 +138,7 @@ int max_slot_wal_keep_size_mb = -1;
137138
int wal_decode_buffer_size = 512 * 1024;
138139
bool track_wal_io_timing = false;
139140
uint64 predefined_sysidentifier;
141+
int lastWrittenLsnCacheSize;
140142

141143
#ifdef WAL_DEBUG
142144
bool XLOG_DEBUG = false;
@@ -199,6 +201,25 @@ const struct config_enum_entry archive_mode_options[] = {
199201
{NULL, 0, false}
200202
};
201203

204+
typedef struct LastWrittenLsnCacheEntry
205+
{
206+
BufferTag key;
207+
XLogRecPtr lsn;
208+
/* doubly linked list node for the LRU replacement algorithm */
209+
dlist_node lru_node;
210+
} LastWrittenLsnCacheEntry;
211+
212+
213+
/*
214+
* Cache of last written LSN for each relation chunk (hash bucket).
215+
* Also, to provide a request LSN for smgrnblocks and smgrexists, there is a pseudo-key (InvalidBlockId) which stores the LSN of the last
216+
* relation metadata update.
217+
* Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"),
218+
* entries are replaced using an LRU algorithm based on a doubly linked list.
219+
* Access to this cache is protected by 'LastWrittenLsnLock'.
220+
*/
221+
static HTAB *lastWrittenLsnCache;
222+
202223
/*
203224
* Statistics for current checkpoint are collected in this global struct.
204225
* Because only the checkpointer or a stand-alone backend can perform
@@ -552,7 +573,17 @@ typedef struct XLogCtlData
552573
* XLOG_FPW_CHANGE record that instructs full_page_writes is disabled.
553574
*/
554575
XLogRecPtr lastFpwDisableRecPtr;
555-
XLogRecPtr lastWrittenPageLSN;
576+
577+
/*
578+
* Maximal last written LSN for pages not present in lastWrittenLsnCache
579+
*/
580+
XLogRecPtr maxLastWrittenLsn;
581+
582+
/*
583+
* Doubly linked list implementing the LRU replacement policy for the last written LSN cache.
584+
* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.
585+
*/
586+
dlist_head lastWrittenLsnLRU;
556587

557588
/* neon: copy of startup's RedoStartLSN for walproposer's use */
558589
XLogRecPtr RedoStartLSN;
@@ -575,6 +606,8 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
575606
*/
576607
static ControlFileData *ControlFile = NULL;
577608

609+
#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */
610+
578611
/*
579612
* Calculate the amount of space left on the page after 'endptr'. Beware
580613
* multiple evaluation!
@@ -4355,11 +4388,8 @@ LocalProcessControlFile(bool reset)
43554388
ReadControlFile();
43564389
}
43574390

4358-
/*
4359-
* Initialization of shared memory for XLOG
4360-
*/
4361-
Size
4362-
XLOGShmemSize(void)
4391+
static Size
4392+
XLOGCtlShmemSize(void)
43634393
{
43644394
Size size;
43654395

@@ -4408,6 +4438,16 @@ XLOGShmemSize(void)
44084438
return size;
44094439
}
44104440

4441+
/*
4442+
* Initialization of shared memory for XLOG
4443+
*/
4444+
Size
4445+
XLOGShmemSize(void)
4446+
{
4447+
return XLOGCtlShmemSize() +
4448+
hash_estimate_size(lastWrittenLsnCacheSize, sizeof(LastWrittenLsnCacheEntry));
4449+
}
4450+
44114451
void
44124452
XLOGShmemInit(void)
44134453
{
@@ -4437,6 +4477,15 @@ XLOGShmemInit(void)
44374477
XLogCtl = (XLogCtlData *)
44384478
ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
44394479

4480+
{
4481+
static HASHCTL info;
4482+
info.keysize = sizeof(BufferTag);
4483+
info.entrysize = sizeof(LastWrittenLsnCacheEntry);
4484+
lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
4485+
lastWrittenLsnCacheSize, lastWrittenLsnCacheSize,
4486+
&info,
4487+
HASH_ELEM | HASH_BLOBS);
4488+
}
44404489
localControlFile = ControlFile;
44414490
ControlFile = (ControlFileData *)
44424491
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
@@ -5623,7 +5672,8 @@ StartupXLOG(void)
56235672

56245673
XLogCtl->LogwrtRqst.Write = EndOfLog;
56255674
XLogCtl->LogwrtRqst.Flush = EndOfLog;
5626-
XLogCtl->lastWrittenPageLSN = EndOfLog;
5675+
XLogCtl->maxLastWrittenLsn = EndOfLog;
5676+
dlist_init(&XLogCtl->lastWrittenLsnLRU);
56275677

56285678
/*
56295679
* Preallocate additional log files, if wanted.
@@ -6051,29 +6101,141 @@ GetInsertRecPtr(void)
60516101
}
60526102

60536103
/*
6054-
* GetLastWrittenPageLSN -- Returns maximal LSN of written page
6104+
* GetLastWrittenLSN -- Returns maximal LSN of written page.
6105+
* It returns an upper bound for the last written LSN of a given page,
6106+
* either from a cached last written LSN or a global maximum last written LSN.
6107+
* If rnode.relNode is InvalidOid, we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
6108+
* If the cache is large enough, iterating through all hash items may be rather expensive.
6109+
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
60556110
*/
60566111
XLogRecPtr
6057-
GetLastWrittenPageLSN(void)
6112+
GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
60586113
{
60596114
XLogRecPtr lsn;
6060-
SpinLockAcquire(&XLogCtl->info_lck);
6061-
lsn = XLogCtl->lastWrittenPageLSN;
6062-
SpinLockRelease(&XLogCtl->info_lck);
6115+
LastWrittenLsnCacheEntry* entry;
6116+
6117+
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
6118+
6119+
/* Maximal last written LSN among all non-cached pages */
6120+
lsn = XLogCtl->maxLastWrittenLsn;
6121+
6122+
if (rnode.relNode != InvalidOid)
6123+
{
6124+
BufferTag key;
6125+
key.rnode = rnode;
6126+
key.forkNum = forknum;
6127+
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
6128+
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
6129+
if (entry != NULL)
6130+
lsn = entry->lsn;
6131+
}
6132+
else
6133+
{
6134+
HASH_SEQ_STATUS seq;
6135+
/* Find maximum of all cached LSNs */
6136+
hash_seq_init(&seq, lastWrittenLsnCache);
6137+
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
6138+
{
6139+
if (entry->lsn > lsn)
6140+
lsn = entry->lsn;
6141+
}
6142+
}
6143+
LWLockRelease(LastWrittenLsnLock);
60636144

60646145
return lsn;
60656146
}
60666147

60676148
/*
6068-
* SetLastWrittenPageLSN -- Set maximal LSN of written page
6149+
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
6150+
* We maintain cache of last written LSNs with limited size and LRU replacement
6151+
* policy. To reduce cache size we store max LSN not for each page, but for
6152+
* bucket (1024 blocks). This cache allows an old LSN to be used when
6153+
* requesting pages of unchanged or appended relations.
6154+
*
6155+
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
6156+
* Setting the last written LSN with a dummy rnode (see SetLastWrittenLSNForDatabase) is used by the createdb and dbase_redo functions.
60696157
*/
60706158
void
6071-
SetLastWrittenPageLSN(XLogRecPtr lsn)
6159+
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber till)
60726160
{
6073-
SpinLockAcquire(&XLogCtl->info_lck);
6074-
if (lsn > XLogCtl->lastWrittenPageLSN)
6075-
XLogCtl->lastWrittenPageLSN = lsn;
6076-
SpinLockRelease(&XLogCtl->info_lck);
6161+
if (lsn == InvalidXLogRecPtr)
6162+
return;
6163+
6164+
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
6165+
if (rnode.relNode == InvalidOid)
6166+
{
6167+
if (lsn > XLogCtl->maxLastWrittenLsn)
6168+
XLogCtl->maxLastWrittenLsn = lsn;
6169+
}
6170+
else
6171+
{
6172+
LastWrittenLsnCacheEntry* entry;
6173+
BufferTag key;
6174+
bool found;
6175+
BlockNumber bucket;
6176+
6177+
key.rnode = rnode;
6178+
key.forkNum = forknum;
6179+
for (bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
6180+
bucket <= till / LAST_WRITTEN_LSN_CACHE_BUCKET;
6181+
bucket++)
6182+
{
6183+
key.blockNum = bucket;
6184+
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
6185+
if (found)
6186+
{
6187+
if (lsn > entry->lsn)
6188+
entry->lsn = lsn;
6189+
/* Unlink from LRU list */
6190+
dlist_delete(&entry->lru_node);
6191+
}
6192+
else
6193+
{
6194+
entry->lsn = lsn;
6195+
if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize)
6196+
{
6197+
/* Replace least recently used entry */
6198+
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU));
6199+
/* Adjust max LSN for not cached relations/chunks if needed */
6200+
if (victim->lsn > XLogCtl->maxLastWrittenLsn)
6201+
XLogCtl->maxLastWrittenLsn = victim->lsn;
6202+
6203+
hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
6204+
}
6205+
}
6206+
/* Link to the end of LRU list */
6207+
dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node);
6208+
}
6209+
}
6210+
LWLockRelease(LastWrittenLsnLock);
6211+
}
6212+
6213+
/*
6214+
* SetLastWrittenLSNForBlock -- Set maximal LSN for block
6215+
*/
6216+
void
6217+
SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
6218+
{
6219+
SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, blkno);
6220+
}
6221+
6222+
/*
6223+
* SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata
6224+
*/
6225+
void
6226+
SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum)
6227+
{
6228+
SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO);
6229+
}
6230+
6231+
/*
6232+
* SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database
6233+
*/
6234+
void
6235+
SetLastWrittenLSNForDatabase(XLogRecPtr lsn)
6236+
{
6237+
RelFileNode dummyNode = {InvalidOid, InvalidOid, InvalidOid};
6238+
SetLastWrittenLSNForBlock(lsn, dummyNode, MAIN_FORKNUM, 0);
60776239
}
60786240

60796241
void
@@ -6324,7 +6486,7 @@ LogCheckpointEnd(bool restartpoint)
63246486
average_sync_time = 0;
63256487
if (CheckpointStats.ckpt_sync_rels > 0)
63266488
average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6327-
CheckpointStats.ckpt_sync_rels;
6489+
CheckpointStats.ckpt_sync_rels;
63286490
average_msecs = (long) ((average_sync_time + 999) / 1000);
63296491

63306492
if (restartpoint)

0 commit comments

Comments
 (0)