Skip to content

Commit b4cffc1

Browse files
knizhniklubennikovaav
authored andcommitted
Merge last written cache lsn with new main branch (#201)
1 parent d486d71 commit b4cffc1

File tree

8 files changed

+244
-28
lines changed

8 files changed

+244
-28
lines changed

src/backend/access/gin/gininsert.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
421421
log_newpage_range(index, MAIN_FORKNUM,
422422
0, RelationGetNumberOfBlocks(index),
423423
true);
424+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
425+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
424426
}
425-
SetLastWrittenPageLSN(XactLastRecEnd);
426427

427428
smgr_end_unlogged_build(index->rd_smgr);
428429

src/backend/access/gist/gistbuild.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,11 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
342342
log_newpage_range(index, MAIN_FORKNUM,
343343
0, RelationGetNumberOfBlocks(index),
344344
true);
345+
SetLastWrittenLSNForBlockRange(XactLastRecEnd,
346+
index->rd_smgr->smgr_rnode.node,
347+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
348+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
345349
}
346-
SetLastWrittenPageLSN(XactLastRecEnd);
347-
348350
smgr_end_unlogged_build(index->rd_smgr);
349351
}
350352

@@ -475,7 +477,9 @@ gist_indexsortbuild(GISTBuildState *state)
475477

476478
lsn = log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
477479
levelstate->pages[0], true);
478-
SetLastWrittenPageLSN(lsn);
480+
SetLastWrittenLSNForBlock(lsn, state->indexrel->rd_smgr->smgr_rnode.node,
481+
MAIN_FORKNUM, GIST_ROOT_BLKNO);
482+
SetLastWrittenLSNForRelation(lsn, state->indexrel->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
479483
}
480484

481485
pfree(levelstate->pages[0]);

src/backend/access/spgist/spginsert.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,10 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
143143
log_newpage_range(index, MAIN_FORKNUM,
144144
0, RelationGetNumberOfBlocks(index),
145145
true);
146+
SetLastWrittenLSNForBlockRange(XactLastRecEnd, index->rd_smgr->smgr_rnode.node,
147+
MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
148+
SetLastWrittenLSNForRelation(XactLastRecEnd, index->rd_smgr->smgr_rnode.node, MAIN_FORKNUM);
146149
}
147-
SetLastWrittenPageLSN(XactLastRecEnd);
148150

149151
smgr_end_unlogged_build(index->rd_smgr);
150152

src/backend/access/transam/xlog.c

+181-19
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
#include "replication/walreceiver.h"
8686
#include "replication/walsender.h"
8787
#include "storage/bufmgr.h"
88+
#include "storage/buf_internals.h"
8889
#include "storage/fd.h"
8990
#include "storage/ipc.h"
9091
#include "storage/large_object.h"
@@ -137,6 +138,7 @@ int max_slot_wal_keep_size_mb = -1;
137138
int wal_decode_buffer_size = 512 * 1024;
138139
bool track_wal_io_timing = false;
139140
uint64 predefined_sysidentifier;
141+
int lastWrittenLsnCacheSize;
140142

141143
#ifdef WAL_DEBUG
142144
bool XLOG_DEBUG = false;
@@ -199,6 +201,25 @@ const struct config_enum_entry archive_mode_options[] = {
199201
{NULL, 0, false}
200202
};
201203

204+
/*
 * One entry of the last written LSN cache hash table.
 *
 * The key must stay the first member: an entry pointer is passed directly
 * as the key when an entry is removed from the hash table.
 */
typedef struct LastWrittenLsnCacheEntry
205+
{
206+
/* hash key; blockNum holds the bucket number (block / LAST_WRITTEN_LSN_CACHE_BUCKET) */
BufferTag key;
207+
/* maximal last written LSN recorded for this bucket */
XLogRecPtr lsn;
208+
/* doubly linked list node for the LRU replacement algorithm */
dlist_node lru_node;
209+
} LastWrittenLsnCacheEntry;
211+
212+
213+
/*
214+
* Cache of last written LSN for each relation chunk (hash bucket).
215+
* Also, to provide a request LSN for smgrnblocks and smgrexists, there is a pseudokey=InvalidBlockId which stores the LSN of the last
216+
* relation metadata update.
217+
* Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"),
218+
* entries are replaced using the LRU algorithm, based on a doubly linked list.
219+
* Access to this cache is protected by 'LastWrittenLsnLock'.
220+
*/
221+
static HTAB *lastWrittenLsnCache;
222+
202223
/*
203224
* Statistics for current checkpoint are collected in this global struct.
204225
* Because only the checkpointer or a stand-alone backend can perform
@@ -552,7 +573,17 @@ typedef struct XLogCtlData
552573
* XLOG_FPW_CHANGE record that instructs full_page_writes is disabled.
553574
*/
554575
XLogRecPtr lastFpwDisableRecPtr;
555-
XLogRecPtr lastWrittenPageLSN;
576+
577+
/*
578+
* Maximal last written LSN for pages not present in lastWrittenLsnCache
579+
*/
580+
XLogRecPtr maxLastWrittenLsn;
581+
582+
/*
583+
* Doubly linked list to implement LRU replacement policy for last written LSN cache.
584+
* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.
585+
*/
586+
dlist_head lastWrittenLsnLRU;
556587

557588
/* neon: copy of startup's RedoStartLSN for walproposer's use */
558589
XLogRecPtr RedoStartLSN;
@@ -575,6 +606,8 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
575606
*/
576607
static ControlFileData *ControlFile = NULL;
577608

609+
#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */
610+
578611
/*
579612
* Calculate the amount of space left on the page after 'endptr'. Beware
580613
* multiple evaluation!
@@ -4355,11 +4388,8 @@ LocalProcessControlFile(bool reset)
43554388
ReadControlFile();
43564389
}
43574390

4358-
/*
4359-
* Initialization of shared memory for XLOG
4360-
*/
4361-
Size
4362-
XLOGShmemSize(void)
4391+
static Size
4392+
XLOGCtlShmemSize(void)
43634393
{
43644394
Size size;
43654395

@@ -4408,6 +4438,16 @@ XLOGShmemSize(void)
44084438
return size;
44094439
}
44104440

4441+
/*
4442+
* Initialization of shared memory for XLOG
4443+
*/
4444+
Size
4445+
XLOGShmemSize(void)
4446+
{
4447+
return XLOGCtlShmemSize() +
4448+
hash_estimate_size(lastWrittenLsnCacheSize, sizeof(LastWrittenLsnCacheEntry));
4449+
}
4450+
44114451
void
44124452
XLOGShmemInit(void)
44134453
{
@@ -4437,6 +4477,15 @@ XLOGShmemInit(void)
44374477
XLogCtl = (XLogCtlData *)
44384478
ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
44394479

4480+
{
4481+
static HASHCTL info;
4482+
info.keysize = sizeof(BufferTag);
4483+
info.entrysize = sizeof(LastWrittenLsnCacheEntry);
4484+
lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
4485+
lastWrittenLsnCacheSize, lastWrittenLsnCacheSize,
4486+
&info,
4487+
HASH_ELEM | HASH_BLOBS);
4488+
}
44404489
localControlFile = ControlFile;
44414490
ControlFile = (ControlFileData *)
44424491
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
@@ -5623,7 +5672,8 @@ StartupXLOG(void)
56235672

56245673
XLogCtl->LogwrtRqst.Write = EndOfLog;
56255674
XLogCtl->LogwrtRqst.Flush = EndOfLog;
5626-
XLogCtl->lastWrittenPageLSN = EndOfLog;
5675+
XLogCtl->maxLastWrittenLsn = EndOfLog;
5676+
dlist_init(&XLogCtl->lastWrittenLsnLRU);
56275677

56285678
/*
56295679
* Preallocate additional log files, if wanted.
@@ -6047,29 +6097,141 @@ GetInsertRecPtr(void)
60476097
}
60486098

60496099
/*
6050-
* GetLastWrittenPageLSN -- Returns maximal LSN of written page
6100+
* GetLastWrittenLSN -- Returns maximal LSN of written page.
6101+
* It returns an upper bound for the last written LSN of a given page,
6102+
* either from a cached last written LSN or a global maximum last written LSN.
6103+
* If rnode is InvalidOid then we calculate maximum among all cached LSN and maxLastWrittenLsn.
6104+
* If cache is large enough ,iterting through all hash items may be rather expensive.
6105+
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
60516106
*/
60526107
XLogRecPtr
6053-
GetLastWrittenPageLSN(void)
6108+
GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
60546109
{
60556110
XLogRecPtr lsn;
6056-
SpinLockAcquire(&XLogCtl->info_lck);
6057-
lsn = XLogCtl->lastWrittenPageLSN;
6058-
SpinLockRelease(&XLogCtl->info_lck);
6111+
LastWrittenLsnCacheEntry* entry;
6112+
6113+
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
6114+
6115+
/* Maximal last written LSN among all non-cached pages */
6116+
lsn = XLogCtl->maxLastWrittenLsn;
6117+
6118+
if (rnode.relNode != InvalidOid)
6119+
{
6120+
BufferTag key;
6121+
key.rnode = rnode;
6122+
key.forkNum = forknum;
6123+
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
6124+
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
6125+
if (entry != NULL)
6126+
lsn = entry->lsn;
6127+
}
6128+
else
6129+
{
6130+
HASH_SEQ_STATUS seq;
6131+
/* Find maximum of all cached LSNs */
6132+
hash_seq_init(&seq, lastWrittenLsnCache);
6133+
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
6134+
{
6135+
if (entry->lsn > lsn)
6136+
lsn = entry->lsn;
6137+
}
6138+
}
6139+
LWLockRelease(LastWrittenLsnLock);
60596140

60606141
return lsn;
60616142
}
60626143

60636144
/*
6064-
* SetLastWrittenPageLSN -- Set maximal LSN of written page
6145+
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
6146+
* We maintain cache of last written LSNs with limited size and LRU replacement
6147+
* policy. To reduce cache size we store max LSN not for each page, but for
6148+
* bucket (1024 blocks). This cache allows to use old LSN when
6149+
* requesting pages of unchanged or appended relations.
6150+
*
6151+
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
6152+
* SetLastWrittenLsn with dummy rnode is used by createdb and dbase_redo functions.
60656153
*/
60666154
void
6067-
SetLastWrittenPageLSN(XLogRecPtr lsn)
6155+
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber from, BlockNumber till)
60686156
{
6069-
SpinLockAcquire(&XLogCtl->info_lck);
6070-
if (lsn > XLogCtl->lastWrittenPageLSN)
6071-
XLogCtl->lastWrittenPageLSN = lsn;
6072-
SpinLockRelease(&XLogCtl->info_lck);
6157+
if (lsn == InvalidXLogRecPtr)
6158+
return;
6159+
6160+
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
6161+
if (rnode.relNode == InvalidOid)
6162+
{
6163+
if (lsn > XLogCtl->maxLastWrittenLsn)
6164+
XLogCtl->maxLastWrittenLsn = lsn;
6165+
}
6166+
else
6167+
{
6168+
LastWrittenLsnCacheEntry* entry;
6169+
BufferTag key;
6170+
bool found;
6171+
BlockNumber bucket;
6172+
6173+
key.rnode = rnode;
6174+
key.forkNum = forknum;
6175+
for (bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
6176+
bucket <= till / LAST_WRITTEN_LSN_CACHE_BUCKET;
6177+
bucket++)
6178+
{
6179+
key.blockNum = bucket;
6180+
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
6181+
if (found)
6182+
{
6183+
if (lsn > entry->lsn)
6184+
entry->lsn = lsn;
6185+
/* Unlink from LRU list */
6186+
dlist_delete(&entry->lru_node);
6187+
}
6188+
else
6189+
{
6190+
entry->lsn = lsn;
6191+
if (hash_get_num_entries(lastWrittenLsnCache) > lastWrittenLsnCacheSize)
6192+
{
6193+
/* Replace least recently used entry */
6194+
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&XLogCtl->lastWrittenLsnLRU));
6195+
/* Adjust max LSN for not cached relations/chunks if needed */
6196+
if (victim->lsn > XLogCtl->maxLastWrittenLsn)
6197+
XLogCtl->maxLastWrittenLsn = victim->lsn;
6198+
6199+
hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
6200+
}
6201+
}
6202+
/* Link to the end of LRU list */
6203+
dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node);
6204+
}
6205+
}
6206+
LWLockRelease(LastWrittenLsnLock);
6207+
}
6208+
6209+
/*
6210+
* SetLastWrittenLSNForBlock -- Set maximal LSN for block
6211+
*/
6212+
void
6213+
SetLastWrittenLSNForBlock(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
6214+
{
6215+
SetLastWrittenLSNForBlockRange(lsn, rnode, forknum, blkno, blkno);
6216+
}
6217+
6218+
/*
6219+
* SetLastWrittenLSNForRelation -- Set maximal LSN for relation metadata
6220+
*/
6221+
void
6222+
SetLastWrittenLSNForRelation(XLogRecPtr lsn, RelFileNode rnode, ForkNumber forknum)
6223+
{
6224+
SetLastWrittenLSNForBlock(lsn, rnode, forknum, REL_METADATA_PSEUDO_BLOCKNO);
6225+
}
6226+
6227+
/*
6228+
* SetLastWrittenLSNForDatabase -- Set maximal LSN for the whole database
6229+
*/
6230+
void
6231+
SetLastWrittenLSNForDatabase(XLogRecPtr lsn)
6232+
{
6233+
RelFileNode dummyNode = {InvalidOid, InvalidOid, InvalidOid};
6234+
SetLastWrittenLSNForBlock(lsn, dummyNode, MAIN_FORKNUM, 0);
60736235
}
60746236

60756237
void
@@ -6320,7 +6482,7 @@ LogCheckpointEnd(bool restartpoint)
63206482
average_sync_time = 0;
63216483
if (CheckpointStats.ckpt_sync_rels > 0)
63226484
average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6323-
CheckpointStats.ckpt_sync_rels;
6485+
CheckpointStats.ckpt_sync_rels;
63246486
average_msecs = (long) ((average_sync_time + 999) / 1000);
63256487

63266488
if (restartpoint)

0 commit comments

Comments
 (0)