Skip to content

Commit cd64f21

Browse files
knizhnik authored, tristan957 committed
Maintain last written LSN for each page to enable prefetch on vacuum,… (#245)
* Maintain last written LSN for each page to enable prefetch on vacuum, delete and other massive update operations * Move PageSetLSN in heap_xlog_visible before MarkBufferDirty
1 parent d2b3b09 commit cd64f21

File tree

3 files changed

+21
-21
lines changed

3 files changed

+21
-21
lines changed

src/backend/access/heap/heapam.c

+10-2
Original file line numberDiff line numberDiff line change
@@ -8986,8 +8986,16 @@ heap_xlog_visible(XLogReaderState *record)
89868986

89878987
PageSetAllVisible(page);
89888988

8989-
if (XLogHintBitIsNeeded())
8990-
PageSetLSN(page, lsn);
8989+
/*
8990+
* NEON: despite the comment above we need to update page LSN here.
8991+
* See discussion at hackers: https://www.postgresql.org/message-id/flat/039076d4f6cdd871691686361f83cb8a6913a86a.camel%40j-davis.com#101ba42b004f9988e3d54fce26fb3462
8992+
* For Neon this assignment is critical because otherwise last written LSN tracked at compute doesn't
8993+
* match with page LSN assigned by WAL-redo and as a result, prefetched page is rejected.
8994+
*
8995+
* It is fixed in upstream in https://github.com/neondatabase/postgres/commit/7bf713dd2d0739fbcd4103971ed69c17ebe677ea
8996+
* but until it is merged we still need to carry a patch here.
8997+
*/
8998+
PageSetLSN(page, lsn);
89918999

89929000
MarkBufferDirty(buffer);
89939001
}

src/backend/access/transam/xlog.c

+9-17
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ typedef struct LastWrittenLsnCacheEntry
211211

212212

213213
/*
214-
* Cache of last written LSN for each relation chunk (hash bucket).
214+
* Cache of last written LSN for each relation page.
215215
* Also to provide request LSN for smgrnblocks, smgrexists there is pseudokey=InvalidBlockId which stores LSN of last
216216
* relation metadata update.
217217
* Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"),
@@ -606,8 +606,6 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
606606
*/
607607
static ControlFileData *ControlFile = NULL;
608608

609-
#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */
610-
611609
/*
612610
* Calculate the amount of space left on the page after 'endptr'. Beware
613611
* multiple evaluation!
@@ -6104,7 +6102,7 @@ GetInsertRecPtr(void)
61046102
* It returns an upper bound for the last written LSN of a given page,
61056103
* either from a cached last written LSN or a global maximum last written LSN.
61066104
* If rnode is InvalidOid then we calculate maximum among all cached LSN and maxLastWrittenLsn.
6107-
* If cache is large enough ,iterting through all hash items may be rather expensive.
6105+
* If cache is large enough, iterating through all hash items may be rather expensive.
61086106
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
61096107
*/
61106108
XLogRecPtr
@@ -6123,7 +6121,7 @@ GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
61236121
BufferTag key;
61246122
key.rnode = rnode;
61256123
key.forkNum = forknum;
6126-
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
6124+
key.blockNum = blkno;
61276125
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
61286126
if (entry != NULL)
61296127
lsn = entry->lsn;
@@ -6147,9 +6145,9 @@ GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
61476145
/*
61486146
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
61496147
* We maintain cache of last written LSNs with limited size and LRU replacement
6150-
* policy. To reduce cache size we store max LSN not for each page, but for
6151-
* bucket (1024 blocks). This cache allows to use old LSN when
6152-
* requesting pages of unchanged or appended relations.
6148+
* policy. Keeping last written LSN for each page allows to use old LSN when
6149+
* requesting pages of unchanged or appended relations. Also it is critical for
6150+
efficient work of prefetch in case of massive update operations (like vacuum or delete).
61536151
*
61546152
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
61556153
* SetLastWrittenLsn with dummy rnode is used by createdb and dbase_redo functions.
@@ -6171,19 +6169,13 @@ SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber for
61716169
LastWrittenLsnCacheEntry* entry;
61726170
BufferTag key;
61736171
bool found;
6174-
BlockNumber bucket;
6175-
BlockNumber start_bucket; /* inclusive */
6176-
BlockNumber end_bucket; /* exclusive */
6177-
6178-
start_bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
6179-
end_bucket = from == REL_METADATA_PSEUDO_BLOCKNO
6180-
? start_bucket + 1 : (from + n_blocks + LAST_WRITTEN_LSN_CACHE_BUCKET - 1) / LAST_WRITTEN_LSN_CACHE_BUCKET;
6172+
BlockNumber i;
61816173

61826174
key.rnode = rnode;
61836175
key.forkNum = forknum;
6184-
for (bucket = start_bucket; bucket < end_bucket; bucket++)
6176+
for (i = 0; i < n_blocks; i++)
61856177
{
6186-
key.blockNum = bucket;
6178+
key.blockNum = from + i;
61876179
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
61886180
if (found)
61896181
{

src/backend/utils/misc/guc.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -2440,11 +2440,11 @@ static struct config_int ConfigureNamesInt[] =
24402440

24412441
{
24422442
{"lsn_cache_size", PGC_POSTMASTER, UNGROUPED,
2443-
gettext_noop("Size of las written LSN cache used by Neon."),
2443+
gettext_noop("Size of last written LSN cache used by Neon."),
24442444
NULL
24452445
},
24462446
&lastWrittenLsnCacheSize,
2447-
1024, 10, 1000000, /* 1024 is enough to hold 10GB database with 8Mb bucket */
2447+
128*1024, 1024, INT_MAX,
24482448
NULL, NULL, NULL
24492449
},
24502450

0 commit comments

Comments
 (0)