Skip to content

Commit ad5e789

Browse files
knizhnikMMeent
authored and committed
Unlogged index fix v15 (#262)
* Avoid errors when accessing indexes of unlogged tables after compute restart * Support unlogged sequences * Extract sequence start value from pg_sequence * Initialize unlogged index under exclusive lock
1 parent ba841dd commit ad5e789

File tree

3 files changed

+97
-19
lines changed

3 files changed

+97
-19
lines changed

src/backend/commands/sequence.c

+37-16
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */
9898
static SeqTableData *last_used_seq = NULL;
9999

100100
static void fill_seq_with_data(Relation rel, HeapTuple tuple);
101-
static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum);
101+
static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum, Buffer buf);
102102
static Relation lock_and_open_sequence(SeqTable seq);
103103
static void create_seq_hashtable(void);
104104
static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel);
@@ -351,7 +351,7 @@ ResetSequence(Oid seq_relid)
351351
static void
352352
fill_seq_with_data(Relation rel, HeapTuple tuple)
353353
{
354-
fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM);
354+
fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM, InvalidBuffer);
355355

356356
if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
357357
{
@@ -360,7 +360,7 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
360360
srel = smgropen(rel->rd_node, InvalidBackendId, rel->rd_rel->relpersistence);
361361
smgrcreate(srel, INIT_FORKNUM, false);
362362
log_smgrcreate(&rel->rd_node, INIT_FORKNUM);
363-
fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
363+
fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM, InvalidBuffer);
364364
FlushRelationBuffers(rel);
365365
smgrclose(srel);
366366
}
@@ -370,28 +370,28 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
370370
* Initialize a sequence's relation fork with the specified tuple as content
371371
*/
372372
static void
373-
fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
373+
fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum, Buffer buf)
374374
{
375-
Buffer buf;
376375
Page page;
377376
sequence_magic *sm;
378377
OffsetNumber offnum;
378+
bool lockBuffer = false;
379379

380380
/* Initialize first page of relation with special magic number */
381-
382-
buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
383-
Assert(BufferGetBlockNumber(buf) == 0);
384-
381+
if (buf == InvalidBuffer)
382+
{
383+
buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
384+
Assert(BufferGetBlockNumber(buf) == 0);
385+
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
386+
lockBuffer = true;
387+
}
385388
page = BufferGetPage(buf);
386-
387389
PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
388390
sm = (sequence_magic *) PageGetSpecialPointer(page);
389391
sm->magic = SEQ_MAGIC;
390392

391393
/* Now insert sequence tuple */
392394

393-
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
394-
395395
/*
396396
* Since VACUUM does not process sequences, we have to force the tuple to
397397
* have xmin = FrozenTransactionId now. Otherwise it would become
@@ -440,7 +440,8 @@ fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
440440

441441
END_CRIT_SECTION();
442442

443-
UnlockReleaseBuffer(buf);
443+
if (lockBuffer)
444+
UnlockReleaseBuffer(buf);
444445
}
445446

446447
/*
@@ -1215,9 +1216,29 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
12151216
sm = (sequence_magic *) PageGetSpecialPointer(page);
12161217

12171218
if (sm->magic != SEQ_MAGIC)
1218-
elog(ERROR, "bad magic number in sequence \"%s\": %08X",
1219-
RelationGetRelationName(rel), sm->magic);
1220-
1219+
{
1220+
/* NEON: reinitialize unlogged sequence */
1221+
if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
1222+
{
1223+
Datum value[SEQ_COL_LASTCOL] = {0};
1224+
bool null[SEQ_COL_LASTCOL] = {false};
1225+
HeapTuple tuple;
1226+
Form_pg_sequence pgsform;
1227+
1228+
tuple = SearchSysCache1(SEQRELID, RelationGetRelid(rel));
1229+
if (!HeapTupleIsValid(tuple))
1230+
elog(ERROR, "cache lookup failed for sequence %u", RelationGetRelid(rel));
1231+
pgsform = (Form_pg_sequence) GETSTRUCT(tuple);
1232+
value[SEQ_COL_LASTVAL-1] = Int64GetDatumFast(pgsform->seqstart);
1233+
ReleaseSysCache(tuple);
1234+
1235+
tuple = heap_form_tuple(RelationGetDescr(rel), value, null);
1236+
fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM, *buf);
1237+
}
1238+
else
1239+
elog(ERROR, "bad magic number in sequence \"%s\": %08X",
1240+
RelationGetRelationName(rel), sm->magic);
1241+
}
12211242
lp = PageGetItemId(page, FirstOffsetNumber);
12221243
Assert(ItemIdIsNormal(lp));
12231244

src/backend/optimizer/util/plancat.c

+37-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "access/xlog.h"
2828
#include "catalog/catalog.h"
2929
#include "catalog/heap.h"
30+
#include "catalog/index.h"
3031
#include "catalog/pg_am.h"
3132
#include "catalog/pg_proc.h"
3233
#include "catalog/pg_statistic_ext.h"
@@ -47,6 +48,8 @@
4748
#include "rewrite/rewriteManip.h"
4849
#include "statistics/statistics.h"
4950
#include "storage/bufmgr.h"
51+
#include "storage/buf_internals.h"
52+
#include "storage/lmgr.h"
5053
#include "utils/builtins.h"
5154
#include "utils/lsyscache.h"
5255
#include "utils/partcache.h"
@@ -81,6 +84,39 @@ static void set_baserel_partition_key_exprs(Relation relation,
8184
static void set_baserel_partition_constraint(Relation relation,
8285
RelOptInfo *rel);
8386

87+
/*
 * is_index_valid -
 *
 *	  Decide whether the planner may use "index", rebuilding the index first
 *	  when it is unlogged and its block 0 is still a new (uninitialized) page.
 *
 * index: the already-opened index relation.
 * lmode: the lock mode under which the index is currently held.  It is passed
 *		  by value, so a lock upgrade performed here is not reported back to
 *		  the caller.
 *
 * Returns false when the catalog flag indisvalid is false; returns true in
 * every other case, including right after a rebuild.
 *
 * NOTE(review): presumably block 0 is "new" because the unlogged index's
 * storage was lost on a compute restart -- confirm against the smgr changes.
 */
static bool
88+
is_index_valid(Relation index, LOCKMODE lmode)
89+
{
90+
if (!index->rd_index->indisvalid) /* catalog says the index is not valid */
91+
return false;
92+
93+
if (index->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) /* only unlogged indexes get the extra check */
94+
{
95+
while (true) /* runs at most twice: once more after the lock upgrade below */
96+
{
97+
Buffer metapage = ReadBuffer(index, 0); /* read block 0 of the index */
98+
bool isNew = PageIsNew(BufferGetPage(metapage)); /* NOTE(review): page is inspected with no LockBuffer call -- confirm this is safe */
99+
ReleaseBuffer(metapage);
100+
if (isNew)
101+
{
102+
Relation heap;
103+
if (lmode != ExclusiveLock)
104+
{
105+
UnlockRelation(index, lmode); /* NOTE(review): the index is unlocked until the next call returns */
106+
LockRelation(index, ExclusiveLock);
107+
lmode = ExclusiveLock; /* updates the local copy only */
108+
continue; /* re-read block 0: its state may have changed while the lock was dropped */
109+
}
110+
DropRelFileNodesAllBuffers(&index->rd_smgr, 1); /* presumably discards the index's cached buffers; NOTE(review): uses rd_smgr directly -- confirm it cannot be NULL here */
111+
heap = RelationIdGetRelation(index->rd_index->indrelid); /* open the indexed table; NOTE(review): result is not checked before use */
112+
index->rd_indam->ambuild(heap, index, BuildIndexInfo(index)); /* call the access method's build callback; its return value is discarded */
113+
RelationClose(heap);
114+
}
115+
break;
116+
}
117+
}
118+
return true; /* NOTE(review): if the lock was upgraded above it is still ExclusiveLock here -- confirm the caller copes */
119+
}
84120

85121
/*
86122
* get_relation_info -
@@ -224,7 +260,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
224260
* still needs to insert into "invalid" indexes, if they're marked
225261
* indisready.
226262
*/
227-
if (!index->indisvalid)
263+
if (!is_index_valid(indexRelation, lmode))
228264
{
229265
index_close(indexRelation, NoLock);
230266
continue;

src/backend/storage/smgr/md.c

+23-2
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,13 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
525525

526526
fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
527527

528+
/*
529+
* NEON: unlogged relation files are lost after compute restart - we need to implicitly recreate them
530+
* to allow data insertion
531+
*/
532+
if (fd < 0 && (behavior & EXTENSION_CREATE))
533+
fd = PathNameOpenFile(path, O_RDWR | O_CREAT | PG_BINARY);
534+
528535
if (fd < 0)
529536
{
530537
if ((behavior & EXTENSION_RETURN_NULL) &&
@@ -689,9 +696,23 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
689696
reln->smgr_rnode.node.relNode,
690697
reln->smgr_rnode.backend);
691698

699+
/* NEON: md smgr is used in Neon for unlogged and temp relations.
700+
* After compute node restart their data is deleted but unlogged tables are still present in system catalog.
701+
* This is a difference with Vanilla Postgres where unlogged relations are truncated only after abnormal termination.
702+
* To avoid "could not open file" we have to use EXTENSION_RETURN_NULL here instead of EXTENSION_FAIL
703+
*/
692704
v = _mdfd_getseg(reln, forknum, blocknum, false,
693-
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
694-
705+
RelFileNodeBackendIsTemp(reln->smgr_rnode)
706+
? EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY
707+
: EXTENSION_RETURN_NULL);
708+
if (v == NULL)
709+
{
710+
char* path = relpath(reln->smgr_rnode, forknum);
711+
(void)PathNameOpenFile(path, O_RDWR | O_CREAT | PG_BINARY);
712+
pfree(path);
713+
MemSet(buffer, 0, BLCKSZ);
714+
return;
715+
}
695716
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
696717

697718
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);

0 commit comments

Comments
 (0)