Skip to content

Commit a9f5034

Browse files
knizhnik authored and MMeent committed
Unlogged index fix v14 (#259)
* Avoid errors when accessing indexes of unlogged tables after compute restart * Address review complaints: add comment to mdopenfork * Initialize unlogged index under exclusive lock
1 parent 1b8c35b commit a9f5034

File tree

2 files changed

+60
-3
lines changed

2 files changed

+60
-3
lines changed

src/backend/optimizer/util/plancat.c

+37-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "access/xlog.h"
2828
#include "catalog/catalog.h"
2929
#include "catalog/heap.h"
30+
#include "catalog/index.h"
3031
#include "catalog/pg_am.h"
3132
#include "catalog/pg_proc.h"
3233
#include "catalog/pg_statistic_ext.h"
@@ -46,6 +47,8 @@
4647
#include "rewrite/rewriteManip.h"
4748
#include "statistics/statistics.h"
4849
#include "storage/bufmgr.h"
50+
#include "storage/buf_internals.h"
51+
#include "storage/lmgr.h"
4952
#include "utils/builtins.h"
5053
#include "utils/lsyscache.h"
5154
#include "utils/partcache.h"
@@ -80,6 +83,39 @@ static void set_baserel_partition_key_exprs(Relation relation,
8083
static void set_baserel_partition_constraint(Relation relation,
8184
RelOptInfo *rel);
8285

/*
 * is_index_valid -
 *	  Decide whether "index" may be used, rebuilding it first if needed.
 *
 * Returns false when the catalog flag indisvalid is not set; otherwise
 * returns true.
 *
 * For an index whose relpersistence is RELPERSISTENCE_UNLOGGED, block 0 is
 * read and tested with PageIsNew().  If that page is new, the index is
 * rebuilt by calling the access method's ambuild callback before returning.
 *
 * "lmode" is presumably the lock mode the caller currently holds on the
 * index (it is what gets passed to UnlockRelation) -- confirm against the
 * caller.  When a rebuild is needed and lmode is not ExclusiveLock, that
 * lock is released, ExclusiveLock is acquired, and block 0 is checked again.
 * lmode is passed by value, so the caller is not informed of the change.
 */
86+
static bool
87+
is_index_valid(Relation index, LOCKMODE lmode)
88+
{
89+
if (!index->rd_index->indisvalid)
90+
return false;
91+
92+
if (index->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
93+
{
94+
/* Runs at most twice: the second pass happens only after the lock swap. */
while (true)
95+
{
96+
/* Hold block 0 only long enough to test whether it is new. */
Buffer metapage = ReadBuffer(index, 0);
97+
bool isNew = PageIsNew(BufferGetPage(metapage));
98+
ReleaseBuffer(metapage);
99+
if (isNew)
100+
{
101+
Relation heap;
102+
if (lmode != ExclusiveLock)
103+
{
104+
/*
 * Swap the current lock for ExclusiveLock, then loop to re-read
 * block 0.
 * NOTE(review): the index is unlocked between these two calls;
 * presumably the re-check exists because another backend may have
 * rebuilt the index in that window -- confirm.
 */
UnlockRelation(index, lmode);
105+
LockRelation(index, ExclusiveLock);
106+
lmode = ExclusiveLock;
107+
continue;
108+
}
109+
/*
 * Drop buffers belonging to this index before rebuilding it.
 * NOTE(review): relies on index->rd_smgr being set, presumably by
 * the ReadBuffer call above -- confirm.
 */
DropRelFileNodesAllBuffers(&index->rd_smgr, 1);
110+
/*
 * NOTE(review): the result is used without a NULL check and no lock
 * is taken here; presumably the caller already has the parent table
 * open and locked -- confirm.
 */
heap = RelationIdGetRelation(index->rd_index->indrelid);
111+
/* Rebuild via the index access method; its return value is ignored. */
index->rd_indam->ambuild(heap, index, BuildIndexInfo(index));
112+
RelationClose(heap);
113+
}
114+
break;
115+
}
116+
}
117+
return true;
118+
}
83119

84120
/*
85121
* get_relation_info -
@@ -221,7 +257,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
221257
* still needs to insert into "invalid" indexes, if they're marked
222258
* indisready.
223259
*/
224-
if (!index->indisvalid)
260+
if (!is_index_valid(indexRelation, lmode))
225261
{
226262
index_close(indexRelation, NoLock);
227263
continue;

src/backend/storage/smgr/md.c

+23-2
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,13 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
490490

491491
fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
492492

493+
/*
494+
* NEON: unlogged relation files are lost after compute restart - we need to implicitly recreate them
495+
* to allow data insertion
496+
*/
497+
if (fd < 0 && (behavior & EXTENSION_CREATE))
498+
fd = PathNameOpenFile(path, O_RDWR | O_CREAT | PG_BINARY);
499+
493500
if (fd < 0)
494501
{
495502
if ((behavior & EXTENSION_RETURN_NULL) &&
@@ -652,9 +659,23 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
652659
reln->smgr_rnode.node.relNode,
653660
reln->smgr_rnode.backend);
654661

662+
/* NEON: md smgr is used in Neon for unlogged and temp relations.
663+
* After compute node restart their data is deleted but unlogged tables are still present in system catalog.
664+
* This differs from vanilla Postgres, where unlogged relations are truncated only after abnormal termination.
665+
* To avoid "could not open file" errors we have to use EXTENSION_RETURN_NULL here instead of EXTENSION_FAIL
666+
*/
655667
v = _mdfd_getseg(reln, forknum, blocknum, false,
656-
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
657-
668+
RelFileNodeBackendIsTemp(reln->smgr_rnode)
669+
? EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY
670+
: EXTENSION_RETURN_NULL);
671+
if (v == NULL)
672+
{
673+
char* path = relpath(reln->smgr_rnode, forknum);
674+
(void)PathNameOpenFile(path, O_RDWR | O_CREAT | PG_BINARY);
675+
pfree(path);
676+
MemSet(buffer, 0, BLCKSZ);
677+
return;
678+
}
658679
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
659680

660681
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);

0 commit comments

Comments
 (0)