Skip to content

Commit 018fb05

Browse files
knizhnikKonstantin Knizhnik
authored and committed
On demand downloading of SLRU segments (#333)
* On demand downloading of SLRU segments * Fix smgr_read_slru_segment * Fix bug in SimpleLruDownloadSegment * Determine SLRU kind in extension * Use ctl->PagePrecedes for SLRU page comparison in SimpleLruDownloadSegment to address wraparound --------- Co-authored-by: Konstantin Knizhnik <[email protected]>
1 parent f378dcd commit 018fb05

File tree

3 files changed

+112
-14
lines changed

3 files changed

+112
-14
lines changed

src/backend/access/transam/slru.c

+92-13
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "pgstat.h"
5959
#include "storage/fd.h"
6060
#include "storage/shmem.h"
61+
#include "storage/smgr.h"
6162

6263
#define SlruFileName(ctl, path, seg) \
6364
snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -616,6 +617,66 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
616617
SlruInternalWritePage(ctl, slotno, NULL);
617618
}
618619

620+
621+
/*
622+
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
623+
* to download them on demand to reduce startup time.
624+
* If SLRU segment is not found, we try to download it from page server
625+
*/
626+
static int
627+
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path)
628+
{
629+
int segno;
630+
int fd = -1;
631+
int n_blocks;
632+
char* buffer;
633+
634+
static SMgrRelationData dummy_smgr_rel = {0};
635+
636+
/* If page is greater than latest written page, then do not try to download segment from server */
637+
if (ctl->PagePrecedes(ctl->shared->latest_page_number, pageno))
638+
return -1;
639+
640+
if (!dummy_smgr_rel.smgr)
641+
{
642+
RelFileNode rnode = {0};
643+
dummy_smgr_rel.smgr = smgr(InvalidBackendId, rnode);
644+
}
645+
segno = pageno / SLRU_PAGES_PER_SEGMENT;
646+
647+
buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT);
648+
n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer);
649+
if (n_blocks > 0)
650+
{
651+
fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
652+
if (fd < 0)
653+
{
654+
slru_errcause = SLRU_OPEN_FAILED;
655+
slru_errno = errno;
656+
pfree(buffer);
657+
return -1;
658+
}
659+
errno = 0;
660+
pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
661+
if (pg_pwrite(fd, buffer, n_blocks*BLCKSZ, 0) != n_blocks*BLCKSZ)
662+
{
663+
pgstat_report_wait_end();
664+
/* if write didn't set errno, assume problem is no disk space */
665+
if (errno == 0)
666+
errno = ENOSPC;
667+
slru_errcause = SLRU_WRITE_FAILED;
668+
slru_errno = errno;
669+
670+
CloseTransientFile(fd);
671+
pfree(buffer);
672+
return -1;
673+
}
674+
pgstat_report_wait_end();
675+
}
676+
pfree(buffer);
677+
return fd;
678+
}
679+
619680
/*
620681
* Return whether the given page exists on disk.
621682
*
@@ -643,12 +704,18 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
643704
{
644705
/* expected: file doesn't exist */
645706
if (errno == ENOENT)
646-
return false;
647-
648-
/* report error normally */
649-
slru_errcause = SLRU_OPEN_FAILED;
650-
slru_errno = errno;
651-
SlruReportIOError(ctl, pageno, 0);
707+
{
708+
fd = SimpleLruDownloadSegment(ctl, pageno, path);
709+
if (fd < 0)
710+
return false;
711+
}
712+
else
713+
{
714+
/* report error normally */
715+
slru_errcause = SLRU_OPEN_FAILED;
716+
slru_errno = errno;
717+
SlruReportIOError(ctl, pageno, 0);
718+
}
652719
}
653720

654721
if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
@@ -702,18 +769,30 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
702769
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
703770
if (fd < 0)
704771
{
705-
if (errno != ENOENT || !InRecovery)
772+
if (errno != ENOENT)
706773
{
707774
slru_errcause = SLRU_OPEN_FAILED;
708775
slru_errno = errno;
709776
return false;
710777
}
711-
712-
ereport(LOG,
713-
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
714-
path)));
715-
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
716-
return true;
778+
fd = SimpleLruDownloadSegment(ctl, pageno, path);
779+
if (fd < 0)
780+
{
781+
if (!InRecovery)
782+
{
783+
slru_errcause = SLRU_OPEN_FAILED;
784+
slru_errno = errno;
785+
return false;
786+
}
787+
else
788+
{
789+
ereport(LOG,
790+
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
791+
path)));
792+
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
793+
return true;
794+
}
795+
}
717796
}
718797

719798
errno = 0;

src/backend/storage/smgr/smgr.c

+16
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,22 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
538538
buffer, skipFsync);
539539
}
540540

541+
/*
542+
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
543+
* to download them on demand to reduce startup time.
544+
* If SLRU segment is not found, we try to download it from page server
545+
*
546+
* This function returns number of blocks in segment. Usually it should be SLRU_PAGES_PER_SEGMENT but in case
547+
* of partial segment, it can be smaller. Zero value means that segment doesn't exist.
548+
* From Postgres point of view empty segment is the same as absent segment.
549+
*/
550+
int
551+
smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
552+
{
553+
return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0;
554+
}
555+
556+
541557

542558
/*
543559
* smgrwriteback() -- Trigger kernel writeback for the supplied range of

src/include/storage/smgr.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ typedef SMgrRelationData *SMgrRelation;
8888
#define SmgrIsTemp(smgr) \
8989
RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
9090

91-
9291
/*
9392
* This struct of function pointers defines the API between smgr.c and
9493
* any individual storage manager module. Note that smgr subfunctions are
@@ -129,6 +128,8 @@ typedef struct f_smgr
129128
void (*smgr_start_unlogged_build) (SMgrRelation reln);
130129
void (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln);
131130
void (*smgr_end_unlogged_build) (SMgrRelation reln);
131+
132+
int (*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer);
132133
} f_smgr;
133134

134135
typedef void (*smgr_init_hook_type) (void);
@@ -180,4 +181,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln);
180181
extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln);
181182
extern void smgr_end_unlogged_build(SMgrRelation reln);
182183

184+
extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer);
185+
183186
#endif /* SMGR_H */

0 commit comments

Comments
 (0)