PostgreSQL Source Code git master
Loading...
Searching...
No Matches
xlogprefetcher.c File Reference
#include "postgres.h"
#include "access/xlogprefetcher.h"
#include "access/xlogreader.h"
#include "catalog/pg_control.h"
#include "catalog/storage_xlog.h"
#include "commands/dbcommands_xlog.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "port/atomics.h"
#include "storage/bufmgr.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "utils/fmgrprotos.h"
#include "utils/guc_hooks.h"
#include "utils/hsearch.h"
#include "utils/timestamp.h"
Include dependency graph for xlogprefetcher.c:

Go to the source code of this file.

Data Structures

struct  LsnReadQueue
 
struct  XLogPrefetcher
 
struct  XLogPrefetcherFilter
 
struct  XLogPrefetchStats
 

Macros

#define XLOGPREFETCHER_STATS_DISTANCE   BLCKSZ
 
#define XLOGPREFETCHER_SEQ_WINDOW_SIZE   4
 
#define XLOGPREFETCHER_DISTANCE_MULTIPLIER   4
 
#define RecoveryPrefetchEnabled()   false
 
#define PG_STAT_GET_RECOVERY_PREFETCH_COLS   10
 

Typedefs

typedef LsnReadQueueNextStatus(* LsnReadQueueNextFun) (uintptr_t lrq_private, XLogRecPtr *lsn)
 
typedef struct LsnReadQueue LsnReadQueue
 
typedef struct XLogPrefetcherFilter XLogPrefetcherFilter
 
typedef struct XLogPrefetchStats XLogPrefetchStats
 

Enumerations

enum  LsnReadQueueNextStatus { LRQ_NEXT_NO_IO , LRQ_NEXT_IO , LRQ_NEXT_AGAIN }
 

Functions

static void XLogPrefetcherAddFilter (XLogPrefetcher *prefetcher, RelFileLocator rlocator, BlockNumber blockno, XLogRecPtr lsn)
 
static bool XLogPrefetcherIsFiltered (XLogPrefetcher *prefetcher, RelFileLocator rlocator, BlockNumber blockno)
 
static void XLogPrefetcherCompleteFilters (XLogPrefetcher *prefetcher, XLogRecPtr replaying_lsn)
 
static LsnReadQueueNextStatus XLogPrefetcherNextBlock (uintptr_t pgsr_private, XLogRecPtr *lsn)
 
static LsnReadQueue * lrq_alloc (uint32 max_distance, uint32 max_inflight, uintptr_t lrq_private, LsnReadQueueNextFun next)
 
static void lrq_free (LsnReadQueue *lrq)
 
static uint32 lrq_inflight (LsnReadQueue *lrq)
 
static uint32 lrq_completed (LsnReadQueue *lrq)
 
static void lrq_prefetch (LsnReadQueue *lrq)
 
static void lrq_complete_lsn (LsnReadQueue *lrq, XLogRecPtr lsn)
 
size_t XLogPrefetchShmemSize (void)
 
void XLogPrefetchResetStats (void)
 
void XLogPrefetchShmemInit (void)
 
void XLogPrefetchReconfigure (void)
 
static void XLogPrefetchIncrement (pg_atomic_uint64 *counter)
 
XLogPrefetcher * XLogPrefetcherAllocate (XLogReaderState *reader)
 
void XLogPrefetcherFree (XLogPrefetcher *prefetcher)
 
XLogReaderState * XLogPrefetcherGetReader (XLogPrefetcher *prefetcher)
 
void XLogPrefetcherComputeStats (XLogPrefetcher *prefetcher)
 
Datum pg_stat_get_recovery_prefetch (PG_FUNCTION_ARGS)
 
void XLogPrefetcherBeginRead (XLogPrefetcher *prefetcher, XLogRecPtr recPtr)
 
XLogRecord * XLogPrefetcherReadRecord (XLogPrefetcher *prefetcher, char **errmsg)
 
bool check_recovery_prefetch (int *new_value, void **extra, GucSource source)
 
void assign_recovery_prefetch (int new_value, void *extra)
 

Variables

int recovery_prefetch = RECOVERY_PREFETCH_TRY
 
static int XLogPrefetchReconfigureCount = 0
 
static XLogPrefetchStats * SharedStats
 

Macro Definition Documentation

◆ PG_STAT_GET_RECOVERY_PREFETCH_COLS

#define PG_STAT_GET_RECOVERY_PREFETCH_COLS   10

◆ RecoveryPrefetchEnabled

#define RecoveryPrefetchEnabled ( )    false

Definition at line 75 of file xlogprefetcher.c.

◆ XLOGPREFETCHER_DISTANCE_MULTIPLIER

#define XLOGPREFETCHER_DISTANCE_MULTIPLIER   4

Definition at line 62 of file xlogprefetcher.c.

◆ XLOGPREFETCHER_SEQ_WINDOW_SIZE

#define XLOGPREFETCHER_SEQ_WINDOW_SIZE   4

Definition at line 56 of file xlogprefetcher.c.

◆ XLOGPREFETCHER_STATS_DISTANCE

#define XLOGPREFETCHER_STATS_DISTANCE   BLCKSZ

Definition at line 50 of file xlogprefetcher.c.

Typedef Documentation

◆ LsnReadQueue

◆ LsnReadQueueNextFun

typedef LsnReadQueueNextStatus(* LsnReadQueueNextFun) (uintptr_t lrq_private, XLogRecPtr *lsn)

Definition at line 94 of file xlogprefetcher.c.

◆ XLogPrefetcherFilter

◆ XLogPrefetchStats

Enumeration Type Documentation

◆ LsnReadQueueNextStatus

Enumerator
LRQ_NEXT_NO_IO 
LRQ_NEXT_IO 
LRQ_NEXT_AGAIN 

Definition at line 83 of file xlogprefetcher.c.

84{
LsnReadQueueNextStatus
@ LRQ_NEXT_NO_IO
@ LRQ_NEXT_IO
@ LRQ_NEXT_AGAIN

Function Documentation

◆ assign_recovery_prefetch()

void assign_recovery_prefetch ( int  new_value,
void * extra 
)

Definition at line 1095 of file xlogprefetcher.c.

1096{
1097 /* Reconfigure prefetching, because a setting it depends on changed. */
1099 if (AmStartupProcess())
1101}
#define AmStartupProcess()
Definition miscadmin.h:390
static int fb(int x)
int recovery_prefetch
void XLogPrefetchReconfigure(void)

References AmStartupProcess, fb(), recovery_prefetch, and XLogPrefetchReconfigure().

◆ check_recovery_prefetch()

bool check_recovery_prefetch ( int * new_value,
void **  extra,
GucSource  source 
)

Definition at line 1081 of file xlogprefetcher.c.

1082{
1083#ifndef USE_PREFETCH
1085 {
1086 GUC_check_errdetail("\"recovery_prefetch\" is not supported on platforms that lack support for issuing read-ahead advice.");
1087 return false;
1088 }
1089#endif
1090
1091 return true;
1092}
#define GUC_check_errdetail
Definition guc.h:505
@ RECOVERY_PREFETCH_ON

References fb(), GUC_check_errdetail, and RECOVERY_PREFETCH_ON.

◆ lrq_alloc()

static LsnReadQueue * lrq_alloc ( uint32  max_distance,
uint32  max_inflight,
uintptr_t  lrq_private,
LsnReadQueueNextFun  next 
)
inline static

Definition at line 202 of file xlogprefetcher.c.

206{
208 uint32 size;
209
210 Assert(max_distance >= max_inflight);
211
212 size = max_distance + 1; /* full ring buffer has a gap */
213 lrq = palloc(offsetof(LsnReadQueue, queue) + sizeof(lrq->queue[0]) * size);
214 lrq->lrq_private = lrq_private;
215 lrq->max_inflight = max_inflight;
216 lrq->size = size;
217 lrq->next = next;
218 lrq->head = 0;
219 lrq->tail = 0;
220 lrq->inflight = 0;
221 lrq->completed = 0;
222
223 return lrq;
224}
static int32 next
Definition blutils.c:225
#define Assert(condition)
Definition c.h:873
uint32_t uint32
Definition c.h:546
void * palloc(Size size)
Definition mcxt.c:1387

References Assert, fb(), next, and palloc().

Referenced by XLogPrefetcherReadRecord().

◆ lrq_complete_lsn()

static void lrq_complete_lsn ( LsnReadQueue * lrq,
XLogRecPtr  lsn 
)
inline static

Definition at line 272 of file xlogprefetcher.c.

273{
274 /*
275 * We know that LSNs before 'lsn' have been replayed, so we can now assume
276 * that any IOs that were started before then have finished.
277 */
278 while (lrq->tail != lrq->head &&
279 lrq->queue[lrq->tail].lsn < lsn)
280 {
281 if (lrq->queue[lrq->tail].io)
282 lrq->inflight--;
283 else
284 lrq->completed--;
285 lrq->tail++;
286 if (lrq->tail == lrq->size)
287 lrq->tail = 0;
288 }
291}
#define RecoveryPrefetchEnabled()
static void lrq_prefetch(LsnReadQueue *lrq)

References fb(), lrq_prefetch(), and RecoveryPrefetchEnabled.

Referenced by XLogPrefetcherReadRecord().

◆ lrq_completed()

static uint32 lrq_completed ( LsnReadQueue * lrq)
inline static

Definition at line 239 of file xlogprefetcher.c.

240{
241 return lrq->completed;
242}

References fb().

Referenced by XLogPrefetcherComputeStats(), and XLogPrefetcherReadRecord().

◆ lrq_free()

static void lrq_free ( LsnReadQueue * lrq)
inline static

Definition at line 227 of file xlogprefetcher.c.

228{
229 pfree(lrq);
230}
void pfree(void *pointer)
Definition mcxt.c:1616

References fb(), and pfree().

Referenced by XLogPrefetcherFree(), and XLogPrefetcherReadRecord().

◆ lrq_inflight()

static uint32 lrq_inflight ( LsnReadQueue * lrq)
inline static

Definition at line 233 of file xlogprefetcher.c.

234{
235 return lrq->inflight;
236}

References fb().

Referenced by XLogPrefetcherComputeStats(), and XLogPrefetcherReadRecord().

◆ lrq_prefetch()

static void lrq_prefetch ( LsnReadQueue * lrq)
inline static

Definition at line 245 of file xlogprefetcher.c.

246{
247 /* Try to start as many IOs as we can within our limits. */
248 while (lrq->inflight < lrq->max_inflight &&
249 lrq->inflight + lrq->completed < lrq->size - 1)
250 {
251 Assert(((lrq->head + 1) % lrq->size) != lrq->tail);
252 switch (lrq->next(lrq->lrq_private, &lrq->queue[lrq->head].lsn))
253 {
254 case LRQ_NEXT_AGAIN:
255 return;
256 case LRQ_NEXT_IO:
257 lrq->queue[lrq->head].io = true;
258 lrq->inflight++;
259 break;
260 case LRQ_NEXT_NO_IO:
261 lrq->queue[lrq->head].io = false;
262 lrq->completed++;
263 break;
264 }
265 lrq->head++;
266 if (lrq->head == lrq->size)
267 lrq->head = 0;
268 }
269}

References Assert, fb(), LRQ_NEXT_AGAIN, LRQ_NEXT_IO, and LRQ_NEXT_NO_IO.

Referenced by lrq_complete_lsn(), and XLogPrefetcherReadRecord().

◆ pg_stat_get_recovery_prefetch()

Datum pg_stat_get_recovery_prefetch ( PG_FUNCTION_ARGS  )

Definition at line 824 of file xlogprefetcher.c.

825{
826#define PG_STAT_GET_RECOVERY_PREFETCH_COLS 10
827 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
830
831 InitMaterializedSRF(fcinfo, 0);
832
833 for (int i = 0; i < PG_STAT_GET_RECOVERY_PREFETCH_COLS; ++i)
834 nulls[i] = false;
835
846 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
847
848 return (Datum) 0;
849}
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
static Datum values[MAXATTR]
Definition bootstrap.c:155
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition funcapi.c:76
int i
Definition isn.c:77
static Datum Int64GetDatum(int64 X)
Definition postgres.h:423
uint64_t Datum
Definition postgres.h:70
static Datum Int32GetDatum(int32 X)
Definition postgres.h:222
pg_atomic_uint64 skip_fpw
pg_atomic_uint64 skip_init
pg_atomic_uint64 reset_time
pg_atomic_uint64 hit
pg_atomic_uint64 prefetch
pg_atomic_uint64 skip_rep
pg_atomic_uint64 skip_new
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:784
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
static XLogPrefetchStats * SharedStats
#define PG_STAT_GET_RECOVERY_PREFETCH_COLS

References XLogPrefetchStats::block_distance, fb(), XLogPrefetchStats::hit, i, InitMaterializedSRF(), Int32GetDatum(), Int64GetDatum(), XLogPrefetchStats::io_depth, pg_atomic_read_u64(), PG_STAT_GET_RECOVERY_PREFETCH_COLS, XLogPrefetchStats::prefetch, XLogPrefetchStats::reset_time, SharedStats, XLogPrefetchStats::skip_fpw, XLogPrefetchStats::skip_init, XLogPrefetchStats::skip_new, XLogPrefetchStats::skip_rep, TimestampTzGetDatum(), tuplestore_putvalues(), values, and XLogPrefetchStats::wal_distance.

◆ XLogPrefetcherAddFilter()

static void XLogPrefetcherAddFilter ( XLogPrefetcher * prefetcher,
RelFileLocator  rlocator,
BlockNumber  blockno,
XLogRecPtr  lsn 
)
inline static

Definition at line 856 of file xlogprefetcher.c.

858{
859 XLogPrefetcherFilter *filter;
860 bool found;
861
862 filter = hash_search(prefetcher->filter_table, &rlocator, HASH_ENTER, &found);
863 if (!found)
864 {
865 /*
866 * Don't allow any prefetching of this block or higher until replayed.
867 */
868 filter->filter_until_replayed = lsn;
869 filter->filter_from_block = blockno;
870 dlist_push_head(&prefetcher->filter_queue, &filter->link);
871 }
872 else
873 {
874 /*
875 * We were already filtering this rlocator. Extend the filter's
876 * lifetime to cover this WAL record, but leave the lower of the block
877 * numbers there because we don't want to have to track individual
878 * blocks.
879 */
880 filter->filter_until_replayed = lsn;
881 dlist_delete(&filter->link);
882 dlist_push_head(&prefetcher->filter_queue, &filter->link);
883 filter->filter_from_block = Min(filter->filter_from_block, blockno);
884 }
885}
#define Min(x, y)
Definition c.h:997
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_ENTER
Definition hsearch.h:114
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition ilist.h:347
XLogRecPtr filter_until_replayed
BlockNumber filter_from_block

References dlist_delete(), dlist_push_head(), fb(), XLogPrefetcherFilter::filter_from_block, XLogPrefetcherFilter::filter_until_replayed, HASH_ENTER, hash_search(), XLogPrefetcherFilter::link, and Min.

Referenced by XLogPrefetcherNextBlock().

◆ XLogPrefetcherAllocate()

XLogPrefetcher * XLogPrefetcherAllocate ( XLogReaderState * reader)

Definition at line 362 of file xlogprefetcher.c.

363{
365 HASHCTL ctl;
366
368 prefetcher->reader = reader;
369
370 ctl.keysize = sizeof(RelFileLocator);
371 ctl.entrysize = sizeof(XLogPrefetcherFilter);
372 prefetcher->filter_table = hash_create("XLogPrefetcherFilterTable", 1024,
374 dlist_init(&prefetcher->filter_queue);
375
379
380 /* First usage will cause streaming_read to be allocated. */
381 prefetcher->reconfigure_count = XLogPrefetchReconfigureCount - 1;
382
383 return prefetcher;
384}
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
#define palloc0_object(type)
Definition fe_memutils.h:75
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
static void dlist_init(dlist_head *head)
Definition ilist.h:314
tree ctl
Definition radixtree.h:1838
static int XLogPrefetchReconfigureCount

References XLogPrefetchStats::block_distance, ctl, dlist_init(), fb(), HASH_BLOBS, hash_create(), HASH_ELEM, XLogPrefetchStats::io_depth, palloc0_object, SharedStats, XLogPrefetchStats::wal_distance, and XLogPrefetchReconfigureCount.

Referenced by InitWalRecovery().

◆ XLogPrefetcherBeginRead()

void XLogPrefetcherBeginRead ( XLogPrefetcher * prefetcher,
XLogRecPtr  recPtr 
)

Definition at line 962 of file xlogprefetcher.c.

963{
964 /* This will forget about any in-flight IO. */
965 prefetcher->reconfigure_count--;
966
967 /* Book-keeping to avoid readahead on first read. */
968 prefetcher->begin_ptr = recPtr;
969
970 prefetcher->no_readahead_until = InvalidXLogRecPtr;
971
972 /* This will forget about any queued up records in the decoder. */
974}
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition xlogreader.c:232

References fb(), InvalidXLogRecPtr, and XLogBeginRead().

Referenced by FinishWalRecovery(), InitWalRecovery(), PerformWalRecovery(), and ReadCheckpointRecord().

◆ XLogPrefetcherCompleteFilters()

static void XLogPrefetcherCompleteFilters ( XLogPrefetcher * prefetcher,
XLogRecPtr  replaying_lsn 
)
inline static

Definition at line 894 of file xlogprefetcher.c.

895{
896 while (unlikely(!dlist_is_empty(&prefetcher->filter_queue)))
897 {
899 link,
900 &prefetcher->filter_queue);
901
903 break;
904
905 dlist_delete(&filter->link);
906 hash_search(prefetcher->filter_table, filter, HASH_REMOVE, NULL);
907 }
908}
#define unlikely(x)
Definition c.h:412
@ HASH_REMOVE
Definition hsearch.h:115
#define dlist_tail_element(type, membername, lhead)
Definition ilist.h:612
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336

References dlist_delete(), dlist_is_empty(), dlist_tail_element, fb(), XLogPrefetcherFilter::filter_until_replayed, HASH_REMOVE, hash_search(), XLogPrefetcherFilter::link, and unlikely.

Referenced by XLogPrefetcherReadRecord().

◆ XLogPrefetcherComputeStats()

void XLogPrefetcherComputeStats ( XLogPrefetcher * prefetcher)

Definition at line 410 of file xlogprefetcher.c.

411{
412 uint32 io_depth;
413 uint32 completed;
414 int64 wal_distance;
415
416
417 /* How far ahead of replay are we now? */
418 if (prefetcher->reader->decode_queue_tail)
419 {
420 wal_distance =
421 prefetcher->reader->decode_queue_tail->lsn -
422 prefetcher->reader->decode_queue_head->lsn;
423 }
424 else
425 {
426 wal_distance = 0;
427 }
428
429 /* How many IOs are currently in flight and completed? */
430 io_depth = lrq_inflight(prefetcher->streaming_read);
431 completed = lrq_completed(prefetcher->streaming_read);
432
433 /* Update the instantaneous stats visible in pg_stat_recovery_prefetch. */
434 SharedStats->io_depth = io_depth;
435 SharedStats->block_distance = io_depth + completed;
436 SharedStats->wal_distance = wal_distance;
437
438 prefetcher->next_stats_shm_lsn =
439 prefetcher->reader->ReadRecPtr + XLOGPREFETCHER_STATS_DISTANCE;
440}
int64_t int64
Definition c.h:543
static uint32 lrq_completed(LsnReadQueue *lrq)
static uint32 lrq_inflight(LsnReadQueue *lrq)
#define XLOGPREFETCHER_STATS_DISTANCE

References XLogPrefetchStats::block_distance, XLogReaderState::decode_queue_tail, fb(), XLogPrefetchStats::io_depth, lrq_completed(), lrq_inflight(), DecodedXLogRecord::lsn, SharedStats, XLogPrefetchStats::wal_distance, and XLOGPREFETCHER_STATS_DISTANCE.

Referenced by ShutdownWalRecovery(), WaitForWALToBecomeAvailable(), and XLogPrefetcherReadRecord().

◆ XLogPrefetcherFree()

void XLogPrefetcherFree ( XLogPrefetcher * prefetcher)

Definition at line 390 of file xlogprefetcher.c.

391{
392 lrq_free(prefetcher->streaming_read);
393 hash_destroy(prefetcher->filter_table);
395}
void hash_destroy(HTAB *hashp)
Definition dynahash.c:865
static void lrq_free(LsnReadQueue *lrq)

References fb(), hash_destroy(), lrq_free(), and pfree().

Referenced by ShutdownWalRecovery().

◆ XLogPrefetcherGetReader()

XLogReaderState * XLogPrefetcherGetReader ( XLogPrefetcher * prefetcher)

Definition at line 401 of file xlogprefetcher.c.

402{
403 return prefetcher->reader;
404}

References fb().

Referenced by ReadRecord().

◆ XLogPrefetcherIsFiltered()

static bool XLogPrefetcherIsFiltered ( XLogPrefetcher * prefetcher,
RelFileLocator  rlocator,
BlockNumber  blockno 
)
inline static

Definition at line 914 of file xlogprefetcher.c.

916{
917 /*
918 * Test for empty queue first, because we expect it to be empty most of
919 * the time and we can avoid the hash table lookup in that case.
920 */
921 if (unlikely(!dlist_is_empty(&prefetcher->filter_queue)))
922 {
923 XLogPrefetcherFilter *filter;
924
925 /* See if the block range is filtered. */
926 filter = hash_search(prefetcher->filter_table, &rlocator, HASH_FIND, NULL);
927 if (filter && filter->filter_from_block <= blockno)
928 {
929#ifdef XLOGPREFETCHER_DEBUG_LEVEL
931 "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (blocks >= %u filtered)",
932 rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
934 filter->filter_from_block);
935#endif
936 return true;
937 }
938
939 /* See if the whole database is filtered. */
941 rlocator.spcOid = InvalidOid;
942 filter = hash_search(prefetcher->filter_table, &rlocator, HASH_FIND, NULL);
943 if (filter)
944 {
945#ifdef XLOGPREFETCHER_DEBUG_LEVEL
947 "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (whole database)",
948 rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
950#endif
951 return true;
952 }
953 }
954
955 return false;
956}
#define elog(elevel,...)
Definition elog.h:226
@ HASH_FIND
Definition hsearch.h:113
#define InvalidOid
#define InvalidRelFileNumber
Definition relpath.h:26
RelFileNumber relNumber
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47

References RelFileLocator::dbOid, dlist_is_empty(), elog, fb(), XLogPrefetcherFilter::filter_from_block, XLogPrefetcherFilter::filter_until_replayed, HASH_FIND, hash_search(), InvalidOid, InvalidRelFileNumber, LSN_FORMAT_ARGS, RelFileLocator::relNumber, RelFileLocator::spcOid, and unlikely.

Referenced by XLogPrefetcherNextBlock().

◆ XLogPrefetcherNextBlock()

static LsnReadQueueNextStatus XLogPrefetcherNextBlock ( uintptr_t  pgsr_private,
XLogRecPtr * lsn 
)
static

Definition at line 459 of file xlogprefetcher.c.

460{
462 XLogReaderState *reader = prefetcher->reader;
464
465 /*
466 * We keep track of the record and block we're up to between calls with
467 * prefetcher->record and prefetcher->next_block_id.
468 */
469 for (;;)
470 {
471 DecodedXLogRecord *record;
472
473 /* Try to read a new future record, if we don't already have one. */
474 if (prefetcher->record == NULL)
475 {
476 bool nonblocking;
477
478 /*
479 * If there are already records or an error queued up that could
480 * be replayed, we don't want to block here. Otherwise, it's OK
481 * to block waiting for more data: presumably the caller has
482 * nothing else to do.
483 */
484 nonblocking = XLogReaderHasQueuedRecordOrError(reader);
485
486 /* Readahead is disabled until we replay past a certain point. */
487 if (nonblocking && replaying_lsn <= prefetcher->no_readahead_until)
488 return LRQ_NEXT_AGAIN;
489
490 record = XLogReadAhead(prefetcher->reader, nonblocking);
491 if (record == NULL)
492 {
493 /*
494 * We can't read any more, due to an error or lack of data in
495 * nonblocking mode. Don't try to read ahead again until
496 * we've replayed everything already decoded.
497 */
498 if (nonblocking && prefetcher->reader->decode_queue_tail)
499 prefetcher->no_readahead_until =
500 prefetcher->reader->decode_queue_tail->lsn;
501
502 return LRQ_NEXT_AGAIN;
503 }
504
505 /*
506 * If prefetching is disabled, we don't need to analyze the record
507 * or issue any prefetches. We just need to cause one record to
508 * be decoded.
509 */
511 {
512 *lsn = InvalidXLogRecPtr;
513 return LRQ_NEXT_NO_IO;
514 }
515
516 /* We have a new record to process. */
517 prefetcher->record = record;
518 prefetcher->next_block_id = 0;
519 }
520 else
521 {
522 /* Continue to process from last call, or last loop. */
523 record = prefetcher->record;
524 }
525
526 /*
527 * Check for operations that require us to filter out block ranges, or
528 * pause readahead completely.
529 */
531 {
532 uint8 rmid = record->header.xl_rmid;
533 uint8 record_type = record->header.xl_info & ~XLR_INFO_MASK;
534
535 if (rmid == RM_XLOG_ID)
536 {
537 if (record_type == XLOG_CHECKPOINT_SHUTDOWN ||
538 record_type == XLOG_END_OF_RECOVERY)
539 {
540 /*
541 * These records might change the TLI. Avoid potential
542 * bugs if we were to allow "read TLI" and "replay TLI" to
543 * differ without more analysis.
544 */
545 prefetcher->no_readahead_until = record->lsn;
546
547#ifdef XLOGPREFETCHER_DEBUG_LEVEL
549 "suppressing all readahead until %X/%08X is replayed due to possible TLI change",
550 LSN_FORMAT_ARGS(record->lsn));
551#endif
552
553 /* Fall through so we move past this record. */
554 }
555 }
556 else if (rmid == RM_DBASE_ID)
557 {
558 /*
559 * When databases are created with the file-copy strategy,
560 * there are no WAL records to tell us about the creation of
561 * individual relations.
562 */
563 if (record_type == XLOG_DBASE_CREATE_FILE_COPY)
564 {
567 RelFileLocator rlocator =
569
570 /*
571 * Don't try to prefetch anything in this database until
572 * it has been created, or we might confuse the blocks of
573 * different generations, if a database OID or
574 * relfilenumber is reused. It's also more efficient than
575 * discovering that relations don't exist on disk yet with
576 * ENOENT errors.
577 */
578 XLogPrefetcherAddFilter(prefetcher, rlocator, 0, record->lsn);
579
580#ifdef XLOGPREFETCHER_DEBUG_LEVEL
582 "suppressing prefetch in database %u until %X/%08X is replayed due to raw file copy",
583 rlocator.dbOid,
584 LSN_FORMAT_ARGS(record->lsn));
585#endif
586 }
587 }
588 else if (rmid == RM_SMGR_ID)
589 {
590 if (record_type == XLOG_SMGR_CREATE)
591 {
593 record->main_data;
594
595 if (xlrec->forkNum == MAIN_FORKNUM)
596 {
597 /*
598 * Don't prefetch anything for this whole relation
599 * until it has been created. Otherwise we might
600 * confuse the blocks of different generations, if a
601 * relfilenumber is reused. This also avoids the need
602 * to discover the problem via extra syscalls that
603 * report ENOENT.
604 */
606 record->lsn);
607
608#ifdef XLOGPREFETCHER_DEBUG_LEVEL
610 "suppressing prefetch in relation %u/%u/%u until %X/%08X is replayed, which creates the relation",
611 xlrec->rlocator.spcOid,
612 xlrec->rlocator.dbOid,
613 xlrec->rlocator.relNumber,
614 LSN_FORMAT_ARGS(record->lsn));
615#endif
616 }
617 }
618 else if (record_type == XLOG_SMGR_TRUNCATE)
619 {
621 record->main_data;
622
623 /*
624 * Don't consider prefetching anything in the truncated
625 * range until the truncation has been performed.
626 */
628 xlrec->blkno,
629 record->lsn);
630
631#ifdef XLOGPREFETCHER_DEBUG_LEVEL
633 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, which truncates the relation",
634 xlrec->rlocator.spcOid,
635 xlrec->rlocator.dbOid,
636 xlrec->rlocator.relNumber,
637 xlrec->blkno,
638 LSN_FORMAT_ARGS(record->lsn));
639#endif
640 }
641 }
642 }
643
644 /* Scan the block references, starting where we left off last time. */
645 while (prefetcher->next_block_id <= record->max_block_id)
646 {
647 int block_id = prefetcher->next_block_id++;
648 DecodedBkpBlock *block = &record->blocks[block_id];
651
652 if (!block->in_use)
653 continue;
654
656
657 /*
658 * Record the LSN of this record. When it's replayed,
659 * LsnReadQueue will consider any IOs submitted for earlier LSNs
660 * to be finished.
661 */
662 *lsn = record->lsn;
663
664 /* We don't try to prefetch anything but the main fork for now. */
665 if (block->forknum != MAIN_FORKNUM)
666 {
667 return LRQ_NEXT_NO_IO;
668 }
669
670 /*
671 * If there is a full page image attached, we won't be reading the
672 * page, so don't bother trying to prefetch.
673 */
674 if (block->has_image)
675 {
677 return LRQ_NEXT_NO_IO;
678 }
679
680 /* There is no point in reading a page that will be zeroed. */
681 if (block->flags & BKPBLOCK_WILL_INIT)
682 {
684 return LRQ_NEXT_NO_IO;
685 }
686
687 /* Should we skip prefetching this block due to a filter? */
689 {
691 return LRQ_NEXT_NO_IO;
692 }
693
694 /* There is no point in repeatedly prefetching the same block. */
695 for (int i = 0; i < XLOGPREFETCHER_SEQ_WINDOW_SIZE; ++i)
696 {
697 if (block->blkno == prefetcher->recent_block[i] &&
698 RelFileLocatorEquals(block->rlocator, prefetcher->recent_rlocator[i]))
699 {
700 /*
701 * XXX If we also remembered where it was, we could set
702 * recent_buffer so that recovery could skip smgropen()
703 * and a buffer table lookup.
704 */
706 return LRQ_NEXT_NO_IO;
707 }
708 }
709 prefetcher->recent_rlocator[prefetcher->recent_idx] = block->rlocator;
710 prefetcher->recent_block[prefetcher->recent_idx] = block->blkno;
711 prefetcher->recent_idx =
712 (prefetcher->recent_idx + 1) % XLOGPREFETCHER_SEQ_WINDOW_SIZE;
713
714 /*
715 * We could try to have a fast path for repeated references to the
716 * same relation (with some scheme to handle invalidations
717 * safely), but for now we'll call smgropen() every time.
718 */
720
721 /*
722 * If the relation file doesn't exist on disk, for example because
723 * we're replaying after a crash and the file will be created and
724 * then unlinked by WAL that hasn't been replayed yet, suppress
725 * further prefetching in the relation until this record is
726 * replayed.
727 */
729 {
730#ifdef XLOGPREFETCHER_DEBUG_LEVEL
732 "suppressing all prefetch in relation %u/%u/%u until %X/%08X is replayed, because the relation does not exist on disk",
733 reln->smgr_rlocator.locator.spcOid,
734 reln->smgr_rlocator.locator.dbOid,
735 reln->smgr_rlocator.locator.relNumber,
736 LSN_FORMAT_ARGS(record->lsn));
737#endif
739 record->lsn);
741 return LRQ_NEXT_NO_IO;
742 }
743
744 /*
745 * If the relation isn't big enough to contain the referenced
746 * block yet, suppress prefetching of this block and higher until
747 * this record is replayed.
748 */
749 if (block->blkno >= smgrnblocks(reln, block->forknum))
750 {
751#ifdef XLOGPREFETCHER_DEBUG_LEVEL
753 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, because the relation is too small",
754 reln->smgr_rlocator.locator.spcOid,
755 reln->smgr_rlocator.locator.dbOid,
756 reln->smgr_rlocator.locator.relNumber,
757 block->blkno,
758 LSN_FORMAT_ARGS(record->lsn));
759#endif
761 record->lsn);
763 return LRQ_NEXT_NO_IO;
764 }
765
766 /* Try to initiate prefetching. */
767 result = PrefetchSharedBuffer(reln, block->forknum, block->blkno);
768 if (BufferIsValid(result.recent_buffer))
769 {
770 /* Cache hit, nothing to do. */
772 block->prefetch_buffer = result.recent_buffer;
773 return LRQ_NEXT_NO_IO;
774 }
775 else if (result.initiated_io)
776 {
777 /* Cache miss, I/O (presumably) started. */
780 return LRQ_NEXT_IO;
781 }
782 else if ((io_direct_flags & IO_DIRECT_DATA) == 0)
783 {
784 /*
785 * This shouldn't be possible, because we already determined
786 * that the relation exists on disk and is big enough.
787 * Something is wrong with the cache invalidation for
788 * smgrexists(), smgrnblocks(), or the file was unlinked or
789 * truncated beneath our feet?
790 */
791 elog(ERROR,
792 "could not prefetch relation %u/%u/%u block %u",
793 reln->smgr_rlocator.locator.spcOid,
794 reln->smgr_rlocator.locator.dbOid,
795 reln->smgr_rlocator.locator.relNumber,
796 block->blkno);
797 }
798 }
799
800 /*
801 * Several callsites need to be able to read exactly one record
802 * without any internal readahead. Examples: xlog.c reading
803 * checkpoint records with emode set to PANIC, which might otherwise
804 * cause XLogPageRead() to panic on some future page, and xlog.c
805 * determining where to start writing WAL next, which depends on the
806 * contents of the reader's internal buffer after reading one record.
807 * Therefore, don't even think about prefetching until the first
808 * record after XLogPrefetcherBeginRead() has been consumed.
809 */
810 if (prefetcher->reader->decode_queue_tail &&
811 prefetcher->reader->decode_queue_tail->lsn == prefetcher->begin_ptr)
812 return LRQ_NEXT_AGAIN;
813
814 /* Advance to the next record. */
815 prefetcher->record = NULL;
816 }
818}
#define InvalidBuffer
Definition buf.h:25
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition bufmgr.c:682
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
uint8_t uint8
Definition c.h:544
#define pg_unreachable()
Definition c.h:341
#define XLOG_DBASE_CREATE_FILE_COPY
#define ERROR
Definition elog.h:39
int io_direct_flags
Definition fd.c:168
#define IO_DIRECT_DATA
Definition fd.h:54
#define XLOG_CHECKPOINT_SHUTDOWN
Definition pg_control.h:69
#define XLOG_END_OF_RECOVERY
Definition pg_control.h:78
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
#define RelFileLocatorEquals(locator1, locator2)
@ MAIN_FORKNUM
Definition relpath.h:58
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:819
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition smgr.c:240
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:462
#define XLOG_SMGR_CREATE
#define XLOG_SMGR_TRUNCATE
Buffer prefetch_buffer
Definition xlogreader.h:130
RelFileLocator rlocator
Definition xlogreader.h:125
BlockNumber blkno
Definition xlogreader.h:127
ForkNumber forknum
Definition xlogreader.h:126
XLogRecord header
Definition xlogreader.h:165
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]
Definition xlogreader.h:171
Buffer recent_buffer
Definition bufmgr.h:61
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
uint8 xl_info
Definition xlogrecord.h:46
RmgrId xl_rmid
Definition xlogrecord.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
static bool XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator, BlockNumber blockno)
static void XLogPrefetchIncrement(pg_atomic_uint64 *counter)
#define XLOGPREFETCHER_SEQ_WINDOW_SIZE
static void XLogPrefetcherAddFilter(XLogPrefetcher *prefetcher, RelFileLocator rlocator, BlockNumber blockno, XLogRecPtr lsn)
DecodedXLogRecord * XLogReadAhead(XLogReaderState *state, bool nonblocking)
Definition xlogreader.c:977
static bool XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
Definition xlogreader.h:324
#define BKPBLOCK_WILL_INIT
Definition xlogrecord.h:199

References Assert, BKPBLOCK_WILL_INIT, DecodedBkpBlock::blkno, DecodedXLogRecord::blocks, BufferIsValid(), RelFileLocator::dbOid, elog, ERROR, fb(), DecodedBkpBlock::flags, DecodedBkpBlock::forknum, DecodedBkpBlock::has_image, DecodedXLogRecord::header, XLogPrefetchStats::hit, i, DecodedBkpBlock::in_use, PrefetchBufferResult::initiated_io, INVALID_PROC_NUMBER, InvalidBuffer, InvalidOid, InvalidRelFileNumber, InvalidXLogRecPtr, IO_DIRECT_DATA, io_direct_flags, LRQ_NEXT_AGAIN, LRQ_NEXT_IO, LRQ_NEXT_NO_IO, DecodedXLogRecord::lsn, LSN_FORMAT_ARGS, DecodedXLogRecord::main_data, MAIN_FORKNUM, DecodedXLogRecord::max_block_id, pg_unreachable, XLogPrefetchStats::prefetch, DecodedBkpBlock::prefetch_buffer, PrefetchSharedBuffer(), XLogReaderState::ReadRecPtr, PrefetchBufferResult::recent_buffer, RecoveryPrefetchEnabled, RelFileLocatorEquals, DecodedBkpBlock::rlocator, SharedStats, XLogPrefetchStats::skip_fpw, XLogPrefetchStats::skip_init, XLogPrefetchStats::skip_new, XLogPrefetchStats::skip_rep, smgrexists(), smgrnblocks(), smgropen(), XLogRecord::xl_info, XLogRecord::xl_rmid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_DBASE_CREATE_FILE_COPY, XLOG_END_OF_RECOVERY, XLOG_SMGR_CREATE, XLOG_SMGR_TRUNCATE, XLOGPREFETCHER_SEQ_WINDOW_SIZE, XLogPrefetcherAddFilter(), XLogPrefetcherIsFiltered(), XLogPrefetchIncrement(), XLogReadAhead(), and XLogReaderHasQueuedRecordOrError().

Referenced by XLogPrefetcherReadRecord().

◆ XLogPrefetcherReadRecord()

XLogRecord * XLogPrefetcherReadRecord ( XLogPrefetcher * prefetcher,
char **  errmsg 
)

Definition at line 981 of file xlogprefetcher.c.

982{
983 DecodedXLogRecord *record;
984 XLogRecPtr replayed_up_to;
985
986 /*
987 * See if it's time to reset the prefetching machinery, because a relevant
988 * GUC was changed.
989 */
990 if (unlikely(XLogPrefetchReconfigureCount != prefetcher->reconfigure_count))
991 {
992 uint32 max_distance;
993 uint32 max_inflight;
994
995 if (prefetcher->streaming_read)
996 lrq_free(prefetcher->streaming_read);
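997
998 if (RecoveryPrefetchEnabled())
999 {
1000 Assert(maintenance_io_concurrency > 0);
1001 max_inflight = maintenance_io_concurrency;
1002 max_distance = max_inflight * XLOGPREFETCHER_DISTANCE_MULTIPLIER;
1003 }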
1004 else
1005 {
1006 max_inflight = 1;
1007 max_distance = 1;
1008 }
1009
1010 prefetcher->streaming_read = lrq_alloc(max_distance,
1011 max_inflight,
1012 (uintptr_t) prefetcher,
1013 XLogPrefetcherNextBlock);
1014
1015 prefetcher->reconfigure_count = XLogPrefetchReconfigureCount;
1016 }
1017
1018 /*
1019 * Release last returned record, if there is one, as it's now been
1020 * replayed.
1021 */
1022 replayed_up_to = XLogReleasePreviousRecord(prefetcher->reader);
1023
1024 /*
1025 * Can we drop any filters yet? If we were waiting for a relation to be
1026 * created or extended, it is now OK to access blocks in the covered
1027 * range.
1028 */
1029 XLogPrefetcherCompleteFilters(prefetcher, replayed_up_to);
1030
1031 /*
1032 * All IO initiated by earlier WAL is now completed. This might trigger
1033 * further prefetching.
1034 */
1035 lrq_complete_lsn(prefetcher->streaming_read, replayed_up_to);
1036
1037 /*
1038 * If there's nothing queued yet, then start prefetching to cause at least
1039 * one record to be queued.
1040 */
1041 if (!XLogReaderHasQueuedRecordOrError(prefetcher->reader))
1042 {
1043 Assert(lrq_inflight(prefetcher->streaming_read) == 0);
1044 Assert(lrq_completed(prefetcher->streaming_read) == 0);
1045 lrq_prefetch(prefetcher->streaming_read);
1046 }
1047
1048 /* Read the next record. */
1049 record = XLogNextRecord(prefetcher->reader, errmsg);
1050 if (!record)
1051 return NULL;
1052
1053 /*
1054 * The record we just got is the "current" one, for the benefit of the
1055 * XLogRecXXX() macros.
1056 */
1057 Assert(record == prefetcher->reader->record);
1058
1059 /*
1060 * If maintenance_io_concurrency is set very low, we might have started
1061 * prefetching some but not all of the blocks referenced in the record
1062 * we're about to return. Forget about the rest of the blocks in this
1063 * record by dropping the prefetcher's reference to it.
1064 */
1065 if (record == prefetcher->record)
1066 prefetcher->record = NULL;
1067
1068 /*
1069 * See if it's time to compute some statistics, because enough WAL has
1070 * been processed.
1071 */
1072 if (unlikely(record->lsn >= prefetcher->next_stats_shm_lsn))
1073 XLogPrefetcherComputeStats(prefetcher);
1074
1075 Assert(record == prefetcher->reader->record);
1076
1077 return &record->header;
1078}
int maintenance_io_concurrency
Definition bufmgr.c:191
int errmsg(const char *fmt,...)
Definition elog.c:1080
void XLogPrefetcherComputeStats(XLogPrefetcher *prefetcher)
static void XLogPrefetcherCompleteFilters(XLogPrefetcher *prefetcher, XLogRecPtr replaying_lsn)
static LsnReadQueueNextStatus XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
static LsnReadQueue * lrq_alloc(uint32 max_distance, uint32 max_inflight, uintptr_t lrq_private, LsnReadQueueNextFun next)
static void lrq_complete_lsn(LsnReadQueue *lrq, XLogRecPtr lsn)
#define XLOGPREFETCHER_DISTANCE_MULTIPLIER
DecodedXLogRecord * XLogNextRecord(XLogReaderState *state, char **errormsg)
Definition xlogreader.c:326
XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state)
Definition xlogreader.c:250

References Assert, errmsg(), fb(), DecodedXLogRecord::header, lrq_alloc(), lrq_complete_lsn(), lrq_completed(), lrq_free(), lrq_inflight(), lrq_prefetch(), DecodedXLogRecord::lsn, maintenance_io_concurrency, RecoveryPrefetchEnabled, unlikely, XLogNextRecord(), XLOGPREFETCHER_DISTANCE_MULTIPLIER, XLogPrefetcherCompleteFilters(), XLogPrefetcherComputeStats(), XLogPrefetcherNextBlock(), XLogPrefetchReconfigureCount, XLogReaderHasQueuedRecordOrError(), and XLogReleasePreviousRecord().

Referenced by ReadRecord().

◆ XLogPrefetchIncrement()

static void XLogPrefetchIncrement ( pg_atomic_uint64 * counter)
inline static

Definition at line 351 of file xlogprefetcher.c.

352{
353 Assert(AmStartupProcess() || !IsUnderPostmaster);
354 pg_atomic_write_u64(counter, pg_atomic_read_u64(counter) + 1);
355}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
bool IsUnderPostmaster
Definition globals.c:120

References AmStartupProcess, Assert, IsUnderPostmaster, pg_atomic_read_u64(), and pg_atomic_write_u64().

Referenced by XLogPrefetcherNextBlock().

◆ XLogPrefetchReconfigure()

void XLogPrefetchReconfigure ( void  )

◆ XLogPrefetchResetStats()

◆ XLogPrefetchShmemInit()

void XLogPrefetchShmemInit ( void  )

◆ XLogPrefetchShmemSize()

size_t XLogPrefetchShmemSize ( void  )

Definition at line 294 of file xlogprefetcher.c.

295{
296 return sizeof(XLogPrefetchStats);
297}

Referenced by CalculateShmemSize().

Variable Documentation

◆ recovery_prefetch

int recovery_prefetch = RECOVERY_PREFETCH_TRY

Definition at line 68 of file xlogprefetcher.c.

Referenced by assign_recovery_prefetch().

◆ SharedStats

◆ XLogPrefetchReconfigureCount

int XLogPrefetchReconfigureCount = 0
static