PostgreSQL Source Code git master
Loading...
Searching...
No Matches
xlogprefetcher.c File Reference
#include "postgres.h"
#include "access/xlogprefetcher.h"
#include "access/xlogreader.h"
#include "catalog/pg_control.h"
#include "catalog/storage_xlog.h"
#include "commands/dbcommands_xlog.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "port/atomics.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "utils/fmgrprotos.h"
#include "utils/guc_hooks.h"
#include "utils/hsearch.h"
#include "utils/timestamp.h"
#include "utils/tuplestore.h"
Include dependency graph for xlogprefetcher.c:

Go to the source code of this file.

Data Structures

struct  LsnReadQueue
 
struct  XLogPrefetcher
 
struct  XLogPrefetcherFilter
 
struct  XLogPrefetchStats
 

Macros

#define XLOGPREFETCHER_STATS_DISTANCE   BLCKSZ
 
#define XLOGPREFETCHER_SEQ_WINDOW_SIZE   4
 
#define XLOGPREFETCHER_DISTANCE_MULTIPLIER   4
 
#define RecoveryPrefetchEnabled()   false
 
#define PG_STAT_GET_RECOVERY_PREFETCH_COLS   10
 

Typedefs

typedef LsnReadQueueNextStatus(* LsnReadQueueNextFun) (uintptr_t lrq_private, XLogRecPtr *lsn)
 
typedef struct LsnReadQueue LsnReadQueue
 
typedef struct XLogPrefetcherFilter XLogPrefetcherFilter
 
typedef struct XLogPrefetchStats XLogPrefetchStats
 

Enumerations

enum  LsnReadQueueNextStatus { LRQ_NEXT_NO_IO , LRQ_NEXT_IO , LRQ_NEXT_AGAIN }
 

Functions

static void XLogPrefetcherAddFilter (XLogPrefetcher *prefetcher, RelFileLocator rlocator, BlockNumber blockno, XLogRecPtr lsn)
 
static bool XLogPrefetcherIsFiltered (XLogPrefetcher *prefetcher, RelFileLocator rlocator, BlockNumber blockno)
 
static void XLogPrefetcherCompleteFilters (XLogPrefetcher *prefetcher, XLogRecPtr replaying_lsn)
 
static LsnReadQueueNextStatus XLogPrefetcherNextBlock (uintptr_t pgsr_private, XLogRecPtr *lsn)
 
static LsnReadQueue * lrq_alloc (uint32 max_distance, uint32 max_inflight, uintptr_t lrq_private, LsnReadQueueNextFun next)
 
static void lrq_free (LsnReadQueue *lrq)
 
static uint32 lrq_inflight (LsnReadQueue *lrq)
 
static uint32 lrq_completed (LsnReadQueue *lrq)
 
static void lrq_prefetch (LsnReadQueue *lrq)
 
static void lrq_complete_lsn (LsnReadQueue *lrq, XLogRecPtr lsn)
 
size_t XLogPrefetchShmemSize (void)
 
void XLogPrefetchResetStats (void)
 
void XLogPrefetchShmemInit (void)
 
void XLogPrefetchReconfigure (void)
 
static void XLogPrefetchIncrement (pg_atomic_uint64 *counter)
 
XLogPrefetcher * XLogPrefetcherAllocate (XLogReaderState *reader)
 
void XLogPrefetcherFree (XLogPrefetcher *prefetcher)
 
XLogReaderState * XLogPrefetcherGetReader (XLogPrefetcher *prefetcher)
 
void XLogPrefetcherComputeStats (XLogPrefetcher *prefetcher)
 
Datum pg_stat_get_recovery_prefetch (PG_FUNCTION_ARGS)
 
void XLogPrefetcherBeginRead (XLogPrefetcher *prefetcher, XLogRecPtr recPtr)
 
XLogRecord * XLogPrefetcherReadRecord (XLogPrefetcher *prefetcher, char **errmsg)
 
bool check_recovery_prefetch (int *new_value, void **extra, GucSource source)
 
void assign_recovery_prefetch (int new_value, void *extra)
 

Variables

int recovery_prefetch = RECOVERY_PREFETCH_TRY
 
static int XLogPrefetchReconfigureCount = 0
 
static XLogPrefetchStats * SharedStats
 

Macro Definition Documentation

◆ PG_STAT_GET_RECOVERY_PREFETCH_COLS

#define PG_STAT_GET_RECOVERY_PREFETCH_COLS   10

◆ RecoveryPrefetchEnabled

#define RecoveryPrefetchEnabled ( )    false

Definition at line 77 of file xlogprefetcher.c.

◆ XLOGPREFETCHER_DISTANCE_MULTIPLIER

#define XLOGPREFETCHER_DISTANCE_MULTIPLIER   4

Definition at line 64 of file xlogprefetcher.c.

◆ XLOGPREFETCHER_SEQ_WINDOW_SIZE

#define XLOGPREFETCHER_SEQ_WINDOW_SIZE   4

Definition at line 58 of file xlogprefetcher.c.

◆ XLOGPREFETCHER_STATS_DISTANCE

#define XLOGPREFETCHER_STATS_DISTANCE   BLCKSZ

Definition at line 52 of file xlogprefetcher.c.

Typedef Documentation

◆ LsnReadQueue

◆ LsnReadQueueNextFun

typedef LsnReadQueueNextStatus(* LsnReadQueueNextFun) (uintptr_t lrq_private, XLogRecPtr *lsn)

Definition at line 96 of file xlogprefetcher.c.

◆ XLogPrefetcherFilter

◆ XLogPrefetchStats

Enumeration Type Documentation

◆ LsnReadQueueNextStatus

Enumerator
LRQ_NEXT_NO_IO 
LRQ_NEXT_IO 
LRQ_NEXT_AGAIN 

Definition at line 85 of file xlogprefetcher.c.

86{
LsnReadQueueNextStatus
@ LRQ_NEXT_NO_IO
@ LRQ_NEXT_IO
@ LRQ_NEXT_AGAIN

Function Documentation

◆ assign_recovery_prefetch()

void assign_recovery_prefetch ( int  new_value,
void * extra 
)

Definition at line 1097 of file xlogprefetcher.c.

1098{
1099 /* Reconfigure prefetching, because a setting it depends on changed. */
1101 if (AmStartupProcess())
1103}
#define AmStartupProcess()
Definition miscadmin.h:390
static int fb(int x)
int recovery_prefetch
void XLogPrefetchReconfigure(void)

References AmStartupProcess, fb(), recovery_prefetch, and XLogPrefetchReconfigure().

◆ check_recovery_prefetch()

bool check_recovery_prefetch ( int * new_value,
void **  extra,
GucSource  source 
)

Definition at line 1083 of file xlogprefetcher.c.

1084{
1085#ifndef USE_PREFETCH
1087 {
1088 GUC_check_errdetail("\"recovery_prefetch\" is not supported on platforms that lack support for issuing read-ahead advice.");
1089 return false;
1090 }
1091#endif
1092
1093 return true;
1094}
#define GUC_check_errdetail
Definition guc.h:507
@ RECOVERY_PREFETCH_ON

References fb(), GUC_check_errdetail, and RECOVERY_PREFETCH_ON.

◆ lrq_alloc()

static LsnReadQueue * lrq_alloc ( uint32  max_distance,
uint32  max_inflight,
uintptr_t  lrq_private,
LsnReadQueueNextFun  next 
)
inline static

Definition at line 204 of file xlogprefetcher.c.

208{
210 uint32 size;
211
212 Assert(max_distance >= max_inflight);
213
214 size = max_distance + 1; /* full ring buffer has a gap */
215 lrq = palloc(offsetof(LsnReadQueue, queue) + sizeof(lrq->queue[0]) * size);
216 lrq->lrq_private = lrq_private;
217 lrq->max_inflight = max_inflight;
218 lrq->size = size;
219 lrq->next = next;
220 lrq->head = 0;
221 lrq->tail = 0;
222 lrq->inflight = 0;
223 lrq->completed = 0;
224
225 return lrq;
226}
static int32 next
Definition blutils.c:225
#define Assert(condition)
Definition c.h:943
uint32_t uint32
Definition c.h:624
void * palloc(Size size)
Definition mcxt.c:1387

References Assert, fb(), next, and palloc().

Referenced by XLogPrefetcherReadRecord().

◆ lrq_complete_lsn()

static void lrq_complete_lsn ( LsnReadQueue * lrq,
XLogRecPtr  lsn 
)
inline static

Definition at line 274 of file xlogprefetcher.c.

275{
276 /*
277 * We know that LSNs before 'lsn' have been replayed, so we can now assume
278 * that any IOs that were started before then have finished.
279 */
280 while (lrq->tail != lrq->head &&
281 lrq->queue[lrq->tail].lsn < lsn)
282 {
283 if (lrq->queue[lrq->tail].io)
284 lrq->inflight--;
285 else
286 lrq->completed--;
287 lrq->tail++;
288 if (lrq->tail == lrq->size)
289 lrq->tail = 0;
290 }
293}
#define RecoveryPrefetchEnabled()
static void lrq_prefetch(LsnReadQueue *lrq)

References fb(), lrq_prefetch(), and RecoveryPrefetchEnabled.

Referenced by XLogPrefetcherReadRecord().

◆ lrq_completed()

static uint32 lrq_completed ( LsnReadQueue * lrq)
inline static

Definition at line 241 of file xlogprefetcher.c.

242{
243 return lrq->completed;
244}

References fb().

Referenced by XLogPrefetcherComputeStats(), and XLogPrefetcherReadRecord().

◆ lrq_free()

static void lrq_free ( LsnReadQueue * lrq)
inline static

Definition at line 229 of file xlogprefetcher.c.

230{
231 pfree(lrq);
232}
void pfree(void *pointer)
Definition mcxt.c:1616

References fb(), and pfree().

Referenced by XLogPrefetcherFree(), and XLogPrefetcherReadRecord().

◆ lrq_inflight()

static uint32 lrq_inflight ( LsnReadQueue * lrq)
inline static

Definition at line 235 of file xlogprefetcher.c.

236{
237 return lrq->inflight;
238}

References fb().

Referenced by XLogPrefetcherComputeStats(), and XLogPrefetcherReadRecord().

◆ lrq_prefetch()

static void lrq_prefetch ( LsnReadQueue * lrq)
inline static

Definition at line 247 of file xlogprefetcher.c.

248{
249 /* Try to start as many IOs as we can within our limits. */
250 while (lrq->inflight < lrq->max_inflight &&
251 lrq->inflight + lrq->completed < lrq->size - 1)
252 {
253 Assert(((lrq->head + 1) % lrq->size) != lrq->tail);
254 switch (lrq->next(lrq->lrq_private, &lrq->queue[lrq->head].lsn))
255 {
256 case LRQ_NEXT_AGAIN:
257 return;
258 case LRQ_NEXT_IO:
259 lrq->queue[lrq->head].io = true;
260 lrq->inflight++;
261 break;
262 case LRQ_NEXT_NO_IO:
263 lrq->queue[lrq->head].io = false;
264 lrq->completed++;
265 break;
266 }
267 lrq->head++;
268 if (lrq->head == lrq->size)
269 lrq->head = 0;
270 }
271}

References Assert, fb(), LRQ_NEXT_AGAIN, LRQ_NEXT_IO, and LRQ_NEXT_NO_IO.

Referenced by lrq_complete_lsn(), and XLogPrefetcherReadRecord().

◆ pg_stat_get_recovery_prefetch()

Datum pg_stat_get_recovery_prefetch ( PG_FUNCTION_ARGS  )

Definition at line 826 of file xlogprefetcher.c.

827{
828#define PG_STAT_GET_RECOVERY_PREFETCH_COLS 10
829 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
832
833 InitMaterializedSRF(fcinfo, 0);
834
835 for (int i = 0; i < PG_STAT_GET_RECOVERY_PREFETCH_COLS; ++i)
836 nulls[i] = false;
837
848 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
849
850 return (Datum) 0;
851}
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
static Datum values[MAXATTR]
Definition bootstrap.c:188
void InitMaterializedSRF(FunctionCallInfo fcinfo, uint32 flags)
Definition funcapi.c:76
int i
Definition isn.c:77
static Datum Int64GetDatum(int64 X)
Definition postgres.h:413
uint64_t Datum
Definition postgres.h:70
static Datum Int32GetDatum(int32 X)
Definition postgres.h:212
pg_atomic_uint64 skip_fpw
pg_atomic_uint64 skip_init
pg_atomic_uint64 reset_time
pg_atomic_uint64 hit
pg_atomic_uint64 prefetch
pg_atomic_uint64 skip_rep
pg_atomic_uint64 skip_new
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition tuplestore.c:785
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
static XLogPrefetchStats * SharedStats
#define PG_STAT_GET_RECOVERY_PREFETCH_COLS

References XLogPrefetchStats::block_distance, fb(), XLogPrefetchStats::hit, i, InitMaterializedSRF(), Int32GetDatum(), Int64GetDatum(), XLogPrefetchStats::io_depth, pg_atomic_read_u64(), PG_STAT_GET_RECOVERY_PREFETCH_COLS, XLogPrefetchStats::prefetch, XLogPrefetchStats::reset_time, SharedStats, XLogPrefetchStats::skip_fpw, XLogPrefetchStats::skip_init, XLogPrefetchStats::skip_new, XLogPrefetchStats::skip_rep, TimestampTzGetDatum(), tuplestore_putvalues(), values, and XLogPrefetchStats::wal_distance.

◆ XLogPrefetcherAddFilter()

static void XLogPrefetcherAddFilter ( XLogPrefetcher * prefetcher,
RelFileLocator  rlocator,
BlockNumber  blockno,
XLogRecPtr  lsn 
)
inline static

Definition at line 858 of file xlogprefetcher.c.

860{
861 XLogPrefetcherFilter *filter;
862 bool found;
863
864 filter = hash_search(prefetcher->filter_table, &rlocator, HASH_ENTER, &found);
865 if (!found)
866 {
867 /*
868 * Don't allow any prefetching of this block or higher until replayed.
869 */
870 filter->filter_until_replayed = lsn;
871 filter->filter_from_block = blockno;
872 dlist_push_head(&prefetcher->filter_queue, &filter->link);
873 }
874 else
875 {
876 /*
877 * We were already filtering this rlocator. Extend the filter's
878 * lifetime to cover this WAL record, but leave the lower of the block
879 * numbers there because we don't want to have to track individual
880 * blocks.
881 */
882 filter->filter_until_replayed = lsn;
883 dlist_delete(&filter->link);
884 dlist_push_head(&prefetcher->filter_queue, &filter->link);
885 filter->filter_from_block = Min(filter->filter_from_block, blockno);
886 }
887}
#define Min(x, y)
Definition c.h:1091
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:938
@ HASH_ENTER
Definition hsearch.h:112
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
static void dlist_push_head(dlist_head *head, dlist_node *node)
Definition ilist.h:347
XLogRecPtr filter_until_replayed
BlockNumber filter_from_block

References dlist_delete(), dlist_push_head(), fb(), XLogPrefetcherFilter::filter_from_block, XLogPrefetcherFilter::filter_until_replayed, HASH_ENTER, hash_search(), XLogPrefetcherFilter::link, and Min.

Referenced by XLogPrefetcherNextBlock().

◆ XLogPrefetcherAllocate()

XLogPrefetcher * XLogPrefetcherAllocate ( XLogReaderState * reader)

Definition at line 364 of file xlogprefetcher.c.

365{
367 HASHCTL ctl;
368
370 prefetcher->reader = reader;
371
372 ctl.keysize = sizeof(RelFileLocator);
373 ctl.entrysize = sizeof(XLogPrefetcherFilter);
374 prefetcher->filter_table = hash_create("XLogPrefetcherFilterTable", 1024,
376 dlist_init(&prefetcher->filter_queue);
377
381
382 /* First usage will cause streaming_read to be allocated. */
383 prefetcher->reconfigure_count = XLogPrefetchReconfigureCount - 1;
384
385 return prefetcher;
386}
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:354
#define palloc0_object(type)
Definition fe_memutils.h:75
#define HASH_ELEM
Definition hsearch.h:93
#define HASH_BLOBS
Definition hsearch.h:95
static void dlist_init(dlist_head *head)
Definition ilist.h:314
tree ctl
Definition radixtree.h:1838
static int XLogPrefetchReconfigureCount

References XLogPrefetchStats::block_distance, ctl, dlist_init(), fb(), HASH_BLOBS, hash_create(), HASH_ELEM, XLogPrefetchStats::io_depth, palloc0_object, SharedStats, XLogPrefetchStats::wal_distance, and XLogPrefetchReconfigureCount.

Referenced by InitWalRecovery().

◆ XLogPrefetcherBeginRead()

void XLogPrefetcherBeginRead ( XLogPrefetcher * prefetcher,
XLogRecPtr  recPtr 
)

Definition at line 964 of file xlogprefetcher.c.

965{
966 /* This will forget about any in-flight IO. */
967 prefetcher->reconfigure_count--;
968
969 /* Book-keeping to avoid readahead on first read. */
970 prefetcher->begin_ptr = recPtr;
971
972 prefetcher->no_readahead_until = InvalidXLogRecPtr;
973
974 /* This will forget about any queued up records in the decoder. */
976}
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition xlogreader.c:233

References fb(), InvalidXLogRecPtr, and XLogBeginRead().

Referenced by FinishWalRecovery(), InitWalRecovery(), PerformWalRecovery(), and ReadCheckpointRecord().

◆ XLogPrefetcherCompleteFilters()

static void XLogPrefetcherCompleteFilters ( XLogPrefetcher * prefetcher,
XLogRecPtr  replaying_lsn 
)
inline static

Definition at line 896 of file xlogprefetcher.c.

897{
898 while (unlikely(!dlist_is_empty(&prefetcher->filter_queue)))
899 {
901 link,
902 &prefetcher->filter_queue);
903
905 break;
906
907 dlist_delete(&filter->link);
908 hash_search(prefetcher->filter_table, filter, HASH_REMOVE, NULL);
909 }
910}
#define unlikely(x)
Definition c.h:438
@ HASH_REMOVE
Definition hsearch.h:113
#define dlist_tail_element(type, membername, lhead)
Definition ilist.h:612
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336

References dlist_delete(), dlist_is_empty(), dlist_tail_element, fb(), XLogPrefetcherFilter::filter_until_replayed, HASH_REMOVE, hash_search(), XLogPrefetcherFilter::link, and unlikely.

Referenced by XLogPrefetcherReadRecord().

◆ XLogPrefetcherComputeStats()

void XLogPrefetcherComputeStats ( XLogPrefetcher * prefetcher)

Definition at line 412 of file xlogprefetcher.c.

413{
414 uint32 io_depth;
415 uint32 completed;
416 int64 wal_distance;
417
418
419 /* How far ahead of replay are we now? */
420 if (prefetcher->reader->decode_queue_tail)
421 {
422 wal_distance =
423 prefetcher->reader->decode_queue_tail->lsn -
424 prefetcher->reader->decode_queue_head->lsn;
425 }
426 else
427 {
428 wal_distance = 0;
429 }
430
431 /* How many IOs are currently in flight and completed? */
432 io_depth = lrq_inflight(prefetcher->streaming_read);
433 completed = lrq_completed(prefetcher->streaming_read);
434
435 /* Update the instantaneous stats visible in pg_stat_recovery_prefetch. */
436 SharedStats->io_depth = io_depth;
437 SharedStats->block_distance = io_depth + completed;
438 SharedStats->wal_distance = wal_distance;
439
440 prefetcher->next_stats_shm_lsn =
441 prefetcher->reader->ReadRecPtr + XLOGPREFETCHER_STATS_DISTANCE;
442}
int64_t int64
Definition c.h:621
static uint32 lrq_completed(LsnReadQueue *lrq)
static uint32 lrq_inflight(LsnReadQueue *lrq)
#define XLOGPREFETCHER_STATS_DISTANCE

References XLogPrefetchStats::block_distance, XLogReaderState::decode_queue_tail, fb(), XLogPrefetchStats::io_depth, lrq_completed(), lrq_inflight(), DecodedXLogRecord::lsn, SharedStats, XLogPrefetchStats::wal_distance, and XLOGPREFETCHER_STATS_DISTANCE.

Referenced by ShutdownWalRecovery(), WaitForWALToBecomeAvailable(), and XLogPrefetcherReadRecord().

◆ XLogPrefetcherFree()

void XLogPrefetcherFree ( XLogPrefetcher * prefetcher)

Definition at line 392 of file xlogprefetcher.c.

393{
394 lrq_free(prefetcher->streaming_read);
395 hash_destroy(prefetcher->filter_table);
397}
void hash_destroy(HTAB *hashp)
Definition dynahash.c:851
static void lrq_free(LsnReadQueue *lrq)

References fb(), hash_destroy(), lrq_free(), and pfree().

Referenced by ShutdownWalRecovery().

◆ XLogPrefetcherGetReader()

XLogReaderState * XLogPrefetcherGetReader ( XLogPrefetcher * prefetcher)

Definition at line 403 of file xlogprefetcher.c.

404{
405 return prefetcher->reader;
406}

References fb().

Referenced by ReadRecord().

◆ XLogPrefetcherIsFiltered()

static bool XLogPrefetcherIsFiltered ( XLogPrefetcher * prefetcher,
RelFileLocator  rlocator,
BlockNumber  blockno 
)
inline static

Definition at line 916 of file xlogprefetcher.c.

918{
919 /*
920 * Test for empty queue first, because we expect it to be empty most of
921 * the time and we can avoid the hash table lookup in that case.
922 */
923 if (unlikely(!dlist_is_empty(&prefetcher->filter_queue)))
924 {
925 XLogPrefetcherFilter *filter;
926
927 /* See if the block range is filtered. */
928 filter = hash_search(prefetcher->filter_table, &rlocator, HASH_FIND, NULL);
929 if (filter && filter->filter_from_block <= blockno)
930 {
931#ifdef XLOGPREFETCHER_DEBUG_LEVEL
933 "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (blocks >= %u filtered)",
934 rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
936 filter->filter_from_block);
937#endif
938 return true;
939 }
940
941 /* See if the whole database is filtered. */
943 rlocator.spcOid = InvalidOid;
944 filter = hash_search(prefetcher->filter_table, &rlocator, HASH_FIND, NULL);
945 if (filter)
946 {
947#ifdef XLOGPREFETCHER_DEBUG_LEVEL
949 "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (whole database)",
950 rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
952#endif
953 return true;
954 }
955 }
956
957 return false;
958}
#define elog(elevel,...)
Definition elog.h:227
@ HASH_FIND
Definition hsearch.h:111
#define InvalidOid
#define InvalidRelFileNumber
Definition relpath.h:26
RelFileNumber relNumber
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47

References RelFileLocator::dbOid, dlist_is_empty(), elog, fb(), XLogPrefetcherFilter::filter_from_block, XLogPrefetcherFilter::filter_until_replayed, HASH_FIND, hash_search(), InvalidOid, InvalidRelFileNumber, LSN_FORMAT_ARGS, RelFileLocator::relNumber, RelFileLocator::spcOid, and unlikely.

Referenced by XLogPrefetcherNextBlock().

◆ XLogPrefetcherNextBlock()

static LsnReadQueueNextStatus XLogPrefetcherNextBlock ( uintptr_t  pgsr_private,
XLogRecPtr * lsn 
)
static

Definition at line 461 of file xlogprefetcher.c.

462{
464 XLogReaderState *reader = prefetcher->reader;
466
467 /*
468 * We keep track of the record and block we're up to between calls with
469 * prefetcher->record and prefetcher->next_block_id.
470 */
471 for (;;)
472 {
473 DecodedXLogRecord *record;
474
475 /* Try to read a new future record, if we don't already have one. */
476 if (prefetcher->record == NULL)
477 {
478 bool nonblocking;
479
480 /*
481 * If there are already records or an error queued up that could
482 * be replayed, we don't want to block here. Otherwise, it's OK
483 * to block waiting for more data: presumably the caller has
484 * nothing else to do.
485 */
486 nonblocking = XLogReaderHasQueuedRecordOrError(reader);
487
488 /* Readahead is disabled until we replay past a certain point. */
489 if (nonblocking && replaying_lsn <= prefetcher->no_readahead_until)
490 return LRQ_NEXT_AGAIN;
491
492 record = XLogReadAhead(prefetcher->reader, nonblocking);
493 if (record == NULL)
494 {
495 /*
496 * We can't read any more, due to an error or lack of data in
497 * nonblocking mode. Don't try to read ahead again until
498 * we've replayed everything already decoded.
499 */
500 if (nonblocking && prefetcher->reader->decode_queue_tail)
501 prefetcher->no_readahead_until =
502 prefetcher->reader->decode_queue_tail->lsn;
503
504 return LRQ_NEXT_AGAIN;
505 }
506
507 /*
508 * If prefetching is disabled, we don't need to analyze the record
509 * or issue any prefetches. We just need to cause one record to
510 * be decoded.
511 */
513 {
514 *lsn = InvalidXLogRecPtr;
515 return LRQ_NEXT_NO_IO;
516 }
517
518 /* We have a new record to process. */
519 prefetcher->record = record;
520 prefetcher->next_block_id = 0;
521 }
522 else
523 {
524 /* Continue to process from last call, or last loop. */
525 record = prefetcher->record;
526 }
527
528 /*
529 * Check for operations that require us to filter out block ranges, or
530 * pause readahead completely.
531 */
533 {
534 uint8 rmid = record->header.xl_rmid;
535 uint8 record_type = record->header.xl_info & ~XLR_INFO_MASK;
536
537 if (rmid == RM_XLOG_ID)
538 {
539 if (record_type == XLOG_CHECKPOINT_SHUTDOWN ||
540 record_type == XLOG_END_OF_RECOVERY)
541 {
542 /*
543 * These records might change the TLI. Avoid potential
544 * bugs if we were to allow "read TLI" and "replay TLI" to
545 * differ without more analysis.
546 */
547 prefetcher->no_readahead_until = record->lsn;
548
549#ifdef XLOGPREFETCHER_DEBUG_LEVEL
551 "suppressing all readahead until %X/%08X is replayed due to possible TLI change",
552 LSN_FORMAT_ARGS(record->lsn));
553#endif
554
555 /* Fall through so we move past this record. */
556 }
557 }
558 else if (rmid == RM_DBASE_ID)
559 {
560 /*
561 * When databases are created with the file-copy strategy,
562 * there are no WAL records to tell us about the creation of
563 * individual relations.
564 */
565 if (record_type == XLOG_DBASE_CREATE_FILE_COPY)
566 {
569 RelFileLocator rlocator =
571
572 /*
573 * Don't try to prefetch anything in this database until
574 * it has been created, or we might confuse the blocks of
575 * different generations, if a database OID or
576 * relfilenumber is reused. It's also more efficient than
577 * discovering that relations don't exist on disk yet with
578 * ENOENT errors.
579 */
580 XLogPrefetcherAddFilter(prefetcher, rlocator, 0, record->lsn);
581
582#ifdef XLOGPREFETCHER_DEBUG_LEVEL
584 "suppressing prefetch in database %u until %X/%08X is replayed due to raw file copy",
585 rlocator.dbOid,
586 LSN_FORMAT_ARGS(record->lsn));
587#endif
588 }
589 }
590 else if (rmid == RM_SMGR_ID)
591 {
592 if (record_type == XLOG_SMGR_CREATE)
593 {
595 record->main_data;
596
597 if (xlrec->forkNum == MAIN_FORKNUM)
598 {
599 /*
600 * Don't prefetch anything for this whole relation
601 * until it has been created. Otherwise we might
602 * confuse the blocks of different generations, if a
603 * relfilenumber is reused. This also avoids the need
604 * to discover the problem via extra syscalls that
605 * report ENOENT.
606 */
608 record->lsn);
609
610#ifdef XLOGPREFETCHER_DEBUG_LEVEL
612 "suppressing prefetch in relation %u/%u/%u until %X/%08X is replayed, which creates the relation",
613 xlrec->rlocator.spcOid,
614 xlrec->rlocator.dbOid,
615 xlrec->rlocator.relNumber,
616 LSN_FORMAT_ARGS(record->lsn));
617#endif
618 }
619 }
620 else if (record_type == XLOG_SMGR_TRUNCATE)
621 {
623 record->main_data;
624
625 /*
626 * Don't consider prefetching anything in the truncated
627 * range until the truncation has been performed.
628 */
630 xlrec->blkno,
631 record->lsn);
632
633#ifdef XLOGPREFETCHER_DEBUG_LEVEL
635 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, which truncates the relation",
636 xlrec->rlocator.spcOid,
637 xlrec->rlocator.dbOid,
638 xlrec->rlocator.relNumber,
639 xlrec->blkno,
640 LSN_FORMAT_ARGS(record->lsn));
641#endif
642 }
643 }
644 }
645
646 /* Scan the block references, starting where we left off last time. */
647 while (prefetcher->next_block_id <= record->max_block_id)
648 {
649 int block_id = prefetcher->next_block_id++;
650 DecodedBkpBlock *block = &record->blocks[block_id];
653
654 if (!block->in_use)
655 continue;
656
658
659 /*
660 * Record the LSN of this record. When it's replayed,
661 * LsnReadQueue will consider any IOs submitted for earlier LSNs
662 * to be finished.
663 */
664 *lsn = record->lsn;
665
666 /* We don't try to prefetch anything but the main fork for now. */
667 if (block->forknum != MAIN_FORKNUM)
668 {
669 return LRQ_NEXT_NO_IO;
670 }
671
672 /*
673 * If there is a full page image attached, we won't be reading the
674 * page, so don't bother trying to prefetch.
675 */
676 if (block->has_image)
677 {
679 return LRQ_NEXT_NO_IO;
680 }
681
682 /* There is no point in reading a page that will be zeroed. */
683 if (block->flags & BKPBLOCK_WILL_INIT)
684 {
686 return LRQ_NEXT_NO_IO;
687 }
688
689 /* Should we skip prefetching this block due to a filter? */
691 {
693 return LRQ_NEXT_NO_IO;
694 }
695
696 /* There is no point in repeatedly prefetching the same block. */
697 for (int i = 0; i < XLOGPREFETCHER_SEQ_WINDOW_SIZE; ++i)
698 {
699 if (block->blkno == prefetcher->recent_block[i] &&
700 RelFileLocatorEquals(block->rlocator, prefetcher->recent_rlocator[i]))
701 {
702 /*
703 * XXX If we also remembered where it was, we could set
704 * recent_buffer so that recovery could skip smgropen()
705 * and a buffer table lookup.
706 */
708 return LRQ_NEXT_NO_IO;
709 }
710 }
711 prefetcher->recent_rlocator[prefetcher->recent_idx] = block->rlocator;
712 prefetcher->recent_block[prefetcher->recent_idx] = block->blkno;
713 prefetcher->recent_idx =
714 (prefetcher->recent_idx + 1) % XLOGPREFETCHER_SEQ_WINDOW_SIZE;
715
716 /*
717 * We could try to have a fast path for repeated references to the
718 * same relation (with some scheme to handle invalidations
719 * safely), but for now we'll call smgropen() every time.
720 */
722
723 /*
724 * If the relation file doesn't exist on disk, for example because
725 * we're replaying after a crash and the file will be created and
726 * then unlinked by WAL that hasn't been replayed yet, suppress
727 * further prefetching in the relation until this record is
728 * replayed.
729 */
731 {
732#ifdef XLOGPREFETCHER_DEBUG_LEVEL
734 "suppressing all prefetch in relation %u/%u/%u until %X/%08X is replayed, because the relation does not exist on disk",
735 reln->smgr_rlocator.locator.spcOid,
736 reln->smgr_rlocator.locator.dbOid,
737 reln->smgr_rlocator.locator.relNumber,
738 LSN_FORMAT_ARGS(record->lsn));
739#endif
741 record->lsn);
743 return LRQ_NEXT_NO_IO;
744 }
745
746 /*
747 * If the relation isn't big enough to contain the referenced
748 * block yet, suppress prefetching of this block and higher until
749 * this record is replayed.
750 */
751 if (block->blkno >= smgrnblocks(reln, block->forknum))
752 {
753#ifdef XLOGPREFETCHER_DEBUG_LEVEL
755 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, because the relation is too small",
756 reln->smgr_rlocator.locator.spcOid,
757 reln->smgr_rlocator.locator.dbOid,
758 reln->smgr_rlocator.locator.relNumber,
759 block->blkno,
760 LSN_FORMAT_ARGS(record->lsn));
761#endif
763 record->lsn);
765 return LRQ_NEXT_NO_IO;
766 }
767
768 /* Try to initiate prefetching. */
769 result = PrefetchSharedBuffer(reln, block->forknum, block->blkno);
770 if (BufferIsValid(result.recent_buffer))
771 {
772 /* Cache hit, nothing to do. */
774 block->prefetch_buffer = result.recent_buffer;
775 return LRQ_NEXT_NO_IO;
776 }
777 else if (result.initiated_io)
778 {
779 /* Cache miss, I/O (presumably) started. */
782 return LRQ_NEXT_IO;
783 }
784 else if ((io_direct_flags & IO_DIRECT_DATA) == 0)
785 {
786 /*
787 * This shouldn't be possible, because we already determined
788 * that the relation exists on disk and is big enough.
789 * Something is wrong with the cache invalidation for
790 * smgrexists(), smgrnblocks(), or the file was unlinked or
791 * truncated beneath our feet?
792 */
793 elog(ERROR,
794 "could not prefetch relation %u/%u/%u block %u",
795 reln->smgr_rlocator.locator.spcOid,
796 reln->smgr_rlocator.locator.dbOid,
797 reln->smgr_rlocator.locator.relNumber,
798 block->blkno);
799 }
800 }
801
802 /*
803 * Several callsites need to be able to read exactly one record
804 * without any internal readahead. Examples: xlog.c reading
805 * checkpoint records with emode set to PANIC, which might otherwise
806 * cause XLogPageRead() to panic on some future page, and xlog.c
807 * determining where to start writing WAL next, which depends on the
808 * contents of the reader's internal buffer after reading one record.
809 * Therefore, don't even think about prefetching until the first
810 * record after XLogPrefetcherBeginRead() has been consumed.
811 */
812 if (prefetcher->reader->decode_queue_tail &&
813 prefetcher->reader->decode_queue_tail->lsn == prefetcher->begin_ptr)
814 return LRQ_NEXT_AGAIN;
815
816 /* Advance to the next record. */
817 prefetcher->record = NULL;
818 }
820}
#define InvalidBuffer
Definition buf.h:25
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition bufmgr.c:697
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:423
uint8_t uint8
Definition c.h:622
#define pg_unreachable()
Definition c.h:367
#define XLOG_DBASE_CREATE_FILE_COPY
#define ERROR
Definition elog.h:39
int io_direct_flags
Definition fd.c:172
#define IO_DIRECT_DATA
Definition fd.h:54
#define XLOG_CHECKPOINT_SHUTDOWN
Definition pg_control.h:69
#define XLOG_END_OF_RECOVERY
Definition pg_control.h:78
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
#define RelFileLocatorEquals(locator1, locator2)
@ MAIN_FORKNUM
Definition relpath.h:58
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:819
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition smgr.c:240
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:462
#define XLOG_SMGR_CREATE
#define XLOG_SMGR_TRUNCATE
Buffer prefetch_buffer
Definition xlogreader.h:130
RelFileLocator rlocator
Definition xlogreader.h:125
BlockNumber blkno
Definition xlogreader.h:127
ForkNumber forknum
Definition xlogreader.h:126
XLogRecord header
Definition xlogreader.h:165
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]
Definition xlogreader.h:171
Buffer recent_buffer
Definition bufmgr.h:61
XLogRecPtr ReadRecPtr
Definition xlogreader.h:205
uint8 xl_info
Definition xlogrecord.h:46
RmgrId xl_rmid
Definition xlogrecord.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
static bool XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator, BlockNumber blockno)
static void XLogPrefetchIncrement(pg_atomic_uint64 *counter)
#define XLOGPREFETCHER_SEQ_WINDOW_SIZE
static void XLogPrefetcherAddFilter(XLogPrefetcher *prefetcher, RelFileLocator rlocator, BlockNumber blockno, XLogRecPtr lsn)
DecodedXLogRecord * XLogReadAhead(XLogReaderState *state, bool nonblocking)
Definition xlogreader.c:978
static bool XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
Definition xlogreader.h:324
#define BKPBLOCK_WILL_INIT
Definition xlogrecord.h:199

References Assert, BKPBLOCK_WILL_INIT, DecodedBkpBlock::blkno, DecodedXLogRecord::blocks, BufferIsValid(), RelFileLocator::dbOid, elog, ERROR, fb(), DecodedBkpBlock::flags, DecodedBkpBlock::forknum, DecodedBkpBlock::has_image, DecodedXLogRecord::header, XLogPrefetchStats::hit, i, DecodedBkpBlock::in_use, PrefetchBufferResult::initiated_io, INVALID_PROC_NUMBER, InvalidBuffer, InvalidOid, InvalidRelFileNumber, InvalidXLogRecPtr, IO_DIRECT_DATA, io_direct_flags, LRQ_NEXT_AGAIN, LRQ_NEXT_IO, LRQ_NEXT_NO_IO, DecodedXLogRecord::lsn, LSN_FORMAT_ARGS, DecodedXLogRecord::main_data, MAIN_FORKNUM, DecodedXLogRecord::max_block_id, pg_unreachable, XLogPrefetchStats::prefetch, DecodedBkpBlock::prefetch_buffer, PrefetchSharedBuffer(), XLogReaderState::ReadRecPtr, PrefetchBufferResult::recent_buffer, RecoveryPrefetchEnabled, RelFileLocatorEquals, DecodedBkpBlock::rlocator, SharedStats, XLogPrefetchStats::skip_fpw, XLogPrefetchStats::skip_init, XLogPrefetchStats::skip_new, XLogPrefetchStats::skip_rep, smgrexists(), smgrnblocks(), smgropen(), XLogRecord::xl_info, XLogRecord::xl_rmid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_DBASE_CREATE_FILE_COPY, XLOG_END_OF_RECOVERY, XLOG_SMGR_CREATE, XLOG_SMGR_TRUNCATE, XLOGPREFETCHER_SEQ_WINDOW_SIZE, XLogPrefetcherAddFilter(), XLogPrefetcherIsFiltered(), XLogPrefetchIncrement(), XLogReadAhead(), and XLogReaderHasQueuedRecordOrError().

Referenced by XLogPrefetcherReadRecord().

◆ XLogPrefetcherReadRecord()

XLogRecord * XLogPrefetcherReadRecord ( XLogPrefetcher *  prefetcher,
char **  errmsg 
)

Definition at line 983 of file xlogprefetcher.c.

984{
985 DecodedXLogRecord *record;
986 XLogRecPtr replayed_up_to;
987
988 /*
989 * See if it's time to reset the prefetching machinery, because a relevant
990 * GUC was changed.
991 */
992 if (unlikely(XLogPrefetchReconfigureCount != prefetcher->reconfigure_count))
993 {
994 uint32 max_distance;
995 uint32 max_inflight;
996
997 if (prefetcher->streaming_read)
998 lrq_free(prefetcher->streaming_read);
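999
1000 if (RecoveryPrefetchEnabled())
1001 {
1002 Assert(maintenance_io_concurrency > 0);
1003 max_inflight = maintenance_io_concurrency;
1004 max_distance = max_inflight * XLOGPREFETCHER_DISTANCE_MULTIPLIER;
1005 }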
1006 else
1007 {
1008 max_inflight = 1;
1009 max_distance = 1;
1010 }
1011
1012 prefetcher->streaming_read = lrq_alloc(max_distance,
1013 max_inflight,
1014 (uintptr_t) prefetcher,
1015 XLogPrefetcherNextBlock);
1016
1017 prefetcher->reconfigure_count = XLogPrefetchReconfigureCount;
1018 }
1019
1020 /*
1021 * Release last returned record, if there is one, as it's now been
1022 * replayed.
1023 */
1024 replayed_up_to = XLogReleasePreviousRecord(prefetcher->reader);
1025
1026 /*
1027 * Can we drop any filters yet? If we were waiting for a relation to be
1028 * created or extended, it is now OK to access blocks in the covered
1029 * range.
1030 */
1031 XLogPrefetcherCompleteFilters(prefetcher, replayed_up_to);
1032
1033 /*
1034 * All IO initiated by earlier WAL is now completed. This might trigger
1035 * further prefetching.
1036 */
1037 lrq_complete_lsn(prefetcher->streaming_read, replayed_up_to);
1038
1039 /*
1040 * If there's nothing queued yet, then start prefetching to cause at least
1041 * one record to be queued.
1042 */
1043 if (!XLogReaderHasQueuedRecordOrError(prefetcher->reader))
1044 {
1045 Assert(lrq_inflight(prefetcher->streaming_read) == 0);
1046 Assert(lrq_completed(prefetcher->streaming_read) == 0);
1047 lrq_prefetch(prefetcher->streaming_read);
1048 }
1049
1050 /* Read the next record. */
1051 record = XLogNextRecord(prefetcher->reader, errmsg);
1052 if (!record)
1053 return NULL;
1054
1055 /*
1056 * The record we just got is the "current" one, for the benefit of the
1057 * XLogRecXXX() macros.
1058 */
1059 Assert(record == prefetcher->reader->record);
1060
1061 /*
1062 * If maintenance_io_concurrency is set very low, we might have started
1063 * prefetching some but not all of the blocks referenced in the record
1064 * we're about to return. Forget about the rest of the blocks in this
1065 * record by dropping the prefetcher's reference to it.
1066 */
1067 if (record == prefetcher->record)
1068 prefetcher->record = NULL;
1069
1070 /*
1071 * See if it's time to compute some statistics, because enough WAL has
1072 * been processed.
1073 */
1074 if (unlikely(record->lsn >= prefetcher->next_stats_shm_lsn))
1075 XLogPrefetcherComputeStats(prefetcher);
1076
1077 Assert(record == prefetcher->reader->record);
1078
1079 return &record->header;
1080}
int maintenance_io_concurrency
Definition bufmgr.c:207
static char * errmsg
void XLogPrefetcherComputeStats(XLogPrefetcher *prefetcher)
static void XLogPrefetcherCompleteFilters(XLogPrefetcher *prefetcher, XLogRecPtr replaying_lsn)
static LsnReadQueueNextStatus XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
static LsnReadQueue * lrq_alloc(uint32 max_distance, uint32 max_inflight, uintptr_t lrq_private, LsnReadQueueNextFun next)
static void lrq_complete_lsn(LsnReadQueue *lrq, XLogRecPtr lsn)
#define XLOGPREFETCHER_DISTANCE_MULTIPLIER
DecodedXLogRecord * XLogNextRecord(XLogReaderState *state, char **errormsg)
Definition xlogreader.c:327
XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state)
Definition xlogreader.c:251

References Assert, errmsg, fb(), DecodedXLogRecord::header, lrq_alloc(), lrq_complete_lsn(), lrq_completed(), lrq_free(), lrq_inflight(), lrq_prefetch(), DecodedXLogRecord::lsn, maintenance_io_concurrency, RecoveryPrefetchEnabled, unlikely, XLogNextRecord(), XLOGPREFETCHER_DISTANCE_MULTIPLIER, XLogPrefetcherCompleteFilters(), XLogPrefetcherComputeStats(), XLogPrefetcherNextBlock(), XLogPrefetchReconfigureCount, XLogReaderHasQueuedRecordOrError(), and XLogReleasePreviousRecord().

Referenced by ReadRecord().

◆ XLogPrefetchIncrement()

static void XLogPrefetchIncrement ( pg_atomic_uint64 *  counter )
inline static

Definition at line 353 of file xlogprefetcher.c.

354{
355 Assert(AmStartupProcess() || !IsUnderPostmaster);
356 pg_atomic_write_u64(counter, pg_atomic_read_u64(counter) + 1);
357}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
bool IsUnderPostmaster
Definition globals.c:120

References AmStartupProcess, Assert, IsUnderPostmaster, pg_atomic_read_u64(), and pg_atomic_write_u64().

Referenced by XLogPrefetcherNextBlock().

◆ XLogPrefetchReconfigure()

void XLogPrefetchReconfigure ( void  )

◆ XLogPrefetchResetStats()

◆ XLogPrefetchShmemInit()

void XLogPrefetchShmemInit ( void  )

◆ XLogPrefetchShmemSize()

size_t XLogPrefetchShmemSize ( void  )

Definition at line 296 of file xlogprefetcher.c.

297{
298 return sizeof(XLogPrefetchStats);
299}

Referenced by CalculateShmemSize().

Variable Documentation

◆ recovery_prefetch

int recovery_prefetch = RECOVERY_PREFETCH_TRY

Definition at line 70 of file xlogprefetcher.c.

Referenced by assign_recovery_prefetch().

◆ SharedStats

◆ XLogPrefetchReconfigureCount

int XLogPrefetchReconfigureCount = 0
static