PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pgstat_io.c
Go to the documentation of this file.
1/* -------------------------------------------------------------------------
2 *
3 * pgstat_io.c
4 * Implementation of IO statistics.
5 *
6 * This file contains the implementation of IO statistics. It is kept separate
7 * from pgstat.c to enforce the line between the statistics access / storage
8 * implementation and the details about individual types of statistics.
9 *
10 * Copyright (c) 2021-2026, PostgreSQL Global Development Group
11 *
12 * IDENTIFICATION
13 * src/backend/utils/activity/pgstat_io.c
14 * -------------------------------------------------------------------------
15 */
16
17#include "postgres.h"
18
19#include "executor/instrument.h"
20#include "storage/bufmgr.h"
22
24static bool have_iostats = false;
25
26/*
27 * Check that stats have not been counted for any combination of IOObject,
28 * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
29 * stats are tracked for this combination and IO times are non-zero, counts
30 * should be non-zero.
31 *
32 * The passed-in PgStat_BktypeIO must contain stats from the BackendType
33 * specified by the second parameter. Caller is responsible for locking the
34 * passed-in PgStat_BktypeIO, if needed.
35 */
36bool
39{
41 {
43 {
44 for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
45 {
46 /* we do track it */
48 {
49 /* ensure that if IO times are non-zero, counts are > 0 */
50 if (backend_io->times[io_object][io_context][io_op] != 0 &&
51 backend_io->counts[io_object][io_context][io_op] <= 0)
52 return false;
53
54 continue;
55 }
56
57 /* we don't track it, and it is not 0 */
58 if (backend_io->counts[io_object][io_context][io_op] != 0)
59 return false;
60 }
61 }
62 }
63
64 return true;
65}
66
67void
85
86/*
87 * Initialize the internal timing for an IO operation, depending on an
88 * IO timing GUC.
89 */
92{
94
95 if (track_io_guc)
97 else
98 {
99 /*
100 * There is no need to set io_start when an IO timing GUC is disabled.
101 * Initialize it to zero to avoid compiler warnings and to let
102 * pgstat_count_io_op_time() know that timings should be ignored.
103 */
105 }
106
107 return io_start;
108}
109
110/*
111 * Like pgstat_count_io_op() except it also accumulates time.
112 *
113 * The calls related to pgstat_count_buffer_*() are for pg_stat_database. As
114 * pg_stat_database only counts block read and write times, these are done for
115 * IOOP_READ, IOOP_WRITE and IOOP_EXTEND.
116 *
117 * pgBufferUsage is used for EXPLAIN. pgBufferUsage has write and read stats
118 * for shared, local and temporary blocks. pg_stat_io does not track the
119 * activity of temporary blocks, so these are ignored here.
120 */
121void
162
163PgStat_IO *
170
171/*
172 * Simple wrapper around pgstat_io_flush_cb() that ignores its result.
173 */
174void
175pgstat_flush_io(bool nowait)
176{
177 (void) pgstat_io_flush_cb(nowait);
178}
179
180/*
181 * Flush out locally pending IO statistics
182 *
183 * If no stats have been recorded, this function returns false.
184 *
185 * If nowait is true, this function returns true if the lock could not be
186 * acquired. Otherwise, it returns false.
187 */
188bool
190{
193
194 if (!have_iostats)
195 return false;
196
200
201 if (!nowait)
204 return true;
205
207 {
209 {
210 for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
211 {
212 instr_time time;
213
216
219
221
224 }
225 }
226 }
227
229
231
233
234 have_iostats = false;
235
236 return false;
237}
238
239const char *
241{
242 switch (io_context)
243 {
245 return "bulkread";
247 return "bulkwrite";
248 case IOCONTEXT_INIT:
249 return "init";
250 case IOCONTEXT_NORMAL:
251 return "normal";
252 case IOCONTEXT_VACUUM:
253 return "vacuum";
254 }
255
256 elog(ERROR, "unrecognized IOContext value: %d", io_context);
258}
259
260const char *
262{
263 switch (io_object)
264 {
266 return "relation";
268 return "temp relation";
269 case IOOBJECT_WAL:
270 return "wal";
271 }
272
273 elog(ERROR, "unrecognized IOObject value: %d", io_object);
275}
276
277void
279{
281
282 for (int i = 0; i < BACKEND_NUM_TYPES; i++)
284}
285
286void
288{
289 for (int i = 0; i < BACKEND_NUM_TYPES; i++)
290 {
293
295
296 /*
297 * Use the lock in the first BackendType's PgStat_BktypeIO to protect
298 * the reset timestamp as well.
299 */
300 if (i == 0)
302
305 }
306}
307
308void
310{
311 for (int i = 0; i < BACKEND_NUM_TYPES; i++)
312 {
316
318
319 /*
320 * Use the lock in the first BackendType's PgStat_BktypeIO to protect
321 * the reset timestamp as well.
322 */
323 if (i == 0)
326
327 /* using struct assignment due to better type safety */
330 }
331}
332
333/*
334* IO statistics are not collected for all BackendTypes.
335*
336* The following BackendTypes do not participate in the cumulative stats
337* subsystem or do not perform IO that we currently track:
338* - Dead-end backend because it is not connected to shared memory and
339* doesn't do any IO
340* - Syslogger because it is not connected to shared memory
341* - Archiver because most relevant archiving IO is delegated to a
342* specialized command or module
343*
344* Function returns true if BackendType participates in the cumulative stats
345* subsystem for IO and false if it does not.
346*
347* When adding a new BackendType, also consider adding relevant restrictions to
348* pgstat_tracks_io_object() and pgstat_tracks_io_op().
349*/
350bool
352{
353 /*
354 * List every type so that new backend types trigger a warning about
355 * needing to adjust this switch.
356 */
357 switch (bktype)
358 {
359 case B_INVALID:
361 case B_ARCHIVER:
362 case B_LOGGER:
363 return false;
364
366 case B_AUTOVAC_WORKER:
367 case B_BACKEND:
368 case B_BG_WORKER:
369 case B_BG_WRITER:
370 case B_CHECKPOINTER:
371 case B_IO_WORKER:
374 case B_STARTUP:
375 case B_WAL_RECEIVER:
376 case B_WAL_SENDER:
377 case B_WAL_SUMMARIZER:
378 case B_WAL_WRITER:
379 return true;
380 }
381
382 return false;
383}
384
385/*
386 * Some BackendTypes do not perform IO on certain IOObjects or in certain
387 * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
388 * that the given BackendType is expected to do IO in the given IOContext and
389 * on the given IOObject and that the given IOObject is expected to be operated
390 * on in the given IOContext.
391 */
392bool
395{
396 bool no_temp_rel;
397
398 /*
399 * Some BackendTypes should never track IO statistics.
400 */
402 return false;
403
404 /*
405 * Currently, IO on IOOBJECT_WAL objects can only occur in the
406 * IOCONTEXT_NORMAL and IOCONTEXT_INIT IOContexts.
407 */
408 if (io_object == IOOBJECT_WAL &&
411 return false;
412
413 /*
414 * Currently, IO on temporary relations can only occur in the
415 * IOCONTEXT_NORMAL IOContext.
416 */
419 return false;
420
421 /*
422 * In core Postgres, only regular backends and WAL Sender processes
423 * executing queries will use local buffers and operate on temporary
424 * relations. Parallel workers will not use local buffers (see
425 * InitLocalBuffers()); however, extensions leveraging background workers
426 * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
427 * BackendType B_BG_WORKER.
428 */
434
437 return false;
438
439 /*
440 * Some BackendTypes only perform IO under IOOBJECT_WAL, hence exclude all
441 * rows for all the other objects for these.
442 */
445 return false;
446
447 /*
448 * Some BackendTypes do not currently perform any IO in certain
449 * IOContexts, and, while it may not be inherently incorrect for them to
450 * do so, excluding those rows from the view makes the view easier to use.
451 */
452 if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
456 return false;
457
459 return false;
460
463 return false;
464
465 return true;
466}
467
468/*
469 * Some BackendTypes will never do certain IOOps and some IOOps should not
470 * occur in certain IOContexts or on certain IOObjects. Check that the given
471 * IOOp is valid for the given BackendType in the given IOContext and on the
472 * given IOObject. Note that there are currently no cases of an IOOp being
473 * invalid for a particular BackendType only within a certain IOContext and/or
474 * only on a certain IOObject.
475 */
476bool
479{
481
482 /* if (io_context, io_object) will never collect stats, we're done */
484 return false;
485
486 /*
487 * Some BackendTypes will not do certain IOOps.
488 */
489 if (bktype == B_BG_WRITER &&
490 (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
491 return false;
492
493 if (bktype == B_CHECKPOINTER &&
495 (io_op == IOOP_EVICT || io_op == IOOP_HIT)))
496 return false;
497
500 return false;
501
502 /*
503 * Some BackendTypes do not perform reads with IOOBJECT_WAL.
504 */
505 if (io_object == IOOBJECT_WAL && io_op == IOOP_READ &&
509 return false;
510
511 /*
512 * Temporary tables are not logged and thus do not require fsync'ing.
513 * Writeback is not requested for temporary tables.
514 */
517 return false;
518
519 /*
520 * Some IOOps are not valid in certain IOContexts and some IOOps are only
521 * valid in certain contexts.
522 */
524 return false;
525
528
529 /*
530 * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
531 */
533 return false;
534
535 /*
536 * IOOBJECT_WAL IOObject will not do certain IOOps depending on IOContext.
537 */
539 !(io_op == IOOP_WRITE || io_op == IOOP_FSYNC))
540 return false;
541
544 return false;
545
546 /*
547 * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
548 * counted in the IOCONTEXT_NORMAL IOContext. See comment in
549 * register_dirty_segment() for more details.
550 */
552 return false;
553
554
555 return true;
556}
#define Assert(condition)
Definition c.h:883
uint64_t uint64
Definition c.h:557
#define pg_unreachable()
Definition c.h:351
uint32_t uint32
Definition c.h:556
int64 TimestampTz
Definition timestamp.h:39
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:122
#define INSTR_TIME_ADD(x, y)
Definition instr_time.h:178
#define INSTR_TIME_IS_ZERO(t)
Definition instr_time.h:169
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition instr_time.h:196
#define INSTR_TIME_SET_ZERO(t)
Definition instr_time.h:172
BufferUsage pgBufferUsage
Definition instrument.c:20
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:698
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1347
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
#define BACKEND_NUM_TYPES
Definition miscadmin.h:377
BackendType
Definition miscadmin.h:338
@ B_WAL_SUMMARIZER
Definition miscadmin.h:367
@ B_WAL_WRITER
Definition miscadmin.h:368
@ B_WAL_RECEIVER
Definition miscadmin.h:366
@ B_CHECKPOINTER
Definition miscadmin.h:363
@ B_WAL_SENDER
Definition miscadmin.h:347
@ B_IO_WORKER
Definition miscadmin.h:364
@ B_LOGGER
Definition miscadmin.h:374
@ B_STARTUP
Definition miscadmin.h:365
@ B_BG_WORKER
Definition miscadmin.h:346
@ B_INVALID
Definition miscadmin.h:339
@ B_STANDALONE_BACKEND
Definition miscadmin.h:350
@ B_BG_WRITER
Definition miscadmin.h:362
@ B_BACKEND
Definition miscadmin.h:342
@ B_ARCHIVER
Definition miscadmin.h:361
@ B_AUTOVAC_LAUNCHER
Definition miscadmin.h:344
@ B_SLOTSYNC_WORKER
Definition miscadmin.h:348
@ B_DEAD_END_BACKEND
Definition miscadmin.h:343
@ B_AUTOVAC_WORKER
Definition miscadmin.h:345
BackendType MyBackendType
Definition miscinit.c:64
static time_t start_time
Definition pg_ctl.c:96
void pgstat_snapshot_fixed(PgStat_Kind kind)
Definition pgstat.c:1070
bool pgstat_report_fixed
Definition pgstat.c:218
PgStat_LocalState pgStatLocal
Definition pgstat.c:212
#define pgstat_count_buffer_read_time(n)
Definition pgstat.h:631
IOObject
Definition pgstat.h:276
@ IOOBJECT_RELATION
Definition pgstat.h:277
@ IOOBJECT_WAL
Definition pgstat.h:279
@ IOOBJECT_TEMP_RELATION
Definition pgstat.h:278
#define pgstat_is_ioop_tracked_in_bytes(io_op)
Definition pgstat.h:321
#define pgstat_count_buffer_write_time(n)
Definition pgstat.h:633
#define IOOP_NUM_TYPES
Definition pgstat.h:319
IOContext
Definition pgstat.h:285
@ IOCONTEXT_INIT
Definition pgstat.h:288
@ IOCONTEXT_NORMAL
Definition pgstat.h:289
@ IOCONTEXT_VACUUM
Definition pgstat.h:290
@ IOCONTEXT_BULKREAD
Definition pgstat.h:286
@ IOCONTEXT_BULKWRITE
Definition pgstat.h:287
#define IOCONTEXT_NUM_TYPES
Definition pgstat.h:293
IOOp
Definition pgstat.h:305
@ IOOP_EXTEND
Definition pgstat.h:314
@ IOOP_FSYNC
Definition pgstat.h:308
@ IOOP_READ
Definition pgstat.h:315
@ IOOP_WRITEBACK
Definition pgstat.h:311
@ IOOP_HIT
Definition pgstat.h:309
@ IOOP_EVICT
Definition pgstat.h:307
@ IOOP_REUSE
Definition pgstat.h:310
@ IOOP_WRITE
Definition pgstat.h:316
#define IOOBJECT_NUM_TYPES
Definition pgstat.h:282
void pgstat_count_backend_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
void pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time io_time)
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:68
void pgstat_flush_io(bool nowait)
Definition pgstat_io.c:175
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
PgStat_IO * pgstat_fetch_stat_io(void)
Definition pgstat_io.c:164
const char * pgstat_get_io_context_name(IOContext io_context)
Definition pgstat_io.c:240
bool pgstat_tracks_io_bktype(BackendType bktype)
Definition pgstat_io.c:351
const char * pgstat_get_io_object_name(IOObject io_object)
Definition pgstat_io.c:261
bool pgstat_io_flush_cb(bool nowait)
Definition pgstat_io.c:189
void pgstat_io_reset_all_cb(TimestampTz ts)
Definition pgstat_io.c:287
bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, BackendType bktype)
Definition pgstat_io.c:37
static PgStat_PendingIO PendingIOStats
Definition pgstat_io.c:23
bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object, IOContext io_context, IOOp io_op)
Definition pgstat_io.c:477
static bool have_iostats
Definition pgstat_io.c:24
void pgstat_io_snapshot_cb(void)
Definition pgstat_io.c:309
void pgstat_io_init_shmem_cb(void *stats)
Definition pgstat_io.c:278
bool pgstat_tracks_io_object(BackendType bktype, IOObject io_object, IOContext io_context)
Definition pgstat_io.c:393
#define PGSTAT_KIND_IO
Definition pgstat_kind.h:38
static int fb(int x)
instr_time local_blk_read_time
Definition instrument.h:38
instr_time shared_blk_read_time
Definition instrument.h:36
instr_time shared_blk_write_time
Definition instrument.h:37
instr_time local_blk_write_time
Definition instrument.h:39
LWLock locks[BACKEND_NUM_TYPES]
PgStat_BktypeIO stats[BACKEND_NUM_TYPES]
Definition pgstat.h:342
TimestampTz stat_reset_timestamp
Definition pgstat.h:341
PgStat_Snapshot snapshot
PgStat_ShmemControl * shmem
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition pgstat.h:335
uint64 bytes[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition pgstat.h:334
instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition pgstat.h:336