PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pgstat_io.c
Go to the documentation of this file.
1/* -------------------------------------------------------------------------
2 *
3 * pgstat_io.c
4 * Implementation of IO statistics.
5 *
6 * This file contains the implementation of IO statistics. It is kept separate
7 * from pgstat.c to enforce the line between the statistics access / storage
8 * implementation and the details about individual types of statistics.
9 *
10 * Copyright (c) 2021-2026, PostgreSQL Global Development Group
11 *
12 * IDENTIFICATION
13 * src/backend/utils/activity/pgstat_io.c
14 * -------------------------------------------------------------------------
15 */
16
17#include "postgres.h"
18
19#include "executor/instrument.h"
20#include "storage/bufmgr.h"
22
24static bool have_iostats = false;
25
26/*
27 * Check that stats have not been counted for any combination of IOObject,
28 * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
29 * stats are tracked for this combination and IO times are non-zero, counts
30 * should be non-zero.
31 *
32 * The passed-in PgStat_BktypeIO must contain stats from the BackendType
33 * specified by the second parameter. Caller is responsible for locking the
34 * passed-in PgStat_BktypeIO, if needed.
35 */
36bool
39{
41 {
43 {
44 for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
45 {
46 /* we do track it */
48 {
49 /* ensure that if IO times are non-zero, counts are > 0 */
50 if (backend_io->times[io_object][io_context][io_op] != 0 &&
51 backend_io->counts[io_object][io_context][io_op] <= 0)
52 return false;
53
54 continue;
55 }
56
57 /* we don't track it, and it is not 0 */
58 if (backend_io->counts[io_object][io_context][io_op] != 0)
59 return false;
60 }
61 }
62 }
63
64 return true;
65}
66
67void
85
86/*
87 * Initialize the internal timing for an IO operation, depending on an
88 * IO timing GUC.
89 */
92{
94
95 if (track_io_guc)
97 else
98 {
99 /*
100 * There is no need to set io_start when an IO timing GUC is disabled.
101 * Initialize it to zero to avoid compiler warnings and to let
102 * pgstat_count_io_op_time() know that timings should be ignored.
103 */
105 }
106
107 return io_start;
108}
109
110/*
111 * Like pgstat_count_io_op() except it also accumulates time.
112 *
113 * The calls related to pgstat_count_buffer_*() are for pgstat_database. As
114 * pg_stat_database only counts block read and write times, these are done for
115 * IOOP_READ, IOOP_WRITE and IOOP_EXTEND.
116 *
117 * pgBufferUsage is used for EXPLAIN. pgBufferUsage has write and read stats
118 * for shared, local and temporary blocks. pg_stat_io does not track the
119 * activity of temporary blocks, so these are ignored here.
120 */
121void
162
163PgStat_IO *
170
171/*
172 * Wrapper around pgstat_io_flush_cb() that discards its boolean result.
173 */
174void
175pgstat_flush_io(bool nowait)
176{
177 (void) pgstat_io_flush_cb(nowait);
178}
179
180/*
181 * Flush out locally pending IO statistics
182 *
183 * If no stats have been recorded, this function returns false.
184 *
185 * If nowait is true and the lock could not be acquired, this function
186 * returns true. Otherwise, it returns false.
187 */
188bool
190{
193
194 if (!have_iostats)
195 return false;
196
200
201 if (!nowait)
204 return true;
205
207 {
209 {
210 for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
211 {
212 instr_time time;
213
216
219
221
224 }
225 }
226 }
227
229
231
233
234 have_iostats = false;
235
236 return false;
237}
238
239const char *
241{
242 switch (io_context)
243 {
245 return "bulkread";
247 return "bulkwrite";
248 case IOCONTEXT_INIT:
249 return "init";
250 case IOCONTEXT_NORMAL:
251 return "normal";
252 case IOCONTEXT_VACUUM:
253 return "vacuum";
254 }
255
256 elog(ERROR, "unrecognized IOContext value: %d", io_context);
258}
259
260const char *
262{
263 switch (io_object)
264 {
266 return "relation";
268 return "temp relation";
269 case IOOBJECT_WAL:
270 return "wal";
271 }
272
273 elog(ERROR, "unrecognized IOObject value: %d", io_object);
275}
276
277void
279{
281
282 for (int i = 0; i < BACKEND_NUM_TYPES; i++)
284}
285
286void
288{
289 for (int i = 0; i < BACKEND_NUM_TYPES; i++)
290 {
293
295
296 /*
297 * Use the lock in the first BackendType's PgStat_BktypeIO to protect
298 * the reset timestamp as well.
299 */
300 if (i == 0)
302
305 }
306}
307
308void
310{
311 for (int i = 0; i < BACKEND_NUM_TYPES; i++)
312 {
316
318
319 /*
320 * Use the lock in the first BackendType's PgStat_BktypeIO to protect
321 * the reset timestamp as well.
322 */
323 if (i == 0)
326
327 /* using struct assignment due to better type safety */
330 }
331}
332
333/*
334* IO statistics are not collected for all BackendTypes.
335*
336* The following BackendTypes do not participate in the cumulative stats
337* subsystem or do not perform any IO that we currently track:
338* - Dead-end backend because it is not connected to shared memory and
339* doesn't do any IO
340* - Syslogger because it is not connected to shared memory
341* - Archiver because most relevant archiving IO is delegated to a
342* specialized command or module
343*
344* Function returns true if BackendType participates in the cumulative stats
345* subsystem for IO and false if it does not.
346*
347* When adding a new BackendType, also consider adding relevant restrictions to
348* pgstat_tracks_io_object() and pgstat_tracks_io_op().
349*/
350bool
352{
353 /*
354 * List every type so that new backend types trigger a warning about
355 * needing to adjust this switch.
356 */
357 switch (bktype)
358 {
359 case B_INVALID:
361 case B_ARCHIVER:
362 case B_LOGGER:
363 return false;
364
368 case B_AUTOVAC_WORKER:
369 case B_BACKEND:
370 case B_BG_WORKER:
371 case B_BG_WRITER:
372 case B_CHECKPOINTER:
373 case B_IO_WORKER:
376 case B_STARTUP:
377 case B_WAL_RECEIVER:
378 case B_WAL_SENDER:
379 case B_WAL_SUMMARIZER:
380 case B_WAL_WRITER:
381 return true;
382 }
383
384 return false;
385}
386
387/*
388 * Some BackendTypes do not perform IO on certain IOObjects or in certain
389 * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
390 * that the given BackendType is expected to do IO in the given IOContext and
391 * on the given IOObject and that the given IOObject is expected to be operated
392 * on in the given IOContext.
393 */
394bool
397{
398 bool no_temp_rel;
399
400 /*
401 * Some BackendTypes should never track IO statistics.
402 */
404 return false;
405
406 /*
407 * Currently, IO on IOOBJECT_WAL objects can only occur in the
408 * IOCONTEXT_NORMAL and IOCONTEXT_INIT IOContexts.
409 */
410 if (io_object == IOOBJECT_WAL &&
413 return false;
414
415 /*
416 * Currently, IO on temporary relations can only occur in the
417 * IOCONTEXT_NORMAL IOContext.
418 */
421 return false;
422
423 /*
424 * In core Postgres, only regular backends and WAL Sender processes
425 * executing queries will use local buffers and operate on temporary
426 * relations. Parallel workers will not use local buffers (see
427 * InitLocalBuffers()); however, extensions leveraging background workers
428 * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
429 * BackendType B_BG_WORKER.
430 */
436
439 return false;
440
441 /*
442 * Some BackendTypes only perform IO under IOOBJECT_WAL, hence exclude all
443 * rows for all the other objects for these.
444 */
447 return false;
448
449 /*
450 * Some BackendTypes do not currently perform any IO in certain
451 * IOContexts, and, while it may not be inherently incorrect for them to
452 * do so, excluding those rows from the view makes the view easier to use.
453 */
454 if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
458 return false;
459
461 return false;
462
465 return false;
466
467 return true;
468}
469
470/*
471 * Some BackendTypes will never do certain IOOps and some IOOps should not
472 * occur in certain IOContexts or on certain IOObjects. Check that the given
473 * IOOp is valid for the given BackendType in the given IOContext and on the
474 * given IOObject. Note that a few IOOps are invalid for a particular
475 * BackendType only on a certain IOObject (for example, some BackendTypes do
476 * not perform IOOP_READ on IOOBJECT_WAL).
477 */
478bool
481{
483
484 /* if (io_context, io_object) will never collect stats, we're done */
486 return false;
487
488 /*
489 * Some BackendTypes will not do certain IOOps.
490 */
491 if (bktype == B_BG_WRITER &&
492 (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
493 return false;
494
495 if (bktype == B_CHECKPOINTER &&
497 (io_op == IOOP_EVICT || io_op == IOOP_HIT)))
498 return false;
499
502 return false;
503
504 /*
505 * Some BackendTypes do not perform reads with IOOBJECT_WAL.
506 */
507 if (io_object == IOOBJECT_WAL && io_op == IOOP_READ &&
511 return false;
512
513 /*
514 * Temporary tables are not logged and thus do not require fsync'ing.
515 * Writeback is not requested for temporary tables.
516 */
519 return false;
520
521 /*
522 * Some IOOps are not valid in certain IOContexts and some IOOps are only
523 * valid in certain contexts.
524 */
526 return false;
527
530
531 /*
532 * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
533 */
535 return false;
536
537 /*
538 * IOOBJECT_WAL IOObject will not do certain IOOps depending on IOContext.
539 */
541 !(io_op == IOOP_WRITE || io_op == IOOP_FSYNC))
542 return false;
543
546 return false;
547
548 /*
549 * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
550 * counted in the IOCONTEXT_NORMAL IOContext. See comment in
551 * register_dirty_segment() for more details.
552 */
554 return false;
555
556
557 return true;
558}
#define Assert(condition)
Definition c.h:943
uint64_t uint64
Definition c.h:625
#define pg_unreachable()
Definition c.h:367
uint32_t uint32
Definition c.h:624
int64 TimestampTz
Definition timestamp.h:39
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:426
#define INSTR_TIME_ADD(x, y)
Definition instr_time.h:430
#define INSTR_TIME_IS_ZERO(t)
Definition instr_time.h:419
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:436
#define INSTR_TIME_GET_MICROSEC(t)
Definition instr_time.h:454
#define INSTR_TIME_SET_ZERO(t)
Definition instr_time.h:421
BufferUsage pgBufferUsage
Definition instrument.c:25
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1321
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
#define BACKEND_NUM_TYPES
Definition miscadmin.h:392
BackendType
Definition miscadmin.h:350
@ B_WAL_SUMMARIZER
Definition miscadmin.h:379
@ B_WAL_WRITER
Definition miscadmin.h:380
@ B_WAL_RECEIVER
Definition miscadmin.h:378
@ B_CHECKPOINTER
Definition miscadmin.h:375
@ B_DATACHECKSUMSWORKER_WORKER
Definition miscadmin.h:383
@ B_WAL_SENDER
Definition miscadmin.h:359
@ B_IO_WORKER
Definition miscadmin.h:376
@ B_LOGGER
Definition miscadmin.h:389
@ B_STARTUP
Definition miscadmin.h:377
@ B_DATACHECKSUMSWORKER_LAUNCHER
Definition miscadmin.h:382
@ B_BG_WORKER
Definition miscadmin.h:358
@ B_INVALID
Definition miscadmin.h:351
@ B_STANDALONE_BACKEND
Definition miscadmin.h:362
@ B_BG_WRITER
Definition miscadmin.h:374
@ B_BACKEND
Definition miscadmin.h:354
@ B_ARCHIVER
Definition miscadmin.h:373
@ B_AUTOVAC_LAUNCHER
Definition miscadmin.h:356
@ B_SLOTSYNC_WORKER
Definition miscadmin.h:360
@ B_DEAD_END_BACKEND
Definition miscadmin.h:355
@ B_AUTOVAC_WORKER
Definition miscadmin.h:357
BackendType MyBackendType
Definition miscinit.c:65
static time_t start_time
Definition pg_ctl.c:96
void pgstat_snapshot_fixed(PgStat_Kind kind)
Definition pgstat.c:1104
bool pgstat_report_fixed
Definition pgstat.c:219
PgStat_LocalState pgStatLocal
Definition pgstat.c:213
#define pgstat_count_buffer_read_time(n)
Definition pgstat.h:658
IOObject
Definition pgstat.h:280
@ IOOBJECT_RELATION
Definition pgstat.h:281
@ IOOBJECT_WAL
Definition pgstat.h:283
@ IOOBJECT_TEMP_RELATION
Definition pgstat.h:282
#define pgstat_is_ioop_tracked_in_bytes(io_op)
Definition pgstat.h:325
#define pgstat_count_buffer_write_time(n)
Definition pgstat.h:660
#define IOOP_NUM_TYPES
Definition pgstat.h:323
IOContext
Definition pgstat.h:289
@ IOCONTEXT_INIT
Definition pgstat.h:292
@ IOCONTEXT_NORMAL
Definition pgstat.h:293
@ IOCONTEXT_VACUUM
Definition pgstat.h:294
@ IOCONTEXT_BULKREAD
Definition pgstat.h:290
@ IOCONTEXT_BULKWRITE
Definition pgstat.h:291
#define IOCONTEXT_NUM_TYPES
Definition pgstat.h:297
IOOp
Definition pgstat.h:309
@ IOOP_EXTEND
Definition pgstat.h:318
@ IOOP_FSYNC
Definition pgstat.h:312
@ IOOP_READ
Definition pgstat.h:319
@ IOOP_WRITEBACK
Definition pgstat.h:315
@ IOOP_HIT
Definition pgstat.h:313
@ IOOP_EVICT
Definition pgstat.h:311
@ IOOP_REUSE
Definition pgstat.h:314
@ IOOP_WRITE
Definition pgstat.h:320
#define IOOBJECT_NUM_TYPES
Definition pgstat.h:286
void pgstat_count_backend_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
void pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time io_time)
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:68
void pgstat_flush_io(bool nowait)
Definition pgstat_io.c:175
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
PgStat_IO * pgstat_fetch_stat_io(void)
Definition pgstat_io.c:164
const char * pgstat_get_io_context_name(IOContext io_context)
Definition pgstat_io.c:240
bool pgstat_tracks_io_bktype(BackendType bktype)
Definition pgstat_io.c:351
const char * pgstat_get_io_object_name(IOObject io_object)
Definition pgstat_io.c:261
bool pgstat_io_flush_cb(bool nowait)
Definition pgstat_io.c:189
void pgstat_io_reset_all_cb(TimestampTz ts)
Definition pgstat_io.c:287
bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, BackendType bktype)
Definition pgstat_io.c:37
static PgStat_PendingIO PendingIOStats
Definition pgstat_io.c:23
bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object, IOContext io_context, IOOp io_op)
Definition pgstat_io.c:479
static bool have_iostats
Definition pgstat_io.c:24
void pgstat_io_snapshot_cb(void)
Definition pgstat_io.c:309
void pgstat_io_init_shmem_cb(void *stats)
Definition pgstat_io.c:278
bool pgstat_tracks_io_object(BackendType bktype, IOObject io_object, IOContext io_context)
Definition pgstat_io.c:395
#define PGSTAT_KIND_IO
Definition pgstat_kind.h:38
static int fb(int x)
instr_time local_blk_read_time
Definition instrument.h:38
instr_time shared_blk_read_time
Definition instrument.h:36
instr_time shared_blk_write_time
Definition instrument.h:37
instr_time local_blk_write_time
Definition instrument.h:39
LWLock locks[BACKEND_NUM_TYPES]
PgStat_BktypeIO stats[BACKEND_NUM_TYPES]
Definition pgstat.h:346
TimestampTz stat_reset_timestamp
Definition pgstat.h:345
PgStat_Snapshot snapshot
PgStat_ShmemControl * shmem
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition pgstat.h:339
uint64 bytes[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition pgstat.h:338
instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition pgstat.h:340