PostgreSQL Source Code  git master
pgstat_io.c
Go to the documentation of this file.
1 /* -------------------------------------------------------------------------
2  *
3  * pgstat_io.c
4  * Implementation of IO statistics.
5  *
6  * This file contains the implementation of IO statistics. It is kept separate
7  * from pgstat.c to enforce the line between the statistics access / storage
8  * implementation and the details about individual types of statistics.
9  *
10  * Copyright (c) 2021-2024, PostgreSQL Global Development Group
11  *
12  * IDENTIFICATION
13  * src/backend/utils/activity/pgstat_io.c
14  * -------------------------------------------------------------------------
15  */
16 
17 #include "postgres.h"
18 
19 #include "executor/instrument.h"
20 #include "storage/bufmgr.h"
21 #include "utils/pgstat_internal.h"
22 
23 
24 typedef struct PgStat_PendingIO
25 {
29 
30 
32 static bool have_iostats = false;
33 
34 
35 /*
36  * Check that stats have not been counted for any combination of IOObject,
37  * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
38  * stats are tracked for this combination and IO times are non-zero, counts
39  * should be non-zero.
40  *
41  * The passed-in PgStat_BktypeIO must contain stats from the BackendType
42  * specified by the second parameter. Caller is responsible for locking the
43  * passed-in PgStat_BktypeIO, if needed.
44  */
45 bool
47  BackendType bktype)
48 {
49  for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
50  {
51  for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
52  {
53  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
54  {
55  /* we do track it */
56  if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
57  {
58  /* ensure that if IO times are non-zero, counts are > 0 */
59  if (backend_io->times[io_object][io_context][io_op] != 0 &&
60  backend_io->counts[io_object][io_context][io_op] <= 0)
61  return false;
62 
63  continue;
64  }
65 
66  /* we don't track it, and it is not 0 */
67  if (backend_io->counts[io_object][io_context][io_op] != 0)
68  return false;
69  }
70  }
71  }
72 
73  return true;
74 }
75 
76 void
77 pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
78 {
79  pgstat_count_io_op_n(io_object, io_context, io_op, 1);
80 }
81 
82 void
83 pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
84 {
85  Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
86  Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
87  Assert((unsigned int) io_op < IOOP_NUM_TYPES);
88  Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
89 
90  PendingIOStats.counts[io_object][io_context][io_op] += cnt;
91 
92  have_iostats = true;
93 }
94 
95 /*
96  * Initialize the internal timing for an IO operation, depending on an
97  * IO timing GUC.
98  */
100 pgstat_prepare_io_time(bool track_io_guc)
101 {
102  instr_time io_start;
103 
104  if (track_io_guc)
105  INSTR_TIME_SET_CURRENT(io_start);
106  else
107  {
108  /*
109  * There is no need to set io_start when an IO timing GUC is disabled,
110  * still initialize it to zero to avoid compiler warnings.
111  */
112  INSTR_TIME_SET_ZERO(io_start);
113  }
114 
115  return io_start;
116 }
117 
118 /*
119  * Like pgstat_count_io_op_n() except it also accumulates time.
120  */
121 void
122 pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
124 {
125  if (track_io_timing)
126  {
127  instr_time io_time;
128 
129  INSTR_TIME_SET_CURRENT(io_time);
131 
132  if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
133  {
135  if (io_object == IOOBJECT_RELATION)
137  else if (io_object == IOOBJECT_TEMP_RELATION)
139  }
140  else if (io_op == IOOP_READ)
141  {
143  if (io_object == IOOBJECT_RELATION)
145  else if (io_object == IOOBJECT_TEMP_RELATION)
147  }
148 
149  INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
150  io_time);
151  }
152 
153  pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
154 }
155 
156 PgStat_IO *
158 {
160 
161  return &pgStatLocal.snapshot.io;
162 }
163 
164 /*
165  * Check if there any IO stats waiting for flush.
166  */
167 bool
169 {
170  return have_iostats;
171 }
172 
173 /*
174  * Simpler wrapper of pgstat_io_flush_cb()
175  */
176 void
177 pgstat_flush_io(bool nowait)
178 {
179  (void) pgstat_io_flush_cb(nowait);
180 }
181 
182 /*
183  * Flush out locally pending IO statistics
184  *
185  * If no stats have been recorded, this function returns false.
186  *
187  * If nowait is true, this function returns true if the lock could not be
188  * acquired. Otherwise, return false.
189  */
190 bool
191 pgstat_io_flush_cb(bool nowait)
192 {
193  LWLock *bktype_lock;
194  PgStat_BktypeIO *bktype_shstats;
195 
196  if (!have_iostats)
197  return false;
198 
199  bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
200  bktype_shstats =
202 
203  if (!nowait)
204  LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
205  else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
206  return true;
207 
208  for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
209  {
210  for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
211  {
212  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
213  {
214  instr_time time;
215 
216  bktype_shstats->counts[io_object][io_context][io_op] +=
217  PendingIOStats.counts[io_object][io_context][io_op];
218 
219  time = PendingIOStats.pending_times[io_object][io_context][io_op];
220 
221  bktype_shstats->times[io_object][io_context][io_op] +=
223  }
224  }
225  }
226 
228 
229  LWLockRelease(bktype_lock);
230 
231  memset(&PendingIOStats, 0, sizeof(PendingIOStats));
232 
233  have_iostats = false;
234 
235  return false;
236 }
237 
238 const char *
240 {
241  switch (io_context)
242  {
243  case IOCONTEXT_BULKREAD:
244  return "bulkread";
245  case IOCONTEXT_BULKWRITE:
246  return "bulkwrite";
247  case IOCONTEXT_NORMAL:
248  return "normal";
249  case IOCONTEXT_VACUUM:
250  return "vacuum";
251  }
252 
253  elog(ERROR, "unrecognized IOContext value: %d", io_context);
254  pg_unreachable();
255 }
256 
257 const char *
259 {
260  switch (io_object)
261  {
262  case IOOBJECT_RELATION:
263  return "relation";
265  return "temp relation";
266  }
267 
268  elog(ERROR, "unrecognized IOObject value: %d", io_object);
269  pg_unreachable();
270 }
271 
272 void
274 {
275  PgStatShared_IO *stat_shmem = (PgStatShared_IO *) stats;
276 
277  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
279 }
280 
281 void
283 {
284  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
285  {
286  LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
287  PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
288 
289  LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
290 
291  /*
292  * Use the lock in the first BackendType's PgStat_BktypeIO to protect
293  * the reset timestamp as well.
294  */
295  if (i == 0)
297 
298  memset(bktype_shstats, 0, sizeof(*bktype_shstats));
299  LWLockRelease(bktype_lock);
300  }
301 }
302 
303 void
305 {
306  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
307  {
308  LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
309  PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
310  PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
311 
312  LWLockAcquire(bktype_lock, LW_SHARED);
313 
314  /*
315  * Use the lock in the first BackendType's PgStat_BktypeIO to protect
316  * the reset timestamp as well.
317  */
318  if (i == 0)
321 
322  /* using struct assignment due to better type safety */
323  *bktype_snap = *bktype_shstats;
324  LWLockRelease(bktype_lock);
325  }
326 }
327 
328 /*
329 * IO statistics are not collected for all BackendTypes.
330 *
331 * The following BackendTypes do not participate in the cumulative stats
332 * subsystem or do not perform IO on which we currently track:
333 * - Syslogger because it is not connected to shared memory
334 * - Archiver because most relevant archiving IO is delegated to a
335 * specialized command or module
336 * - WAL Receiver, WAL Writer, and WAL Summarizer IO are not tracked in
337 * pg_stat_io for now
338 *
339 * Function returns true if BackendType participates in the cumulative stats
340 * subsystem for IO and false if it does not.
341 *
342 * When adding a new BackendType, also consider adding relevant restrictions to
343 * pgstat_tracks_io_object() and pgstat_tracks_io_op().
344 */
345 bool
347 {
348  /*
349  * List every type so that new backend types trigger a warning about
350  * needing to adjust this switch.
351  */
352  switch (bktype)
353  {
354  case B_INVALID:
355  case B_ARCHIVER:
356  case B_LOGGER:
357  case B_WAL_RECEIVER:
358  case B_WAL_WRITER:
359  case B_WAL_SUMMARIZER:
360  return false;
361 
362  case B_AUTOVAC_LAUNCHER:
363  case B_AUTOVAC_WORKER:
364  case B_BACKEND:
365  case B_BG_WORKER:
366  case B_BG_WRITER:
367  case B_CHECKPOINTER:
368  case B_SLOTSYNC_WORKER:
370  case B_STARTUP:
371  case B_WAL_SENDER:
372  return true;
373  }
374 
375  return false;
376 }
377 
378 /*
379  * Some BackendTypes do not perform IO on certain IOObjects or in certain
380  * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
381  * that the given BackendType is expected to do IO in the given IOContext and
382  * on the given IOObject and that the given IOObject is expected to be operated
383  * on in the given IOContext.
384  */
385 bool
387  IOContext io_context)
388 {
389  bool no_temp_rel;
390 
391  /*
392  * Some BackendTypes should never track IO statistics.
393  */
394  if (!pgstat_tracks_io_bktype(bktype))
395  return false;
396 
397  /*
398  * Currently, IO on temporary relations can only occur in the
399  * IOCONTEXT_NORMAL IOContext.
400  */
401  if (io_context != IOCONTEXT_NORMAL &&
402  io_object == IOOBJECT_TEMP_RELATION)
403  return false;
404 
405  /*
406  * In core Postgres, only regular backends and WAL Sender processes
407  * executing queries will use local buffers and operate on temporary
408  * relations. Parallel workers will not use local buffers (see
409  * InitLocalBuffers()); however, extensions leveraging background workers
410  * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
411  * BackendType B_BG_WORKER.
412  */
413  no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
414  bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
415  bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
416 
417  if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
418  io_object == IOOBJECT_TEMP_RELATION)
419  return false;
420 
421  /*
422  * Some BackendTypes do not currently perform any IO in certain
423  * IOContexts, and, while it may not be inherently incorrect for them to
424  * do so, excluding those rows from the view makes the view easier to use.
425  */
426  if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
427  (io_context == IOCONTEXT_BULKREAD ||
428  io_context == IOCONTEXT_BULKWRITE ||
429  io_context == IOCONTEXT_VACUUM))
430  return false;
431 
432  if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
433  return false;
434 
435  if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
436  io_context == IOCONTEXT_BULKWRITE)
437  return false;
438 
439  return true;
440 }
441 
442 /*
443  * Some BackendTypes will never do certain IOOps and some IOOps should not
444  * occur in certain IOContexts or on certain IOObjects. Check that the given
445  * IOOp is valid for the given BackendType in the given IOContext and on the
446  * given IOObject. Note that there are currently no cases of an IOOp being
447  * invalid for a particular BackendType only within a certain IOContext and/or
448  * only on a certain IOObject.
449  */
450 bool
452  IOContext io_context, IOOp io_op)
453 {
454  bool strategy_io_context;
455 
456  /* if (io_context, io_object) will never collect stats, we're done */
457  if (!pgstat_tracks_io_object(bktype, io_object, io_context))
458  return false;
459 
460  /*
461  * Some BackendTypes will not do certain IOOps.
462  */
463  if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
464  (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
465  return false;
466 
467  if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
468  bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
469  return false;
470 
471  /*
472  * Temporary tables are not logged and thus do not require fsync'ing.
473  * Writeback is not requested for temporary tables.
474  */
475  if (io_object == IOOBJECT_TEMP_RELATION &&
476  (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
477  return false;
478 
479  /*
480  * Some IOOps are not valid in certain IOContexts and some IOOps are only
481  * valid in certain contexts.
482  */
483  if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
484  return false;
485 
486  strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
487  io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
488 
489  /*
490  * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
491  */
492  if (!strategy_io_context && io_op == IOOP_REUSE)
493  return false;
494 
495  /*
496  * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
497  * counted in the IOCONTEXT_NORMAL IOContext. See comment in
498  * register_dirty_segment() for more details.
499  */
500  if (strategy_io_context && io_op == IOOP_FSYNC)
501  return false;
502 
503 
504  return true;
505 }
bool track_io_timing
Definition: bufmgr.c:143
unsigned int uint32
Definition: c.h:506
#define Assert(condition)
Definition: c.h:858
#define pg_unreachable()
Definition: c.h:296
int64 TimestampTz
Definition: timestamp.h:39
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_ADD(x, y)
Definition: instr_time.h:178
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:194
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
BufferUsage pgBufferUsage
Definition: instrument.c:20
int i
Definition: isn.c:73
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339
@ LWTRANCHE_PGSTATS_DATA
Definition: lwlock.h:205
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define BACKEND_NUM_TYPES
Definition: miscadmin.h:367
BackendType
Definition: miscadmin.h:331
@ B_WAL_SUMMARIZER
Definition: miscadmin.h:357
@ B_WAL_WRITER
Definition: miscadmin.h:358
@ B_WAL_RECEIVER
Definition: miscadmin.h:356
@ B_CHECKPOINTER
Definition: miscadmin.h:354
@ B_WAL_SENDER
Definition: miscadmin.h:339
@ B_LOGGER
Definition: miscadmin.h:364
@ B_STARTUP
Definition: miscadmin.h:355
@ B_BG_WORKER
Definition: miscadmin.h:338
@ B_INVALID
Definition: miscadmin.h:332
@ B_STANDALONE_BACKEND
Definition: miscadmin.h:342
@ B_BG_WRITER
Definition: miscadmin.h:353
@ B_BACKEND
Definition: miscadmin.h:335
@ B_ARCHIVER
Definition: miscadmin.h:352
@ B_AUTOVAC_LAUNCHER
Definition: miscadmin.h:336
@ B_SLOTSYNC_WORKER
Definition: miscadmin.h:340
@ B_AUTOVAC_WORKER
Definition: miscadmin.h:337
BackendType MyBackendType
Definition: miscinit.c:63
static time_t start_time
Definition: pg_ctl.c:95
void pgstat_snapshot_fixed(PgStat_Kind kind)
Definition: pgstat.c:1054
PgStat_LocalState pgStatLocal
Definition: pgstat.c:212
#define pgstat_count_buffer_read_time(n)
Definition: pgstat.h:585
IOObject
Definition: pgstat.h:311
@ IOOBJECT_RELATION
Definition: pgstat.h:312
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:313
#define pgstat_count_buffer_write_time(n)
Definition: pgstat.h:587
#define IOOP_NUM_TYPES
Definition: pgstat.h:340
IOContext
Definition: pgstat.h:319
@ IOCONTEXT_NORMAL
Definition: pgstat.h:322
@ IOCONTEXT_VACUUM
Definition: pgstat.h:323
@ IOCONTEXT_BULKREAD
Definition: pgstat.h:320
@ IOCONTEXT_BULKWRITE
Definition: pgstat.h:321
#define IOCONTEXT_NUM_TYPES
Definition: pgstat.h:326
IOOp
Definition: pgstat.h:329
@ IOOP_EXTEND
Definition: pgstat.h:331
@ IOOP_FSYNC
Definition: pgstat.h:332
@ IOOP_READ
Definition: pgstat.h:334
@ IOOP_WRITEBACK
Definition: pgstat.h:337
@ IOOP_HIT
Definition: pgstat.h:333
@ IOOP_EVICT
Definition: pgstat.h:330
@ IOOP_REUSE
Definition: pgstat.h:335
@ IOOP_WRITE
Definition: pgstat.h:336
#define PGSTAT_KIND_IO
Definition: pgstat.h:57
int64 PgStat_Counter
Definition: pgstat.h:120
#define IOOBJECT_NUM_TYPES
Definition: pgstat.h:316
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:100
void pgstat_flush_io(bool nowait)
Definition: pgstat_io.c:177
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:122
struct PgStat_PendingIO PgStat_PendingIO
bool pgstat_tracks_io_bktype(BackendType bktype)
Definition: pgstat_io.c:346
bool pgstat_io_have_pending_cb(void)
Definition: pgstat_io.c:168
bool pgstat_io_flush_cb(bool nowait)
Definition: pgstat_io.c:191
const char * pgstat_get_io_object_name(IOObject io_object)
Definition: pgstat_io.c:258
void pgstat_io_reset_all_cb(TimestampTz ts)
Definition: pgstat_io.c:282
bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, BackendType bktype)
Definition: pgstat_io.c:46
static PgStat_PendingIO PendingIOStats
Definition: pgstat_io.c:31
PgStat_IO * pgstat_fetch_stat_io(void)
Definition: pgstat_io.c:157
bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:451
static bool have_iostats
Definition: pgstat_io.c:32
void pgstat_io_snapshot_cb(void)
Definition: pgstat_io.c:304
const char * pgstat_get_io_context_name(IOContext io_context)
Definition: pgstat_io.c:239
void pgstat_io_init_shmem_cb(void *stats)
Definition: pgstat_io.c:273
bool pgstat_tracks_io_object(BackendType bktype, IOObject io_object, IOContext io_context)
Definition: pgstat_io.c:386
void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
Definition: pgstat_io.c:83
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:77
instr_time local_blk_read_time
Definition: instrument.h:38
instr_time shared_blk_read_time
Definition: instrument.h:36
instr_time shared_blk_write_time
Definition: instrument.h:37
instr_time local_blk_write_time
Definition: instrument.h:39
Definition: lwlock.h:42
LWLock locks[BACKEND_NUM_TYPES]
PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat.h:345
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat.h:344
PgStat_BktypeIO stats[BACKEND_NUM_TYPES]
Definition: pgstat.h:351
TimestampTz stat_reset_timestamp
Definition: pgstat.h:350
PgStat_Snapshot snapshot
PgStat_ShmemControl * shmem
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat_io.c:26
instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat_io.c:27
PgStatShared_IO io