PostgreSQL Source Code  git master
pgstat_io.c
Go to the documentation of this file.
1 /* -------------------------------------------------------------------------
2  *
3  * pgstat_io.c
4  * Implementation of IO statistics.
5  *
6  * This file contains the implementation of IO statistics. It is kept separate
7  * from pgstat.c to enforce the line between the statistics access / storage
8  * implementation and the details about individual types of statistics.
9  *
10  * Copyright (c) 2021-2024, PostgreSQL Global Development Group
11  *
12  * IDENTIFICATION
13  * src/backend/utils/activity/pgstat_io.c
14  * -------------------------------------------------------------------------
15  */
16 
17 #include "postgres.h"
18 
19 #include "executor/instrument.h"
20 #include "storage/bufmgr.h"
21 #include "utils/pgstat_internal.h"
22 
23 
24 typedef struct PgStat_PendingIO
25 {
29 
30 
32 bool have_iostats = false;
33 
34 
35 /*
36  * Check that stats have not been counted for any combination of IOObject,
37  * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
38  * stats are tracked for this combination and IO times are non-zero, counts
39  * should be non-zero.
40  *
41  * The passed-in PgStat_BktypeIO must contain stats from the BackendType
42  * specified by the second parameter. Caller is responsible for locking the
43  * passed-in PgStat_BktypeIO, if needed.
44  */
45 bool
47  BackendType bktype)
48 {
49  for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
50  {
51  for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
52  {
53  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
54  {
55  /* we do track it */
56  if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
57  {
58  /* ensure that if IO times are non-zero, counts are > 0 */
59  if (backend_io->times[io_object][io_context][io_op] != 0 &&
60  backend_io->counts[io_object][io_context][io_op] <= 0)
61  return false;
62 
63  continue;
64  }
65 
66  /* we don't track it, and it is not 0 */
67  if (backend_io->counts[io_object][io_context][io_op] != 0)
68  return false;
69  }
70  }
71  }
72 
73  return true;
74 }
75 
76 void
77 pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
78 {
79  pgstat_count_io_op_n(io_object, io_context, io_op, 1);
80 }
81 
82 void
83 pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
84 {
85  Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
86  Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
87  Assert((unsigned int) io_op < IOOP_NUM_TYPES);
88  Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
89 
90  PendingIOStats.counts[io_object][io_context][io_op] += cnt;
91 
92  have_iostats = true;
93 }
94 
95 /*
96  * Initialize the internal timing for an IO operation, depending on an
97  * IO timing GUC.
98  */
100 pgstat_prepare_io_time(bool track_io_guc)
101 {
102  instr_time io_start;
103 
104  if (track_io_guc)
105  INSTR_TIME_SET_CURRENT(io_start);
106  else
107  {
108  /*
109  * There is no need to set io_start when an IO timing GUC is disabled,
110  * still initialize it to zero to avoid compiler warnings.
111  */
112  INSTR_TIME_SET_ZERO(io_start);
113  }
114 
115  return io_start;
116 }
117 
118 /*
119  * Like pgstat_count_io_op_n() except it also accumulates time.
120  */
121 void
122 pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
124 {
125  if (track_io_timing)
126  {
127  instr_time io_time;
128 
129  INSTR_TIME_SET_CURRENT(io_time);
131 
132  if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
133  {
135  if (io_object == IOOBJECT_RELATION)
137  else if (io_object == IOOBJECT_TEMP_RELATION)
139  }
140  else if (io_op == IOOP_READ)
141  {
143  if (io_object == IOOBJECT_RELATION)
145  else if (io_object == IOOBJECT_TEMP_RELATION)
147  }
148 
149  INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
150  io_time);
151  }
152 
153  pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
154 }
155 
156 PgStat_IO *
158 {
160 
161  return &pgStatLocal.snapshot.io;
162 }
163 
164 /*
165  * Flush out locally pending IO statistics
166  *
167  * If no stats have been recorded, this function returns false.
168  *
169  * If nowait is true, this function returns true if the lock could not be
170  * acquired. Otherwise, return false.
171  */
172 bool
173 pgstat_flush_io(bool nowait)
174 {
175  LWLock *bktype_lock;
176  PgStat_BktypeIO *bktype_shstats;
177 
178  if (!have_iostats)
179  return false;
180 
181  bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
182  bktype_shstats =
184 
185  if (!nowait)
186  LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
187  else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
188  return true;
189 
190  for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
191  {
192  for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
193  {
194  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
195  {
196  instr_time time;
197 
198  bktype_shstats->counts[io_object][io_context][io_op] +=
199  PendingIOStats.counts[io_object][io_context][io_op];
200 
201  time = PendingIOStats.pending_times[io_object][io_context][io_op];
202 
203  bktype_shstats->times[io_object][io_context][io_op] +=
205  }
206  }
207  }
208 
210 
211  LWLockRelease(bktype_lock);
212 
213  memset(&PendingIOStats, 0, sizeof(PendingIOStats));
214 
215  have_iostats = false;
216 
217  return false;
218 }
219 
220 const char *
222 {
223  switch (io_context)
224  {
225  case IOCONTEXT_BULKREAD:
226  return "bulkread";
227  case IOCONTEXT_BULKWRITE:
228  return "bulkwrite";
229  case IOCONTEXT_NORMAL:
230  return "normal";
231  case IOCONTEXT_VACUUM:
232  return "vacuum";
233  }
234 
235  elog(ERROR, "unrecognized IOContext value: %d", io_context);
236  pg_unreachable();
237 }
238 
239 const char *
241 {
242  switch (io_object)
243  {
244  case IOOBJECT_RELATION:
245  return "relation";
247  return "temp relation";
248  }
249 
250  elog(ERROR, "unrecognized IOObject value: %d", io_object);
251  pg_unreachable();
252 }
253 
254 void
256 {
257  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
258  {
259  LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
260  PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
261 
262  LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
263 
264  /*
265  * Use the lock in the first BackendType's PgStat_BktypeIO to protect
266  * the reset timestamp as well.
267  */
268  if (i == 0)
270 
271  memset(bktype_shstats, 0, sizeof(*bktype_shstats));
272  LWLockRelease(bktype_lock);
273  }
274 }
275 
276 void
278 {
279  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
280  {
281  LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
282  PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
283  PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
284 
285  LWLockAcquire(bktype_lock, LW_SHARED);
286 
287  /*
288  * Use the lock in the first BackendType's PgStat_BktypeIO to protect
289  * the reset timestamp as well.
290  */
291  if (i == 0)
294 
295  /* using struct assignment due to better type safety */
296  *bktype_snap = *bktype_shstats;
297  LWLockRelease(bktype_lock);
298  }
299 }
300 
301 /*
302 * IO statistics are not collected for all BackendTypes.
303 *
304 * The following BackendTypes do not participate in the cumulative stats
305 * subsystem or do not perform IO on which we currently track:
306 * - Syslogger because it is not connected to shared memory
307 * - Archiver because most relevant archiving IO is delegated to a
308 * specialized command or module
309 * - WAL Receiver, WAL Writer, and WAL Summarizer IO are not tracked in
310 * pg_stat_io for now
311 *
312 * Function returns true if BackendType participates in the cumulative stats
313 * subsystem for IO and false if it does not.
314 *
315 * When adding a new BackendType, also consider adding relevant restrictions to
316 * pgstat_tracks_io_object() and pgstat_tracks_io_op().
317 */
318 bool
320 {
321  /*
322  * List every type so that new backend types trigger a warning about
323  * needing to adjust this switch.
324  */
325  switch (bktype)
326  {
327  case B_INVALID:
328  case B_ARCHIVER:
329  case B_LOGGER:
330  case B_WAL_RECEIVER:
331  case B_WAL_WRITER:
332  case B_WAL_SUMMARIZER:
333  return false;
334 
335  case B_AUTOVAC_LAUNCHER:
336  case B_AUTOVAC_WORKER:
337  case B_BACKEND:
338  case B_BG_WORKER:
339  case B_BG_WRITER:
340  case B_CHECKPOINTER:
341  case B_SLOTSYNC_WORKER:
343  case B_STARTUP:
344  case B_WAL_SENDER:
345  return true;
346  }
347 
348  return false;
349 }
350 
351 /*
352  * Some BackendTypes do not perform IO on certain IOObjects or in certain
353  * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
354  * that the given BackendType is expected to do IO in the given IOContext and
355  * on the given IOObject and that the given IOObject is expected to be operated
356  * on in the given IOContext.
357  */
358 bool
360  IOContext io_context)
361 {
362  bool no_temp_rel;
363 
364  /*
365  * Some BackendTypes should never track IO statistics.
366  */
367  if (!pgstat_tracks_io_bktype(bktype))
368  return false;
369 
370  /*
371  * Currently, IO on temporary relations can only occur in the
372  * IOCONTEXT_NORMAL IOContext.
373  */
374  if (io_context != IOCONTEXT_NORMAL &&
375  io_object == IOOBJECT_TEMP_RELATION)
376  return false;
377 
378  /*
379  * In core Postgres, only regular backends and WAL Sender processes
380  * executing queries will use local buffers and operate on temporary
381  * relations. Parallel workers will not use local buffers (see
382  * InitLocalBuffers()); however, extensions leveraging background workers
383  * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
384  * BackendType B_BG_WORKER.
385  */
386  no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
387  bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
388  bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
389 
390  if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
391  io_object == IOOBJECT_TEMP_RELATION)
392  return false;
393 
394  /*
395  * Some BackendTypes do not currently perform any IO in certain
396  * IOContexts, and, while it may not be inherently incorrect for them to
397  * do so, excluding those rows from the view makes the view easier to use.
398  */
399  if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
400  (io_context == IOCONTEXT_BULKREAD ||
401  io_context == IOCONTEXT_BULKWRITE ||
402  io_context == IOCONTEXT_VACUUM))
403  return false;
404 
405  if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
406  return false;
407 
408  if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
409  io_context == IOCONTEXT_BULKWRITE)
410  return false;
411 
412  return true;
413 }
414 
415 /*
416  * Some BackendTypes will never do certain IOOps and some IOOps should not
417  * occur in certain IOContexts or on certain IOObjects. Check that the given
418  * IOOp is valid for the given BackendType in the given IOContext and on the
419  * given IOObject. Note that there are currently no cases of an IOOp being
420  * invalid for a particular BackendType only within a certain IOContext and/or
421  * only on a certain IOObject.
422  */
423 bool
425  IOContext io_context, IOOp io_op)
426 {
427  bool strategy_io_context;
428 
429  /* if (io_context, io_object) will never collect stats, we're done */
430  if (!pgstat_tracks_io_object(bktype, io_object, io_context))
431  return false;
432 
433  /*
434  * Some BackendTypes will not do certain IOOps.
435  */
436  if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
437  (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
438  return false;
439 
440  if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
441  bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
442  return false;
443 
444  /*
445  * Temporary tables are not logged and thus do not require fsync'ing.
446  * Writeback is not requested for temporary tables.
447  */
448  if (io_object == IOOBJECT_TEMP_RELATION &&
449  (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
450  return false;
451 
452  /*
453  * Some IOOps are not valid in certain IOContexts and some IOOps are only
454  * valid in certain contexts.
455  */
456  if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
457  return false;
458 
459  strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
460  io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
461 
462  /*
463  * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
464  */
465  if (!strategy_io_context && io_op == IOOP_REUSE)
466  return false;
467 
468  /*
469  * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
470  * counted in the IOCONTEXT_NORMAL IOContext. See comment in
471  * register_dirty_segment() for more details.
472  */
473  if (strategy_io_context && io_op == IOOP_FSYNC)
474  return false;
475 
476 
477  return true;
478 }
bool track_io_timing
Definition: bufmgr.c:138
unsigned int uint32
Definition: c.h:493
#define pg_unreachable()
Definition: c.h:283
int64 TimestampTz
Definition: timestamp.h:39
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_ADD(x, y)
Definition: instr_time.h:178
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:194
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
BufferUsage pgBufferUsage
Definition: instrument.c:20
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1169
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1782
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1340
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define BACKEND_NUM_TYPES
Definition: miscadmin.h:370
BackendType
Definition: miscadmin.h:334
@ B_WAL_SUMMARIZER
Definition: miscadmin.h:360
@ B_WAL_WRITER
Definition: miscadmin.h:361
@ B_WAL_RECEIVER
Definition: miscadmin.h:359
@ B_CHECKPOINTER
Definition: miscadmin.h:357
@ B_WAL_SENDER
Definition: miscadmin.h:342
@ B_LOGGER
Definition: miscadmin.h:367
@ B_STARTUP
Definition: miscadmin.h:358
@ B_BG_WORKER
Definition: miscadmin.h:341
@ B_INVALID
Definition: miscadmin.h:335
@ B_STANDALONE_BACKEND
Definition: miscadmin.h:345
@ B_BG_WRITER
Definition: miscadmin.h:356
@ B_BACKEND
Definition: miscadmin.h:338
@ B_ARCHIVER
Definition: miscadmin.h:355
@ B_AUTOVAC_LAUNCHER
Definition: miscadmin.h:339
@ B_SLOTSYNC_WORKER
Definition: miscadmin.h:343
@ B_AUTOVAC_WORKER
Definition: miscadmin.h:340
BackendType MyBackendType
Definition: miscinit.c:63
static time_t start_time
Definition: pg_ctl.c:94
void pgstat_snapshot_fixed(PgStat_Kind kind)
Definition: pgstat.c:938
PgStat_LocalState pgStatLocal
Definition: pgstat.c:193
#define pgstat_count_buffer_read_time(n)
Definition: pgstat.h:552
IOObject
Definition: pgstat.h:279
@ IOOBJECT_RELATION
Definition: pgstat.h:280
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:281
@ PGSTAT_KIND_IO
Definition: pgstat.h:51
#define pgstat_count_buffer_write_time(n)
Definition: pgstat.h:554
#define IOOP_NUM_TYPES
Definition: pgstat.h:308
IOContext
Definition: pgstat.h:287
@ IOCONTEXT_NORMAL
Definition: pgstat.h:290
@ IOCONTEXT_VACUUM
Definition: pgstat.h:291
@ IOCONTEXT_BULKREAD
Definition: pgstat.h:288
@ IOCONTEXT_BULKWRITE
Definition: pgstat.h:289
#define IOCONTEXT_NUM_TYPES
Definition: pgstat.h:294
IOOp
Definition: pgstat.h:297
@ IOOP_EXTEND
Definition: pgstat.h:299
@ IOOP_FSYNC
Definition: pgstat.h:300
@ IOOP_READ
Definition: pgstat.h:302
@ IOOP_WRITEBACK
Definition: pgstat.h:305
@ IOOP_HIT
Definition: pgstat.h:301
@ IOOP_EVICT
Definition: pgstat.h:298
@ IOOP_REUSE
Definition: pgstat.h:303
@ IOOP_WRITE
Definition: pgstat.h:304
int64 PgStat_Counter
Definition: pgstat.h:89
#define IOOBJECT_NUM_TYPES
Definition: pgstat.h:284
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:100
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:122
struct PgStat_PendingIO PgStat_PendingIO
bool pgstat_tracks_io_bktype(BackendType bktype)
Definition: pgstat_io.c:319
bool pgstat_flush_io(bool nowait)
Definition: pgstat_io.c:173
const char * pgstat_get_io_object_name(IOObject io_object)
Definition: pgstat_io.c:240
void pgstat_io_reset_all_cb(TimestampTz ts)
Definition: pgstat_io.c:255
bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, BackendType bktype)
Definition: pgstat_io.c:46
static PgStat_PendingIO PendingIOStats
Definition: pgstat_io.c:31
PgStat_IO * pgstat_fetch_stat_io(void)
Definition: pgstat_io.c:157
bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:424
bool have_iostats
Definition: pgstat_io.c:32
void pgstat_io_snapshot_cb(void)
Definition: pgstat_io.c:277
const char * pgstat_get_io_context_name(IOContext io_context)
Definition: pgstat_io.c:221
bool pgstat_tracks_io_object(BackendType bktype, IOObject io_object, IOContext io_context)
Definition: pgstat_io.c:359
void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
Definition: pgstat_io.c:83
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:77
instr_time local_blk_read_time
Definition: instrument.h:38
instr_time shared_blk_read_time
Definition: instrument.h:36
instr_time shared_blk_write_time
Definition: instrument.h:37
instr_time local_blk_write_time
Definition: instrument.h:39
Definition: lwlock.h:42
LWLock locks[BACKEND_NUM_TYPES]
PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat.h:313
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat.h:312
PgStat_BktypeIO stats[BACKEND_NUM_TYPES]
Definition: pgstat.h:319
TimestampTz stat_reset_timestamp
Definition: pgstat.h:318
PgStat_Snapshot snapshot
PgStat_ShmemControl * shmem
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat_io.c:26
instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat_io.c:27
PgStatShared_IO io