PostgreSQL Source Code  git master
pgstat_io.c
Go to the documentation of this file.
1 /* -------------------------------------------------------------------------
2  *
3  * pgstat_io.c
4  * Implementation of IO statistics.
5  *
6  * This file contains the implementation of IO statistics. It is kept separate
7  * from pgstat.c to enforce the line between the statistics access / storage
8  * implementation and the details about individual types of statistics.
9  *
10  * Copyright (c) 2021-2023, PostgreSQL Global Development Group
11  *
12  * IDENTIFICATION
13  * src/backend/utils/activity/pgstat_io.c
14  * -------------------------------------------------------------------------
15  */
16 
17 #include "postgres.h"
18 
19 #include "executor/instrument.h"
20 #include "storage/bufmgr.h"
21 #include "utils/pgstat_internal.h"
22 
23 
24 typedef struct PgStat_PendingIO
25 {
29 
30 
32 bool have_iostats = false;
33 
34 
35 /*
36  * Check that stats have not been counted for any combination of IOObject,
37  * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
38  * stats are tracked for this combination and IO times are non-zero, counts
39  * should be non-zero.
40  *
41  * The passed-in PgStat_BktypeIO must contain stats from the BackendType
42  * specified by the second parameter. Caller is responsible for locking the
43  * passed-in PgStat_BktypeIO, if needed.
44  */
45 bool
47  BackendType bktype)
48 {
49  for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
50  {
51  for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
52  {
53  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
54  {
55  /* we do track it */
56  if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
57  {
58  /* ensure that if IO times are non-zero, counts are > 0 */
59  if (backend_io->times[io_object][io_context][io_op] != 0 &&
60  backend_io->counts[io_object][io_context][io_op] <= 0)
61  return false;
62 
63  continue;
64  }
65 
66  /* we don't track it, and it is not 0 */
67  if (backend_io->counts[io_object][io_context][io_op] != 0)
68  return false;
69  }
70  }
71  }
72 
73  return true;
74 }
75 
76 void
77 pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
78 {
79  pgstat_count_io_op_n(io_object, io_context, io_op, 1);
80 }
81 
82 void
83 pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
84 {
85  Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
86  Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
87  Assert((unsigned int) io_op < IOOP_NUM_TYPES);
88  Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
89 
90  PendingIOStats.counts[io_object][io_context][io_op] += cnt;
91 
92  have_iostats = true;
93 }
94 
97 {
98  instr_time io_start;
99 
100  if (track_io_timing)
101  INSTR_TIME_SET_CURRENT(io_start);
102  else
103  INSTR_TIME_SET_ZERO(io_start);
104 
105  return io_start;
106 }
107 
108 /*
109  * Like pgstat_count_io_op_n() except it also accumulates time.
110  */
111 void
112 pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
114 {
115  if (track_io_timing)
116  {
117  instr_time io_time;
118 
119  INSTR_TIME_SET_CURRENT(io_time);
121 
122  if (io_op == IOOP_WRITE)
123  {
125  if (io_object == IOOBJECT_RELATION)
127  }
128  else if (io_op == IOOP_READ)
129  {
131  if (io_object == IOOBJECT_RELATION)
133  }
134 
135  INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
136  io_time);
137  }
138 
139  pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
140 }
141 
142 PgStat_IO *
144 {
146 
147  return &pgStatLocal.snapshot.io;
148 }
149 
150 /*
151  * Flush out locally pending IO statistics
152  *
153  * If no stats have been recorded, this function returns false.
154  *
155  * If nowait is true, this function returns true if the lock could not be
156  * acquired. Otherwise, return false.
157  */
158 bool
159 pgstat_flush_io(bool nowait)
160 {
161  LWLock *bktype_lock;
162  PgStat_BktypeIO *bktype_shstats;
163 
164  if (!have_iostats)
165  return false;
166 
167  bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
168  bktype_shstats =
170 
171  if (!nowait)
172  LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
173  else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
174  return true;
175 
176  for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
177  {
178  for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
179  {
180  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
181  {
182  instr_time time;
183 
184  bktype_shstats->counts[io_object][io_context][io_op] +=
185  PendingIOStats.counts[io_object][io_context][io_op];
186 
187  time = PendingIOStats.pending_times[io_object][io_context][io_op];
188 
189  bktype_shstats->times[io_object][io_context][io_op] +=
191  }
192  }
193  }
194 
196 
197  LWLockRelease(bktype_lock);
198 
199  memset(&PendingIOStats, 0, sizeof(PendingIOStats));
200 
201  have_iostats = false;
202 
203  return false;
204 }
205 
206 const char *
208 {
209  switch (io_context)
210  {
211  case IOCONTEXT_BULKREAD:
212  return "bulkread";
213  case IOCONTEXT_BULKWRITE:
214  return "bulkwrite";
215  case IOCONTEXT_NORMAL:
216  return "normal";
217  case IOCONTEXT_VACUUM:
218  return "vacuum";
219  }
220 
221  elog(ERROR, "unrecognized IOContext value: %d", io_context);
222  pg_unreachable();
223 }
224 
225 const char *
227 {
228  switch (io_object)
229  {
230  case IOOBJECT_RELATION:
231  return "relation";
233  return "temp relation";
234  }
235 
236  elog(ERROR, "unrecognized IOObject value: %d", io_object);
237  pg_unreachable();
238 }
239 
240 void
242 {
243  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
244  {
245  LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
246  PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
247 
248  LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
249 
250  /*
251  * Use the lock in the first BackendType's PgStat_BktypeIO to protect
252  * the reset timestamp as well.
253  */
254  if (i == 0)
256 
257  memset(bktype_shstats, 0, sizeof(*bktype_shstats));
258  LWLockRelease(bktype_lock);
259  }
260 }
261 
262 void
264 {
265  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
266  {
267  LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
268  PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
269  PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
270 
271  LWLockAcquire(bktype_lock, LW_SHARED);
272 
273  /*
274  * Use the lock in the first BackendType's PgStat_BktypeIO to protect
275  * the reset timestamp as well.
276  */
277  if (i == 0)
280 
281  /* using struct assignment due to better type safety */
282  *bktype_snap = *bktype_shstats;
283  LWLockRelease(bktype_lock);
284  }
285 }
286 
287 /*
288 * IO statistics are not collected for all BackendTypes.
289 *
290 * The following BackendTypes do not participate in the cumulative stats
291 * subsystem or do not perform IO on which we currently track:
292 * - Syslogger because it is not connected to shared memory
293 * - Archiver because most relevant archiving IO is delegated to a
294 * specialized command or module
295 * - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now
296 *
297 * Function returns true if BackendType participates in the cumulative stats
298 * subsystem for IO and false if it does not.
299 *
300 * When adding a new BackendType, also consider adding relevant restrictions to
301 * pgstat_tracks_io_object() and pgstat_tracks_io_op().
302 */
303 bool
305 {
306  /*
307  * List every type so that new backend types trigger a warning about
308  * needing to adjust this switch.
309  */
310  switch (bktype)
311  {
312  case B_INVALID:
313  case B_ARCHIVER:
314  case B_LOGGER:
315  case B_WAL_RECEIVER:
316  case B_WAL_WRITER:
317  return false;
318 
319  case B_AUTOVAC_LAUNCHER:
320  case B_AUTOVAC_WORKER:
321  case B_BACKEND:
322  case B_BG_WORKER:
323  case B_BG_WRITER:
324  case B_CHECKPOINTER:
326  case B_STARTUP:
327  case B_WAL_SENDER:
328  return true;
329  }
330 
331  return false;
332 }
333 
334 /*
335  * Some BackendTypes do not perform IO on certain IOObjects or in certain
336  * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
337  * that the given BackendType is expected to do IO in the given IOContext and
338  * on the given IOObject and that the given IOObject is expected to be operated
339  * on in the given IOContext.
340  */
341 bool
343  IOContext io_context)
344 {
345  bool no_temp_rel;
346 
347  /*
348  * Some BackendTypes should never track IO statistics.
349  */
350  if (!pgstat_tracks_io_bktype(bktype))
351  return false;
352 
353  /*
354  * Currently, IO on temporary relations can only occur in the
355  * IOCONTEXT_NORMAL IOContext.
356  */
357  if (io_context != IOCONTEXT_NORMAL &&
358  io_object == IOOBJECT_TEMP_RELATION)
359  return false;
360 
361  /*
362  * In core Postgres, only regular backends and WAL Sender processes
363  * executing queries will use local buffers and operate on temporary
364  * relations. Parallel workers will not use local buffers (see
365  * InitLocalBuffers()); however, extensions leveraging background workers
366  * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
367  * BackendType B_BG_WORKER.
368  */
369  no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
370  bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
371  bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
372 
373  if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
374  io_object == IOOBJECT_TEMP_RELATION)
375  return false;
376 
377  /*
378  * Some BackendTypes do not currently perform any IO in certain
379  * IOContexts, and, while it may not be inherently incorrect for them to
380  * do so, excluding those rows from the view makes the view easier to use.
381  */
382  if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
383  (io_context == IOCONTEXT_BULKREAD ||
384  io_context == IOCONTEXT_BULKWRITE ||
385  io_context == IOCONTEXT_VACUUM))
386  return false;
387 
388  if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
389  return false;
390 
391  if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
392  io_context == IOCONTEXT_BULKWRITE)
393  return false;
394 
395  return true;
396 }
397 
398 /*
399  * Some BackendTypes will never do certain IOOps and some IOOps should not
400  * occur in certain IOContexts or on certain IOObjects. Check that the given
401  * IOOp is valid for the given BackendType in the given IOContext and on the
402  * given IOObject. Note that there are currently no cases of an IOOp being
403  * invalid for a particular BackendType only within a certain IOContext and/or
404  * only on a certain IOObject.
405  */
406 bool
408  IOContext io_context, IOOp io_op)
409 {
410  bool strategy_io_context;
411 
412  /* if (io_context, io_object) will never collect stats, we're done */
413  if (!pgstat_tracks_io_object(bktype, io_object, io_context))
414  return false;
415 
416  /*
417  * Some BackendTypes will not do certain IOOps.
418  */
419  if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
420  (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
421  return false;
422 
423  if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
424  bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
425  return false;
426 
427  /*
428  * Temporary tables are not logged and thus do not require fsync'ing.
429  * Writeback is not requested for temporary tables.
430  */
431  if (io_object == IOOBJECT_TEMP_RELATION &&
432  (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
433  return false;
434 
435  /*
436  * Some IOOps are not valid in certain IOContexts and some IOOps are only
437  * valid in certain contexts.
438  */
439  if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
440  return false;
441 
442  strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
443  io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
444 
445  /*
446  * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
447  */
448  if (!strategy_io_context && io_op == IOOP_REUSE)
449  return false;
450 
451  /*
452  * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
453  * counted in the IOCONTEXT_NORMAL IOContext. See comment in
454  * register_dirty_segment() for more details.
455  */
456  if (strategy_io_context && io_op == IOOP_FSYNC)
457  return false;
458 
459 
460  return true;
461 }
bool track_io_timing
Definition: bufmgr.c:138
unsigned int uint32
Definition: c.h:490
#define pg_unreachable()
Definition: c.h:280
int64 TimestampTz
Definition: timestamp.h:39
#define ERROR
Definition: elog.h:39
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_ADD(x, y)
Definition: instr_time.h:178
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:194
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
BufferUsage pgBufferUsage
Definition: instrument.c:20
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1195
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1366
@ LW_SHARED
Definition: lwlock.h:117
@ LW_EXCLUSIVE
Definition: lwlock.h:116
#define BACKEND_NUM_TYPES
Definition: miscadmin.h:344
BackendType
Definition: miscadmin.h:327
@ B_WAL_WRITER
Definition: miscadmin.h:341
@ B_WAL_RECEIVER
Definition: miscadmin.h:339
@ B_CHECKPOINTER
Definition: miscadmin.h:335
@ B_WAL_SENDER
Definition: miscadmin.h:340
@ B_LOGGER
Definition: miscadmin.h:336
@ B_STARTUP
Definition: miscadmin.h:338
@ B_BG_WORKER
Definition: miscadmin.h:333
@ B_INVALID
Definition: miscadmin.h:328
@ B_STANDALONE_BACKEND
Definition: miscadmin.h:337
@ B_BG_WRITER
Definition: miscadmin.h:334
@ B_BACKEND
Definition: miscadmin.h:332
@ B_ARCHIVER
Definition: miscadmin.h:329
@ B_AUTOVAC_LAUNCHER
Definition: miscadmin.h:330
@ B_AUTOVAC_WORKER
Definition: miscadmin.h:331
BackendType MyBackendType
Definition: miscinit.c:63
static time_t start_time
Definition: pg_ctl.c:94
void pgstat_snapshot_fixed(PgStat_Kind kind)
Definition: pgstat.c:930
PgStat_LocalState pgStatLocal
Definition: pgstat.c:196
#define pgstat_count_buffer_read_time(n)
Definition: pgstat.h:550
IOObject
Definition: pgstat.h:277
@ IOOBJECT_RELATION
Definition: pgstat.h:278
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:279
@ PGSTAT_KIND_IO
Definition: pgstat.h:51
#define pgstat_count_buffer_write_time(n)
Definition: pgstat.h:552
#define IOOP_NUM_TYPES
Definition: pgstat.h:306
IOContext
Definition: pgstat.h:285
@ IOCONTEXT_NORMAL
Definition: pgstat.h:288
@ IOCONTEXT_VACUUM
Definition: pgstat.h:289
@ IOCONTEXT_BULKREAD
Definition: pgstat.h:286
@ IOCONTEXT_BULKWRITE
Definition: pgstat.h:287
#define IOCONTEXT_NUM_TYPES
Definition: pgstat.h:292
IOOp
Definition: pgstat.h:295
@ IOOP_EXTEND
Definition: pgstat.h:297
@ IOOP_FSYNC
Definition: pgstat.h:298
@ IOOP_READ
Definition: pgstat.h:300
@ IOOP_WRITEBACK
Definition: pgstat.h:303
@ IOOP_HIT
Definition: pgstat.h:299
@ IOOP_EVICT
Definition: pgstat.h:296
@ IOOP_REUSE
Definition: pgstat.h:301
@ IOOP_WRITE
Definition: pgstat.h:302
int64 PgStat_Counter
Definition: pgstat.h:89
#define IOOBJECT_NUM_TYPES
Definition: pgstat.h:282
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:112
struct PgStat_PendingIO PgStat_PendingIO
bool pgstat_tracks_io_bktype(BackendType bktype)
Definition: pgstat_io.c:304
bool pgstat_flush_io(bool nowait)
Definition: pgstat_io.c:159
const char * pgstat_get_io_object_name(IOObject io_object)
Definition: pgstat_io.c:226
void pgstat_io_reset_all_cb(TimestampTz ts)
Definition: pgstat_io.c:241
bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, BackendType bktype)
Definition: pgstat_io.c:46
instr_time pgstat_prepare_io_time(void)
Definition: pgstat_io.c:96
static PgStat_PendingIO PendingIOStats
Definition: pgstat_io.c:31
PgStat_IO * pgstat_fetch_stat_io(void)
Definition: pgstat_io.c:143
bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:407
bool have_iostats
Definition: pgstat_io.c:32
void pgstat_io_snapshot_cb(void)
Definition: pgstat_io.c:263
const char * pgstat_get_io_context_name(IOContext io_context)
Definition: pgstat_io.c:207
bool pgstat_tracks_io_object(BackendType bktype, IOObject io_object, IOContext io_context)
Definition: pgstat_io.c:342
void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
Definition: pgstat_io.c:83
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:77
instr_time blk_write_time
Definition: instrument.h:37
instr_time blk_read_time
Definition: instrument.h:36
Definition: lwlock.h:41
LWLock locks[BACKEND_NUM_TYPES]
PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat.h:311
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat.h:310
PgStat_BktypeIO stats[BACKEND_NUM_TYPES]
Definition: pgstat.h:317
TimestampTz stat_reset_timestamp
Definition: pgstat.h:316
PgStat_Snapshot snapshot
PgStat_ShmemControl * shmem
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat_io.c:26
instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat_io.c:27
PgStatShared_IO io