PostgreSQL Source Code  git master
pgstat_io.c
Go to the documentation of this file.
1 /* -------------------------------------------------------------------------
2  *
3  * pgstat_io.c
4  * Implementation of IO statistics.
5  *
6  * This file contains the implementation of IO statistics. It is kept separate
7  * from pgstat.c to enforce the line between the statistics access / storage
8  * implementation and the details about individual types of statistics.
9  *
10  * Copyright (c) 2021-2023, PostgreSQL Global Development Group
11  *
12  * IDENTIFICATION
13  * src/backend/utils/activity/pgstat_io.c
14  * -------------------------------------------------------------------------
15  */
16 
17 #include "postgres.h"
18 
19 #include "executor/instrument.h"
20 #include "storage/bufmgr.h"
21 #include "utils/pgstat_internal.h"
22 
23 
24 typedef struct PgStat_PendingIO
25 {
29 
30 
32 bool have_iostats = false;
33 
34 
35 /*
36  * Check that stats have not been counted for any combination of IOObject,
37  * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
38  * stats are tracked for this combination and IO times are non-zero, counts
39  * should be non-zero.
40  *
41  * The passed-in PgStat_BktypeIO must contain stats from the BackendType
42  * specified by the second parameter. Caller is responsible for locking the
43  * passed-in PgStat_BktypeIO, if needed.
44  */
45 bool
47  BackendType bktype)
48 {
49  for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
50  {
51  for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
52  {
53  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
54  {
55  /* we do track it */
56  if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
57  {
58  /* ensure that if IO times are non-zero, counts are > 0 */
59  if (backend_io->times[io_object][io_context][io_op] != 0 &&
60  backend_io->counts[io_object][io_context][io_op] <= 0)
61  return false;
62 
63  continue;
64  }
65 
66  /* we don't track it, and it is not 0 */
67  if (backend_io->counts[io_object][io_context][io_op] != 0)
68  return false;
69  }
70  }
71  }
72 
73  return true;
74 }
75 
76 void
77 pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
78 {
79  pgstat_count_io_op_n(io_object, io_context, io_op, 1);
80 }
81 
82 void
83 pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
84 {
85  Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
86  Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
87  Assert((unsigned int) io_op < IOOP_NUM_TYPES);
88  Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
89 
90  PendingIOStats.counts[io_object][io_context][io_op] += cnt;
91 
92  have_iostats = true;
93 }
94 
97 {
98  instr_time io_start;
99 
100  if (track_io_timing)
101  INSTR_TIME_SET_CURRENT(io_start);
102  else
103  INSTR_TIME_SET_ZERO(io_start);
104 
105  return io_start;
106 }
107 
108 /*
109  * Like pgstat_count_io_op_n() except it also accumulates time.
110  */
111 void
112 pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
114 {
115  if (track_io_timing)
116  {
117  instr_time io_time;
118 
119  INSTR_TIME_SET_CURRENT(io_time);
121 
122  if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
123  {
125  if (io_object == IOOBJECT_RELATION)
127  else if (io_object == IOOBJECT_TEMP_RELATION)
129  }
130  else if (io_op == IOOP_READ)
131  {
133  if (io_object == IOOBJECT_RELATION)
135  else if (io_object == IOOBJECT_TEMP_RELATION)
137  }
138 
139  INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op],
140  io_time);
141  }
142 
143  pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
144 }
145 
146 PgStat_IO *
148 {
150 
151  return &pgStatLocal.snapshot.io;
152 }
153 
154 /*
155  * Flush out locally pending IO statistics
156  *
157  * If no stats have been recorded, this function returns false.
158  *
159  * If nowait is true, this function returns true if the lock could not be
160  * acquired. Otherwise, return false.
161  */
162 bool
163 pgstat_flush_io(bool nowait)
164 {
165  LWLock *bktype_lock;
166  PgStat_BktypeIO *bktype_shstats;
167 
168  if (!have_iostats)
169  return false;
170 
171  bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
172  bktype_shstats =
174 
175  if (!nowait)
176  LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
177  else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
178  return true;
179 
180  for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
181  {
182  for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
183  {
184  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
185  {
186  instr_time time;
187 
188  bktype_shstats->counts[io_object][io_context][io_op] +=
189  PendingIOStats.counts[io_object][io_context][io_op];
190 
191  time = PendingIOStats.pending_times[io_object][io_context][io_op];
192 
193  bktype_shstats->times[io_object][io_context][io_op] +=
195  }
196  }
197  }
198 
200 
201  LWLockRelease(bktype_lock);
202 
203  memset(&PendingIOStats, 0, sizeof(PendingIOStats));
204 
205  have_iostats = false;
206 
207  return false;
208 }
209 
210 const char *
212 {
213  switch (io_context)
214  {
215  case IOCONTEXT_BULKREAD:
216  return "bulkread";
217  case IOCONTEXT_BULKWRITE:
218  return "bulkwrite";
219  case IOCONTEXT_NORMAL:
220  return "normal";
221  case IOCONTEXT_VACUUM:
222  return "vacuum";
223  }
224 
225  elog(ERROR, "unrecognized IOContext value: %d", io_context);
226  pg_unreachable();
227 }
228 
229 const char *
231 {
232  switch (io_object)
233  {
234  case IOOBJECT_RELATION:
235  return "relation";
237  return "temp relation";
238  }
239 
240  elog(ERROR, "unrecognized IOObject value: %d", io_object);
241  pg_unreachable();
242 }
243 
244 void
246 {
247  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
248  {
249  LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
250  PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
251 
252  LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
253 
254  /*
255  * Use the lock in the first BackendType's PgStat_BktypeIO to protect
256  * the reset timestamp as well.
257  */
258  if (i == 0)
260 
261  memset(bktype_shstats, 0, sizeof(*bktype_shstats));
262  LWLockRelease(bktype_lock);
263  }
264 }
265 
266 void
268 {
269  for (int i = 0; i < BACKEND_NUM_TYPES; i++)
270  {
271  LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
272  PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
273  PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
274 
275  LWLockAcquire(bktype_lock, LW_SHARED);
276 
277  /*
278  * Use the lock in the first BackendType's PgStat_BktypeIO to protect
279  * the reset timestamp as well.
280  */
281  if (i == 0)
284 
285  /* using struct assignment due to better type safety */
286  *bktype_snap = *bktype_shstats;
287  LWLockRelease(bktype_lock);
288  }
289 }
290 
291 /*
292 * IO statistics are not collected for all BackendTypes.
293 *
294 * The following BackendTypes do not participate in the cumulative stats
295 * subsystem or do not perform IO on which we currently track:
296 * - Syslogger because it is not connected to shared memory
297 * - Archiver because most relevant archiving IO is delegated to a
298 * specialized command or module
299 * - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now
300 *
301 * Function returns true if BackendType participates in the cumulative stats
302 * subsystem for IO and false if it does not.
303 *
304 * When adding a new BackendType, also consider adding relevant restrictions to
305 * pgstat_tracks_io_object() and pgstat_tracks_io_op().
306 */
307 bool
309 {
310  /*
311  * List every type so that new backend types trigger a warning about
312  * needing to adjust this switch.
313  */
314  switch (bktype)
315  {
316  case B_INVALID:
317  case B_ARCHIVER:
318  case B_LOGGER:
319  case B_WAL_RECEIVER:
320  case B_WAL_WRITER:
321  return false;
322 
323  case B_AUTOVAC_LAUNCHER:
324  case B_AUTOVAC_WORKER:
325  case B_BACKEND:
326  case B_BG_WORKER:
327  case B_BG_WRITER:
328  case B_CHECKPOINTER:
330  case B_STARTUP:
331  case B_WAL_SENDER:
332  return true;
333  }
334 
335  return false;
336 }
337 
338 /*
339  * Some BackendTypes do not perform IO on certain IOObjects or in certain
340  * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
341  * that the given BackendType is expected to do IO in the given IOContext and
342  * on the given IOObject and that the given IOObject is expected to be operated
343  * on in the given IOContext.
344  */
345 bool
347  IOContext io_context)
348 {
349  bool no_temp_rel;
350 
351  /*
352  * Some BackendTypes should never track IO statistics.
353  */
354  if (!pgstat_tracks_io_bktype(bktype))
355  return false;
356 
357  /*
358  * Currently, IO on temporary relations can only occur in the
359  * IOCONTEXT_NORMAL IOContext.
360  */
361  if (io_context != IOCONTEXT_NORMAL &&
362  io_object == IOOBJECT_TEMP_RELATION)
363  return false;
364 
365  /*
366  * In core Postgres, only regular backends and WAL Sender processes
367  * executing queries will use local buffers and operate on temporary
368  * relations. Parallel workers will not use local buffers (see
369  * InitLocalBuffers()); however, extensions leveraging background workers
370  * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
371  * BackendType B_BG_WORKER.
372  */
373  no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
374  bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
375  bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
376 
377  if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
378  io_object == IOOBJECT_TEMP_RELATION)
379  return false;
380 
381  /*
382  * Some BackendTypes do not currently perform any IO in certain
383  * IOContexts, and, while it may not be inherently incorrect for them to
384  * do so, excluding those rows from the view makes the view easier to use.
385  */
386  if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
387  (io_context == IOCONTEXT_BULKREAD ||
388  io_context == IOCONTEXT_BULKWRITE ||
389  io_context == IOCONTEXT_VACUUM))
390  return false;
391 
392  if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
393  return false;
394 
395  if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
396  io_context == IOCONTEXT_BULKWRITE)
397  return false;
398 
399  return true;
400 }
401 
402 /*
403  * Some BackendTypes will never do certain IOOps and some IOOps should not
404  * occur in certain IOContexts or on certain IOObjects. Check that the given
405  * IOOp is valid for the given BackendType in the given IOContext and on the
406  * given IOObject. Note that there are currently no cases of an IOOp being
407  * invalid for a particular BackendType only within a certain IOContext and/or
408  * only on a certain IOObject.
409  */
410 bool
412  IOContext io_context, IOOp io_op)
413 {
414  bool strategy_io_context;
415 
416  /* if (io_context, io_object) will never collect stats, we're done */
417  if (!pgstat_tracks_io_object(bktype, io_object, io_context))
418  return false;
419 
420  /*
421  * Some BackendTypes will not do certain IOOps.
422  */
423  if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
424  (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
425  return false;
426 
427  if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
428  bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
429  return false;
430 
431  /*
432  * Temporary tables are not logged and thus do not require fsync'ing.
433  * Writeback is not requested for temporary tables.
434  */
435  if (io_object == IOOBJECT_TEMP_RELATION &&
436  (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK))
437  return false;
438 
439  /*
440  * Some IOOps are not valid in certain IOContexts and some IOOps are only
441  * valid in certain contexts.
442  */
443  if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
444  return false;
445 
446  strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
447  io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
448 
449  /*
450  * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
451  */
452  if (!strategy_io_context && io_op == IOOP_REUSE)
453  return false;
454 
455  /*
456  * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
457  * counted in the IOCONTEXT_NORMAL IOContext. See comment in
458  * register_dirty_segment() for more details.
459  */
460  if (strategy_io_context && io_op == IOOP_FSYNC)
461  return false;
462 
463 
464  return true;
465 }
bool track_io_timing
Definition: bufmgr.c:139
unsigned int uint32
Definition: c.h:495
#define pg_unreachable()
Definition: c.h:285
int64 TimestampTz
Definition: timestamp.h:39
#define ERROR
Definition: elog.h:39
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_ADD(x, y)
Definition: instr_time.h:178
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:194
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
BufferUsage pgBufferUsage
Definition: instrument.c:20
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1195
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1808
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1366
@ LW_SHARED
Definition: lwlock.h:117
@ LW_EXCLUSIVE
Definition: lwlock.h:116
#define BACKEND_NUM_TYPES
Definition: miscadmin.h:346
BackendType
Definition: miscadmin.h:329
@ B_WAL_WRITER
Definition: miscadmin.h:343
@ B_WAL_RECEIVER
Definition: miscadmin.h:341
@ B_CHECKPOINTER
Definition: miscadmin.h:337
@ B_WAL_SENDER
Definition: miscadmin.h:342
@ B_LOGGER
Definition: miscadmin.h:338
@ B_STARTUP
Definition: miscadmin.h:340
@ B_BG_WORKER
Definition: miscadmin.h:335
@ B_INVALID
Definition: miscadmin.h:330
@ B_STANDALONE_BACKEND
Definition: miscadmin.h:339
@ B_BG_WRITER
Definition: miscadmin.h:336
@ B_BACKEND
Definition: miscadmin.h:334
@ B_ARCHIVER
Definition: miscadmin.h:331
@ B_AUTOVAC_LAUNCHER
Definition: miscadmin.h:332
@ B_AUTOVAC_WORKER
Definition: miscadmin.h:333
BackendType MyBackendType
Definition: miscinit.c:63
static time_t start_time
Definition: pg_ctl.c:94
void pgstat_snapshot_fixed(PgStat_Kind kind)
Definition: pgstat.c:941
PgStat_LocalState pgStatLocal
Definition: pgstat.c:196
#define pgstat_count_buffer_read_time(n)
Definition: pgstat.h:549
IOObject
Definition: pgstat.h:276
@ IOOBJECT_RELATION
Definition: pgstat.h:277
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:278
@ PGSTAT_KIND_IO
Definition: pgstat.h:51
#define pgstat_count_buffer_write_time(n)
Definition: pgstat.h:551
#define IOOP_NUM_TYPES
Definition: pgstat.h:305
IOContext
Definition: pgstat.h:284
@ IOCONTEXT_NORMAL
Definition: pgstat.h:287
@ IOCONTEXT_VACUUM
Definition: pgstat.h:288
@ IOCONTEXT_BULKREAD
Definition: pgstat.h:285
@ IOCONTEXT_BULKWRITE
Definition: pgstat.h:286
#define IOCONTEXT_NUM_TYPES
Definition: pgstat.h:291
IOOp
Definition: pgstat.h:294
@ IOOP_EXTEND
Definition: pgstat.h:296
@ IOOP_FSYNC
Definition: pgstat.h:297
@ IOOP_READ
Definition: pgstat.h:299
@ IOOP_WRITEBACK
Definition: pgstat.h:302
@ IOOP_HIT
Definition: pgstat.h:298
@ IOOP_EVICT
Definition: pgstat.h:295
@ IOOP_REUSE
Definition: pgstat.h:300
@ IOOP_WRITE
Definition: pgstat.h:301
int64 PgStat_Counter
Definition: pgstat.h:89
#define IOOBJECT_NUM_TYPES
Definition: pgstat.h:281
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:112
struct PgStat_PendingIO PgStat_PendingIO
bool pgstat_tracks_io_bktype(BackendType bktype)
Definition: pgstat_io.c:308
bool pgstat_flush_io(bool nowait)
Definition: pgstat_io.c:163
const char * pgstat_get_io_object_name(IOObject io_object)
Definition: pgstat_io.c:230
void pgstat_io_reset_all_cb(TimestampTz ts)
Definition: pgstat_io.c:245
bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, BackendType bktype)
Definition: pgstat_io.c:46
instr_time pgstat_prepare_io_time(void)
Definition: pgstat_io.c:96
static PgStat_PendingIO PendingIOStats
Definition: pgstat_io.c:31
PgStat_IO * pgstat_fetch_stat_io(void)
Definition: pgstat_io.c:147
bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:411
bool have_iostats
Definition: pgstat_io.c:32
void pgstat_io_snapshot_cb(void)
Definition: pgstat_io.c:267
const char * pgstat_get_io_context_name(IOContext io_context)
Definition: pgstat_io.c:211
bool pgstat_tracks_io_object(BackendType bktype, IOObject io_object, IOContext io_context)
Definition: pgstat_io.c:346
void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
Definition: pgstat_io.c:83
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:77
instr_time local_blk_read_time
Definition: instrument.h:38
instr_time shared_blk_read_time
Definition: instrument.h:36
instr_time shared_blk_write_time
Definition: instrument.h:37
instr_time local_blk_write_time
Definition: instrument.h:39
Definition: lwlock.h:41
LWLock locks[BACKEND_NUM_TYPES]
PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat.h:310
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat.h:309
PgStat_BktypeIO stats[BACKEND_NUM_TYPES]
Definition: pgstat.h:316
TimestampTz stat_reset_timestamp
Definition: pgstat.h:315
PgStat_Snapshot snapshot
PgStat_ShmemControl * shmem
PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat_io.c:26
instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]
Definition: pgstat_io.c:27
PgStatShared_IO io