PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
aio_internal.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * aio_internal.h
4 * AIO related declarations that should only be used by the AIO subsystem
5 * internally.
6 *
7 *
8 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 * Portions Copyright (c) 1994, Regents of the University of California
10 *
11 * src/include/storage/aio_internal.h
12 *
13 *-------------------------------------------------------------------------
14 */
15#ifndef AIO_INTERNAL_H
16#define AIO_INTERNAL_H
17
18
19#include "lib/ilist.h"
20#include "port/pg_iovec.h"
21#include "storage/aio.h"
23
24
25/*
26 * The maximum number of IOs that can be batch submitted at once.
27 */
28#define PGAIO_SUBMIT_BATCH_SIZE 32
29
30
31
32/*
33 * State machine for handles. With some exceptions, noted below, handles move
34 * linearly through all states.
35 *
36 * State changes should all go through pgaio_io_update_state().
37 */
38typedef enum PgAioHandleState
39{
40 /* not in use */
42
43 /*
44 * Returned by pgaio_io_acquire(). The next state is either DEFINED (if
45 * pgaio_io_start_*() is called), or IDLE (if pgaio_io_release() is
46 * called).
47 */
49
50 /*
51 * pgaio_io_start_*() has been called, but IO is not yet staged. At this
52 * point the handle has all the information for the IO to be executed.
53 */
55
56 /*
57 * stage() callbacks have been called, handle ready to be submitted for
58 * execution. Unless in batchmode (see pgaio_enter_batchmode()), the
59 * IO will be submitted immediately after.
60 */
62
63 /* IO has been submitted to the IO method for execution */
65
66 /* IO finished, but result has not yet been processed */
68
69 /*
70 * IO completed, shared completion has been called.
71 *
72 * If the IO completion occurs in the issuing backend, local callbacks
73 * will immediately be called. Otherwise the handle stays in
74 * COMPLETED_SHARED until the issuing backend waits for the completion of
75 * the IO.
76 */
78
79 /*
80 * IO completed, local completion has been called.
81 *
82 * After this the handle will be made reusable and go into IDLE state.
83 */
86
87
89
90/* typedef is in aio_types.h */
92{
93 /* all state updates should go through pgaio_io_update_state() */
95
96 /* what are we operating on */
98
99 /* which IO operation */
101
102 /* bitfield of PgAioHandleFlags */
104
106
107 /* using the proper type here would use more space */
109
110 /* data forwarded to each callback */
112
113 /*
114 * Length of data associated with handle using
115 * pgaio_io_set_handle_data_*().
116 */
118
119 /* XXX: could be optimized out with some pointer math */
121
122 /* raw result of the IO operation */
124
136
139
140 /* incremented every time the IO handle is reused */
142
143 /*
144 * To wait for the IO to complete other backends can wait on this CV. Note
145 * that, if in SUBMITTED state, a waiter first needs to check if it needs
146 * to do work via IoMethodOps->wait_one().
147 */
149
150 /* result of shared callback, passed to issuer callback */
152
153 /*
154 * Index into PgAioCtl->iovecs and PgAioCtl->handle_data.
155 *
156 * At the moment there's no need to differentiate between the two, but
157 * that won't necessarily stay that way.
158 */
160
161 /*
162 * If not NULL, this memory location will be updated with information
163 * about the IO's completion iff the issuing backend learns about the IO's
164 * completion.
165 */
167
168 /* Data necessary for the IO to be performed */
170
171 /*
172 * Data necessary to identify the object undergoing IO to higher-level
173 * code. Needs to be sufficient to allow another backend to reopen the
174 * file.
175 */
177};
178
179
180typedef struct PgAioBackend
181{
182 /* index into PgAioCtl->io_handles */
184
185 /* IO Handles that currently are not used */
187
188 /*
189 * Only one IO may be returned by pgaio_io_acquire()/pgaio_io_acquire_nb()
190 * without having been either defined (by actually associating it with IO)
191 * or released (with pgaio_io_release()). This restriction is necessary to
192 * guarantee that we always can acquire an IO. ->handed_out_io is used to
193 * enforce that rule.
194 */
196
197 /* Are we currently in batchmode? See pgaio_enter_batchmode(). */
199
200 /*
201 * IOs that are defined, but not yet submitted.
202 */
205
206 /*
207 * List of in-flight IOs. Also contains IOs that aren't strictly speaking
208 * in-flight anymore, but have been waited-for and completed by another
209 * backend. Once this backend sees such an IO it'll be reclaimed.
210 *
211 * The list is ordered by submission time, with more recently submitted
212 * IOs being appended at the end.
213 */
216
217
218typedef struct PgAioCtl
219{
222
223 /*
224 * Array of iovec structs. Each iovec is owned by a specific backend. The
225 * allocation is in PgAioCtl to allow the maximum number of iovecs for
226 * individual IOs to be configurable with a PGC_POSTMASTER GUC.
227 */
229 struct iovec *iovecs;
230
231 /*
232 * For, e.g., an IO covering multiple buffers in shared / temp buffers, we
233 * need to get Buffer IDs during completion to be able to change the
234 * BufferDesc state accordingly. This space can be used to store e.g.
235 * Buffer IDs. Note that the actual iovec might be shorter than this,
236 * because we combine neighboring pages into one larger iovec entry.
237 */
239
243
244
245
246/*
247 * Callbacks used to implement an IO method.
248 */
249typedef struct IoMethodOps
250{
251 /* properties */
252
253 /*
254 * If an FD is about to be closed, do we need to wait for all in-flight
255 * IOs referencing that FD?
256 */
258
259
260 /* global initialization */
261
262 /*
263 * Amount of additional shared memory to reserve for the io_method. Called
264 * just like a normal ipci.c style *Size() function. Optional.
265 */
266 size_t (*shmem_size) (void);
267
268 /*
269 * Initialize shared memory. first_time is true if AIO's shared memory was
270 * just initialized, false otherwise. Optional.
271 */
272 void (*shmem_init) (bool first_time);
273
274 /*
275 * Per-backend initialization. Optional.
276 */
277 void (*init_backend) (void);
278
279
280 /* handling of IOs */
281
282 /* optional */
284
285 /*
286 * Start executing passed in IOs.
287 *
288 * Will not be called if ->needs_synchronous_execution() returned true.
289 *
290 * num_staged_ios is <= PGAIO_SUBMIT_BATCH_SIZE.
291 *
292 * Always called in a critical section.
293 */
294 int (*submit) (uint16 num_staged_ios, PgAioHandle **staged_ios);
295
296 /*
297 * Wait for the IO to complete. Optional.
298 *
299 * If not provided, it needs to be guaranteed that the IO method calls
300 * pgaio_io_process_completion() without further interaction by the
301 * issuing backend.
302 */
303 void (*wait_one) (PgAioHandle *ioh,
304 uint64 ref_generation);
306
307
308/* aio.c */
309extern bool pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state);
310extern void pgaio_io_stage(PgAioHandle *ioh, PgAioOp op);
311extern void pgaio_io_process_completion(PgAioHandle *ioh, int result);
312extern void pgaio_io_prepare_submit(PgAioHandle *ioh);
314extern const char *pgaio_io_get_state_name(PgAioHandle *ioh);
316extern void pgaio_shutdown(int code, Datum arg);
317
318/* aio_callback.c */
319extern void pgaio_io_call_stage(PgAioHandle *ioh);
322
323/* aio_io.c */
325extern const char *pgaio_io_get_op_name(PgAioHandle *ioh);
326extern bool pgaio_io_uses_fd(PgAioHandle *ioh, int fd);
327
328/* aio_target.c */
329extern bool pgaio_io_can_reopen(PgAioHandle *ioh);
330extern void pgaio_io_reopen(PgAioHandle *ioh);
331extern const char *pgaio_io_get_target_name(PgAioHandle *ioh);
332
333
334/*
335 * The AIO subsystem has fairly verbose debug logging support. This can be
336 * enabled/disabled at build time. The reason for this is that
337 * a) the verbosity can make debugging things on higher levels hard
338 * b) even if logging can be skipped due to elevel checks, it still causes a
339 * measurable slowdown
340 *
341 * XXX: This should likely be disabled by default eventually, at least in
342 * non-assert builds.
343 */
344#define PGAIO_VERBOSE 1
345
346/*
347 * Simple ereport() wrapper that only logs if PGAIO_VERBOSE is nonzero.
348 *
349 * The macro body is always compiled, guarded by if (PGAIO_VERBOSE), which is
350 * a constant if (0) when verbose logging is disabled. That makes it less
351 * likely that debug logging is silently broken.
352 *
353 * The current definition requires passing at least one argument after msg.
354 */
355#define pgaio_debug(elevel, msg, ...) \
356 do { \
357 if (PGAIO_VERBOSE) \
358 ereport(elevel, \
359 errhidestmt(true), errhidecontext(true), \
360 errmsg_internal(msg, \
361 __VA_ARGS__)); \
362 } while(0)
363
364/*
365 * pgaio_debug() wrapper prefixing msg with the IO's id, op, target and state.
366 * ioh is expanded several times; at least one argument must follow msg.
367 */
368#define pgaio_debug_io(elevel, ioh, msg, ...) \
369 pgaio_debug(elevel, "io %-10d|op %-5s|target %-4s|state %-16s: " msg, \
370 pgaio_io_get_id(ioh), \
371 pgaio_io_get_op_name(ioh), \
372 pgaio_io_get_target_name(ioh), \
373 pgaio_io_get_state_name(ioh), \
374 __VA_ARGS__)
375
376
377#ifdef USE_INJECTION_POINTS
378
379extern void pgaio_io_call_inj(PgAioHandle *ioh, const char *injection_point);
380
381/* just for use in tests, from within injection points */
382extern PgAioHandle *pgaio_inj_io_get(void);
383
384#else
385
386#define pgaio_io_call_inj(ioh, injection_point) (void) 0
387
388/*
389 * no fallback for pgaio_inj_io_get, all code using injection points better be
390 * guarded by USE_INJECTION_POINTS.
391 */
392
393#endif
394
395
396/* Declarations for the tables of function pointers exposed by each IO method. */
399#ifdef IOMETHOD_IO_URING_ENABLED
400extern PGDLLIMPORT const IoMethodOps pgaio_uring_ops;
401#endif
402
406
407
408
409#endif /* AIO_INTERNAL_H */
#define PGAIO_HANDLE_MAX_CALLBACKS
Definition: aio.h:256
PgAioTargetID
Definition: aio.h:117
PgAioOp
Definition: aio.h:88
void pgaio_io_process_completion(PgAioHandle *ioh, int result)
Definition: aio.c:498
void pgaio_io_perform_synchronously(PgAioHandle *ioh)
Definition: aio_io.c:116
struct IoMethodOps IoMethodOps
const char * pgaio_result_status_string(PgAioResultStatus rs)
Definition: aio.c:834
void pgaio_io_call_stage(PgAioHandle *ioh)
Definition: aio_callback.c:188
PGDLLIMPORT const IoMethodOps pgaio_worker_ops
Definition: method_worker.c:82
PgAioHandleState
Definition: aio_internal.h:39
@ PGAIO_HS_STAGED
Definition: aio_internal.h:61
@ PGAIO_HS_COMPLETED_SHARED
Definition: aio_internal.h:77
@ PGAIO_HS_DEFINED
Definition: aio_internal.h:54
@ PGAIO_HS_SUBMITTED
Definition: aio_internal.h:64
@ PGAIO_HS_IDLE
Definition: aio_internal.h:41
@ PGAIO_HS_HANDED_OUT
Definition: aio_internal.h:48
@ PGAIO_HS_COMPLETED_IO
Definition: aio_internal.h:67
@ PGAIO_HS_COMPLETED_LOCAL
Definition: aio_internal.h:84
bool pgaio_io_needs_synchronous_execution(PgAioHandle *ioh)
Definition: aio.c:453
const char * pgaio_io_get_op_name(PgAioHandle *ioh)
Definition: aio_io.c:175
PgAioResult pgaio_io_call_complete_local(PgAioHandle *ioh)
Definition: aio_callback.c:271
#define pgaio_io_call_inj(ioh, injection_point)
Definition: aio_internal.h:386
void pgaio_io_reopen(PgAioHandle *ioh)
Definition: aio_target.c:108
struct PgAioCtl PgAioCtl
bool pgaio_io_uses_fd(PgAioHandle *ioh, int fd)
Definition: aio_io.c:197
bool pgaio_io_can_reopen(PgAioHandle *ioh)
Definition: aio_target.c:97
void pgaio_io_call_complete_shared(PgAioHandle *ioh)
Definition: aio_callback.c:214
void pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
Definition: aio.c:403
PGDLLIMPORT PgAioBackend * pgaio_my_backend
Definition: aio.c:82
struct PgAioBackend PgAioBackend
PGDLLIMPORT PgAioCtl * pgaio_ctl
Definition: aio.c:79
PGDLLIMPORT const IoMethodOps pgaio_sync_ops
Definition: method_sync.c:28
PGDLLIMPORT const IoMethodOps * pgaio_method_ops
Definition: aio.c:94
const char * pgaio_io_get_target_name(PgAioHandle *ioh)
Definition: aio_target.c:48
const char * pgaio_io_get_state_name(PgAioHandle *ioh)
Definition: aio.c:828
bool pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
Definition: aio.c:529
void pgaio_io_prepare_submit(PgAioHandle *ioh)
Definition: aio.c:480
void pgaio_shutdown(int code, Datum arg)
Definition: aio.c:1168
#define PGAIO_SUBMIT_BATCH_SIZE
Definition: aio_internal.h:28
PgAioResultStatus
Definition: aio_types.h:75
#define PGDLLIMPORT
Definition: c.h:1291
uint8_t uint8
Definition: c.h:500
int32_t int32
Definition: c.h:498
uint64_t uint64
Definition: c.h:503
uint16_t uint16
Definition: c.h:501
uint32_t uint32
Definition: c.h:502
void * arg
uintptr_t Datum
Definition: postgres.h:69
static int fd(const char *x, int i)
Definition: preproc-init.c:105
size_t(* shmem_size)(void)
Definition: aio_internal.h:266
bool wait_on_fd_before_close
Definition: aio_internal.h:257
void(* shmem_init)(bool first_time)
Definition: aio_internal.h:272
void(* init_backend)(void)
Definition: aio_internal.h:277
int(* submit)(uint16 num_staged_ios, PgAioHandle **staged_ios)
Definition: aio_internal.h:294
void(* wait_one)(PgAioHandle *ioh, uint64 ref_generation)
Definition: aio_internal.h:303
bool(* needs_synchronous_execution)(PgAioHandle *ioh)
Definition: aio_internal.h:283
uint32 io_handle_off
Definition: aio_internal.h:183
dclist_head in_flight_ios
Definition: aio_internal.h:214
uint16 num_staged_ios
Definition: aio_internal.h:203
dclist_head idle_ios
Definition: aio_internal.h:186
PgAioHandle * staged_ios[PGAIO_SUBMIT_BATCH_SIZE]
Definition: aio_internal.h:204
PgAioHandle * handed_out_io
Definition: aio_internal.h:195
uint32 iovec_count
Definition: aio_internal.h:228
struct iovec * iovecs
Definition: aio_internal.h:229
PgAioHandle * io_handles
Definition: aio_internal.h:241
uint32 io_handle_count
Definition: aio_internal.h:240
int backend_state_count
Definition: aio_internal.h:220
uint64 * handle_data
Definition: aio_internal.h:238
PgAioBackend * backend_state
Definition: aio_internal.h:221
PgAioTargetData target_data
Definition: aio_internal.h:176
struct ResourceOwnerData * resowner
Definition: aio_internal.h:137
int32 owner_procno
Definition: aio_internal.h:120
PgAioResult distilled_result
Definition: aio_internal.h:151
uint8 callbacks[PGAIO_HANDLE_MAX_CALLBACKS]
Definition: aio_internal.h:108
dlist_node node
Definition: aio_internal.h:135
uint8 handle_data_len
Definition: aio_internal.h:117
PgAioOp op
Definition: aio_internal.h:100
PgAioReturn * report_return
Definition: aio_internal.h:166
PgAioOpData op_data
Definition: aio_internal.h:169
uint32 iovec_off
Definition: aio_internal.h:159
uint64 generation
Definition: aio_internal.h:141
uint8 callbacks_data[PGAIO_HANDLE_MAX_CALLBACKS]
Definition: aio_internal.h:111
uint8 num_callbacks
Definition: aio_internal.h:105
PgAioHandleState state
Definition: aio_internal.h:94
dlist_node resowner_node
Definition: aio_internal.h:138
PgAioTargetID target
Definition: aio_internal.h:97
ConditionVariable cv
Definition: aio_internal.h:148
Definition: regguts.h:323