PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
aio.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * aio.h
4 * Main AIO interface
5 *
6 * This is the header to include when actually issuing AIO. When just
7 * declaring functions involving an AIO related type, it might suffice to
8 * include aio_types.h. Initialization related functions are in the dedicated
9 * aio_init.h.
10 *
11 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
12 * Portions Copyright (c) 1994, Regents of the University of California
13 *
14 * src/include/storage/aio.h
15 *
16 *-------------------------------------------------------------------------
17 */
18#ifndef AIO_H
19#define AIO_H
20
21#include "storage/aio_types.h"
22#include "storage/procnumber.h"
23
24
25/* io_uring is incompatible with EXEC_BACKEND */
26#if defined(USE_LIBURING) && !defined(EXEC_BACKEND)
27#define IOMETHOD_IO_URING_ENABLED
28#endif
29
30
31/* Enum for io_method GUC. */
32typedef enum IoMethod
33{
36#ifdef IOMETHOD_IO_URING_ENABLED
37 IOMETHOD_IO_URING,
38#endif
40
41/* We'll default to worker based execution. */
42#define DEFAULT_IO_METHOD IOMETHOD_WORKER
43
44
45/*
46 * Flags for an IO that can be set with pgaio_io_set_flag().
47 */
48typedef enum PgAioHandleFlags
49{
50 /*
51 * The IO references backend local memory.
52 *
53 * This needs to be set on an IO whenever the IO references process-local
54 * memory. Some IO methods do not support executing IO that references
55 * process-local memory and thus need to fall back to executing IO
56 * synchronously for IOs with this flag set.
57 *
58 * Required for correctness.
59 */
61
62 /*
63 * Hint that IO will be executed synchronously.
64 *
65 * This can make it a bit cheaper to execute synchronous IO via the AIO
66 * interface, to avoid needing an AIO and non-AIO version of code.
67 *
68 * Advantageous to set, if applicable, but not required for correctness.
69 */
71
72 /*
73 * IO is using buffered IO, used to control heuristic in some IO methods.
74 *
75 * Advantageous to set, if applicable, but not required for correctness.
76 */
79
80/*
81 * The IO operations supported by the AIO subsystem.
82 *
83 * This could be in aio_internal.h, as it is not publicly referenced, but
84 * PgAioOpData currently *does* need to be public, therefore keeping this
85 * public seems to make sense.
86 */
87typedef enum PgAioOp
88{
89 /* intentionally the zero value, to help catch zeroed memory etc */
91
94
106
107#define PGAIO_OP_COUNT (PGAIO_OP_WRITEV + 1)
108
109
110/*
111 * On what is IO being performed?
112 *
113 * PgAioTargetID specific behaviour should be implemented in
114 * aio_target.c.
115 */
116typedef enum PgAioTargetID
117{
118 /* intentionally the zero value, to help catch zeroed memory etc */
122
123#define PGAIO_TID_COUNT (PGAIO_TID_SMGR + 1)
124
125
126/*
127 * Data necessary to support IO operations (see PgAioOp).
128 *
129 * NB: Note that the FDs in here may *not* be relied upon for re-issuing
130 * requests (e.g. for partial reads/writes or in an IO worker) - the FD might
131 * be from another process, or closed since. That's not a problem for staged
132 * IOs, as all staged IOs are submitted when closing an FD.
133 */
134typedef union
135{
136 struct
137 {
138 int fd;
142
143 struct
144 {
145 int fd;
146 uint16 iov_length;
147 uint64 offset;
150
151
152/*
153 * Information about the object that IO is executed on. Mostly callbacks that
154 * operate on PgAioTargetData.
155 *
156 * typedef is in aio_types.h
157 */
159{
160 /*
161 * To support executing using worker processes, the file descriptor for an
162 * IO may need to be reopened in a different process.
163 */
164 void (*reopen) (PgAioHandle *ioh);
165
166 /* describe the target of the IO, used for log messages and views */
167 char *(*describe_identity) (const PgAioTargetData *sd);
168
169 /* name of the target, used in log messages / views */
170 const char *name;
171};
172
173
174/*
175 * IDs for callbacks that can be registered on an IO.
176 *
177 * Callbacks are identified by an ID rather than a function pointer. There are
178 * two main reasons:
179 *
180 * 1) Memory within PgAioHandle is precious, due to the number of PgAioHandle
181 * structs in pre-allocated shared memory.
182 *
183 * 2) Due to EXEC_BACKEND function pointers are not necessarily stable between
184 * different backends, therefore function pointers cannot directly be in
185 * shared memory.
186 *
187 * Without 2), we could fairly easily allow adding new callbacks, by filling an
188 * ID->pointer mapping table on demand. In the presence of 2) that's still
189 * doable, but harder, because every process has to re-register the pointers
190 * so that a local ID->"backend local pointer" mapping can be maintained.
191 */
193{
195
197
199
202
203#define PGAIO_HCB_MAX PGAIO_HCB_LOCAL_BUFFER_READV
205 "PGAIO_HCB_MAX is too big for PGAIO_RESULT_ID_BITS");
206
207
208typedef void (*PgAioHandleCallbackStage) (PgAioHandle *ioh, uint8 cb_flags);
209typedef PgAioResult (*PgAioHandleCallbackComplete) (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_flags);
210typedef void (*PgAioHandleCallbackReport) (PgAioResult result, const PgAioTargetData *target_data, int elevel);
211
212/* typedef is in aio_types.h */
214{
215 /*
216 * Prepare resources affected by the IO for execution. This could e.g.
217 * include moving ownership of buffer pins to the AIO subsystem.
218 */
220
221 /*
222 * Update the state of resources affected by the IO to reflect completion
223 * of the IO. This could e.g. include updating shared buffer state to
224 * signal the IO has finished.
225 *
226 * The _shared suffix indicates that this is executed by the backend that
227 * completed the IO, which may or may not be the backend that issued the
228 * IO. Obviously the callback thus can only modify resources in shared
229 * memory.
230 *
231 * The latest registered callback is called first. This allows
232 * higher-level code to register callbacks that can rely on callbacks
233 * registered by lower-level code to already have been executed.
234 *
235 * NB: This is called in a critical section. Errors can be signalled by
236 * the callback's return value; it's the responsibility of the IO's issuer
237 * to react appropriately.
238 */
240
241 /*
242 * Like complete_shared, except called in the issuing backend.
243 *
244 * This variant of the completion callback is useful when backend-local
245 * state has to be updated to reflect the IO's completion. E.g. a
246 * temporary buffer's BufferDesc isn't accessible in complete_shared.
247 *
248 * Local callbacks are only called after complete_shared for all
249 * registered callbacks has been called.
250 */
252
253 /*
254 * Report the result of an IO operation. This is e.g. used to raise an
255 * error after an IO failed at the appropriate time (i.e. not when the IO
256 * failed, but under control of the code that issued the IO).
257 */
259};
260
261
262
263/*
264 * How many callbacks can be registered for one IO handle. Currently we only
265 * need two, but it's not hard to imagine needing a few more.
266 */
267#define PGAIO_HANDLE_MAX_CALLBACKS 4
268
269
270
271/* --------------------------------------------------------------------------------
272 * IO Handles
273 * --------------------------------------------------------------------------------
274 */
275
276/* functions in aio.c */
277struct ResourceOwnerData;
278extern PgAioHandle *pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret);
279extern PgAioHandle *pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret);
280
281extern void pgaio_io_release(PgAioHandle *ioh);
282struct dlist_node;
283extern void pgaio_io_release_resowner(struct dlist_node *ioh_node, bool on_error);
284
286
287extern int pgaio_io_get_id(PgAioHandle *ioh);
289
290extern void pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow);
291
292/* functions in aio_io.c */
293struct iovec;
294extern int pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov);
295
298
299extern void pgaio_io_start_readv(PgAioHandle *ioh,
300 int fd, int iovcnt, uint64 offset);
301extern void pgaio_io_start_writev(PgAioHandle *ioh,
302 int fd, int iovcnt, uint64 offset);
303
304/* functions in aio_target.c */
305extern void pgaio_io_set_target(PgAioHandle *ioh, PgAioTargetID targetid);
306extern bool pgaio_io_has_target(PgAioHandle *ioh);
309
310/* functions in aio_callback.c */
312 uint8 cb_data);
316
317
318
319/* --------------------------------------------------------------------------------
320 * IO Wait References
321 * --------------------------------------------------------------------------------
322 */
323
324extern void pgaio_wref_clear(PgAioWaitRef *iow);
325extern bool pgaio_wref_valid(PgAioWaitRef *iow);
326extern int pgaio_wref_get_id(PgAioWaitRef *iow);
327
328extern void pgaio_wref_wait(PgAioWaitRef *iow);
329extern bool pgaio_wref_check_done(PgAioWaitRef *iow);
330
331
332
333/* --------------------------------------------------------------------------------
334 * IO Result
335 * --------------------------------------------------------------------------------
336 */
337
338extern void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data,
339 int elevel);
340
341
342
343/* --------------------------------------------------------------------------------
344 * Actions on multiple IOs.
345 * --------------------------------------------------------------------------------
346 */
347
348extern void pgaio_enter_batchmode(void);
349extern void pgaio_exit_batchmode(void);
350extern void pgaio_submit_staged(void);
351extern bool pgaio_have_staged(void);
352
353
354
355/* --------------------------------------------------------------------------------
356 * Other
357 * --------------------------------------------------------------------------------
358 */
359
360extern void pgaio_closing_fd(int fd);
361
362
363
364/* GUCs */
365extern PGDLLIMPORT int io_method;
367
368
369#endif /* AIO_H */
void(* PgAioHandleCallbackReport)(PgAioResult result, const PgAioTargetData *target_data, int elevel)
Definition: aio.h:210
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
Definition: aio_target.c:72
PGDLLIMPORT int io_max_concurrency
Definition: aio.c:78
PgAioHandleCallbackID
Definition: aio.h:193
@ PGAIO_HCB_MD_READV
Definition: aio.h:196
@ PGAIO_HCB_LOCAL_BUFFER_READV
Definition: aio.h:200
@ PGAIO_HCB_SHARED_BUFFER_READV
Definition: aio.h:198
@ PGAIO_HCB_INVALID
Definition: aio.h:194
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition: aio.c:873
int pgaio_io_get_id(PgAioHandle *ioh)
Definition: aio.c:330
IoMethod
Definition: aio.h:33
@ IOMETHOD_WORKER
Definition: aio.h:35
@ IOMETHOD_SYNC
Definition: aio.h:34
PgAioHandle * pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition: aio.c:173
PgAioTargetID
Definition: aio.h:117
@ PGAIO_TID_SMGR
Definition: aio.h:120
@ PGAIO_TID_INVALID
Definition: aio.h:119
void pgaio_wref_clear(PgAioWaitRef *iow)
Definition: aio.c:866
PgAioOp
Definition: aio.h:88
@ PGAIO_OP_WRITEV
Definition: aio.h:93
@ PGAIO_OP_INVALID
Definition: aio.h:90
@ PGAIO_OP_READV
Definition: aio.h:92
void pgaio_io_set_handle_data_32(PgAioHandle *ioh, uint32 *data, uint8 len)
Definition: aio_callback.c:139
StaticAssertDecl(PGAIO_HCB_MAX<=(1<< PGAIO_RESULT_ID_BITS), "PGAIO_HCB_MAX is too big for PGAIO_RESULT_ID_BITS")
void pgaio_io_start_readv(PgAioHandle *ioh, int fd, int iovcnt, uint64 offset)
Definition: aio_io.c:78
void(* PgAioHandleCallbackStage)(PgAioHandle *ioh, uint8 cb_flags)
Definition: aio.h:208
PgAioOpData * pgaio_io_get_op_data(PgAioHandle *ioh)
Definition: aio_io.c:58
void pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
Definition: aio.c:354
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
Definition: aio_callback.c:86
void pgaio_closing_fd(int fd)
Definition: aio.c:1117
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
Definition: aio.c:318
bool pgaio_have_staged(void)
Definition: aio.c:1004
PgAioOp pgaio_io_get_op(PgAioHandle *ioh)
Definition: aio_io.c:52
PgAioHandleFlags
Definition: aio.h:49
@ PGAIO_HF_SYNCHRONOUS
Definition: aio.h:70
@ PGAIO_HF_REFERENCES_LOCAL
Definition: aio.h:60
@ PGAIO_HF_BUFFERED
Definition: aio.h:77
PgAioResult(* PgAioHandleCallbackComplete)(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_flags)
Definition: aio.h:209
bool pgaio_io_has_target(PgAioHandle *ioh)
Definition: aio_target.c:40
uint64 * pgaio_io_get_handle_data(PgAioHandle *ioh, uint8 *len)
Definition: aio_callback.c:154
void pgaio_io_start_writev(PgAioHandle *ioh, int fd, int iovcnt, uint64 offset)
Definition: aio_io.c:91
void pgaio_io_set_handle_data_64(PgAioHandle *ioh, uint64 *data, uint8 len)
Definition: aio_callback.c:122
bool pgaio_wref_check_done(PgAioWaitRef *iow)
Definition: aio.c:907
PGDLLIMPORT int io_method
Definition: aio.c:77
ProcNumber pgaio_io_get_owner(PgAioHandle *ioh)
Definition: aio.c:343
void pgaio_enter_batchmode(void)
Definition: aio.c:978
void pgaio_io_release_resowner(struct dlist_node *ioh_node, bool on_error)
Definition: aio.c:262
void pgaio_submit_staged(void)
Definition: aio.c:1020
char * pgaio_io_get_target_description(PgAioHandle *ioh)
Definition: aio_target.c:83
#define PGAIO_HCB_MAX
Definition: aio.h:203
void pgaio_wref_wait(PgAioWaitRef *iow)
Definition: aio.c:893
void pgaio_io_release(PgAioHandle *ioh)
Definition: aio.c:242
int pgaio_wref_get_id(PgAioWaitRef *iow)
Definition: aio.c:882
void pgaio_io_set_target(PgAioHandle *ioh, PgAioTargetID targetid)
Definition: aio_target.c:63
int pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov)
Definition: aio_io.c:42
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
Definition: aio_callback.c:171
void pgaio_exit_batchmode(void)
Definition: aio.c:989
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition: aio.c:199
#define PGAIO_RESULT_ID_BITS
Definition: aio_types.h:96
struct PgAioResult PgAioResult
#define PGDLLIMPORT
Definition: c.h:1291
uint8_t uint8
Definition: c.h:500
uint64_t uint64
Definition: c.h:503
uint16_t uint16
Definition: c.h:501
uint32_t uint32
Definition: c.h:502
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
const void size_t len
const void * data
static int fd(const char *x, int i)
Definition: preproc-init.c:105
int ProcNumber
Definition: procnumber.h:24
PgAioHandleCallbackComplete complete_shared
Definition: aio.h:239
PgAioHandleCallbackStage stage
Definition: aio.h:219
PgAioHandleCallbackReport report
Definition: aio.h:258
PgAioHandleCallbackComplete complete_local
Definition: aio.h:251
void(* reopen)(PgAioHandle *ioh)
Definition: aio.h:164
const char * name
Definition: aio.h:170
char * flag(int b)
Definition: test-ctype.c:33
uint64 offset
Definition: aio.h:140
int fd
Definition: aio.h:138
uint16 iov_length
Definition: aio.h:139