PostgreSQL Source Code git master
Loading...
Searching...
No Matches
aio.c File Reference
#include "postgres.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "port/atomics.h"
#include "storage/aio.h"
#include "storage/aio_internal.h"
#include "storage/aio_subsys.h"
#include "utils/guc.h"
#include "utils/guc_hooks.h"
#include "utils/injection_point.h"
#include "utils/resowner.h"
#include "utils/wait_event_types.h"
Include dependency graph for aio.c:

Go to the source code of this file.

Macros

#define PGAIO_HS_TOSTR_CASE(sym)   case PGAIO_HS_##sym: return #sym
 

Functions

static void pgaio_io_update_state (PgAioHandle *ioh, PgAioHandleState new_state)
 
static void pgaio_io_reclaim (PgAioHandle *ioh)
 
static void pgaio_io_resowner_register (PgAioHandle *ioh, struct ResourceOwnerData *resowner)
 
static void pgaio_io_wait_for_free (void)
 
static PgAioHandle * pgaio_io_from_wref (PgAioWaitRef *iow, uint64 *ref_generation)
 
static const char * pgaio_io_state_get_name (PgAioHandleState s)
 
static void pgaio_io_wait (PgAioHandle *ioh, uint64 ref_generation)
 
 StaticAssertDecl (lengthof(io_method_options)==lengthof(pgaio_method_ops_table)+1, "io_method_options out of sync with pgaio_method_ops_table")
 
PgAioHandle * pgaio_io_acquire (struct ResourceOwnerData *resowner, PgAioReturn *ret)
 
PgAioHandle * pgaio_io_acquire_nb (struct ResourceOwnerData *resowner, PgAioReturn *ret)
 
void pgaio_io_release (PgAioHandle *ioh)
 
void pgaio_io_release_resowner (dlist_node *ioh_node, bool on_error)
 
void pgaio_io_set_flag (PgAioHandle *ioh, PgAioHandleFlags flag)
 
int pgaio_io_get_id (PgAioHandle *ioh)
 
ProcNumber pgaio_io_get_owner (PgAioHandle *ioh)
 
void pgaio_io_get_wref (PgAioHandle *ioh, PgAioWaitRef *iow)
 
void pgaio_io_stage (PgAioHandle *ioh, PgAioOp op)
 
bool pgaio_io_needs_synchronous_execution (PgAioHandle *ioh)
 
void pgaio_io_prepare_submit (PgAioHandle *ioh)
 
void pgaio_io_process_completion (PgAioHandle *ioh, int result)
 
bool pgaio_io_was_recycled (PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
 
const char * pgaio_io_get_state_name (PgAioHandle *ioh)
 
const char * pgaio_result_status_string (PgAioResultStatus rs)
 
void pgaio_wref_clear (PgAioWaitRef *iow)
 
bool pgaio_wref_valid (PgAioWaitRef *iow)
 
int pgaio_wref_get_id (PgAioWaitRef *iow)
 
void pgaio_wref_wait (PgAioWaitRef *iow)
 
bool pgaio_wref_check_done (PgAioWaitRef *iow)
 
void pgaio_enter_batchmode (void)
 
void pgaio_exit_batchmode (void)
 
bool pgaio_have_staged (void)
 
void pgaio_submit_staged (void)
 
void pgaio_error_cleanup (void)
 
void AtEOXact_Aio (bool is_commit)
 
void pgaio_closing_fd (int fd)
 
void pgaio_shutdown (int code, Datum arg)
 
void assign_io_method (int newval, void *extra)
 
bool check_io_max_concurrency (int *newval, void **extra, GucSource source)
 

Variables

const struct config_enum_entry io_method_options []
 
int io_method = DEFAULT_IO_METHOD
 
int io_max_concurrency = -1
 
PgAioCtl * pgaio_ctl
 
PgAioBackend * pgaio_my_backend
 
static const IoMethodOps *const pgaio_method_ops_table []
 
const IoMethodOps * pgaio_method_ops
 

Macro Definition Documentation

◆ PGAIO_HS_TOSTR_CASE

#define PGAIO_HS_TOSTR_CASE (   sym)    case PGAIO_HS_##sym: return #sym

Function Documentation

◆ assign_io_method()

void assign_io_method ( int  newval,
void * extra 
)

Definition at line 1322 of file aio.c.

1323{
1326
1328}
const IoMethodOps * pgaio_method_ops
Definition aio.c:96
static const IoMethodOps *const pgaio_method_ops_table[]
Definition aio.c:84
#define Assert(condition)
Definition c.h:873
#define lengthof(array)
Definition c.h:803
#define newval
static int fb(int x)

References Assert, fb(), lengthof, newval, pgaio_method_ops, and pgaio_method_ops_table.

◆ AtEOXact_Aio()

void AtEOXact_Aio ( bool  is_commit)

Definition at line 1193 of file aio.c.

1194{
1195 /*
1196 * We should never be in batch mode at transactional boundaries. In case
1197 * an error was thrown while in batch mode, pgaio_error_cleanup() should
1198 * have exited batchmode.
1199 *
1200 * In case we are in batchmode somehow, make sure to submit all staged
1201 * IOs, other backends may need them to complete to continue.
1202 */
1204 {
1206 elog(WARNING, "open AIO batch at end of (sub-)transaction");
1207 }
1208
1209 /*
1210 * As we aren't in batchmode, there shouldn't be any unsubmitted IOs.
1211 */
1213}
PgAioBackend * pgaio_my_backend
Definition aio.c:81
void pgaio_error_cleanup(void)
Definition aio.c:1165
#define WARNING
Definition elog.h:36
#define elog(elevel,...)
Definition elog.h:226
uint16 num_staged_ios

References Assert, elog, PgAioBackend::in_batchmode, PgAioBackend::num_staged_ios, pgaio_error_cleanup(), pgaio_my_backend, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), CommitTransaction(), pgaio_shutdown(), and PrepareTransaction().

◆ check_io_max_concurrency()

bool check_io_max_concurrency ( int * newval,
void **  extra,
GucSource  source 
)

Definition at line 1331 of file aio.c.

1332{
1333 if (*newval == -1)
1334 {
1335 /*
1336 * Auto-tuning will be applied later during startup, as auto-tuning
1337 * depends on the value of various GUCs.
1338 */
1339 return true;
1340 }
1341 else if (*newval == 0)
1342 {
1343 GUC_check_errdetail("Only -1 or values bigger than 0 are valid.");
1344 return false;
1345 }
1346
1347 return true;
1348}
#define GUC_check_errdetail
Definition guc.h:505

References GUC_check_errdetail, and newval.

◆ pgaio_closing_fd()

void pgaio_closing_fd ( int  fd)

Definition at line 1220 of file aio.c.

1221{
1222 /*
1223 * Might be called before AIO is initialized or in a subprocess that
1224 * doesn't use AIO.
1225 */
1226 if (!pgaio_my_backend)
1227 return;
1228
1229 /*
1230 * For now just submit all staged IOs - we could be more selective, but
1231 * it's probably not worth it.
1232 */
1234 {
1236 "submitting %d IOs before FD %d gets closed",
1239 }
1240
1241 /*
1242 * If requested by the IO method, wait for all IOs that use the
1243 * to-be-closed FD.
1244 */
1246 {
1247 /*
1248 * As waiting for one IO to complete may complete multiple IOs, we
1249 * can't just use a mutable list iterator. The maximum number of
1250 * in-flight IOs is fairly small, so just restart the loop after
1251 * waiting for an IO.
1252 */
1254 {
1255 dlist_iter iter;
1256 PgAioHandle *ioh = NULL;
1257 uint64 generation;
1258
1260 {
1261 ioh = dclist_container(PgAioHandle, node, iter.cur);
1262
1263 generation = ioh->generation;
1264
1265 if (pgaio_io_uses_fd(ioh, fd))
1266 break;
1267 else
1268 ioh = NULL;
1269 }
1270
1271 if (!ioh)
1272 break;
1273
1275 "waiting for IO before FD %d gets closed, %u in-flight IOs",
1277
1278 /* see comment in pgaio_io_wait_for_free() about raciness */
1279 pgaio_io_wait(ioh, generation);
1280 }
1281 }
1282}
void pgaio_submit_staged(void)
Definition aio.c:1123
static void pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
Definition aio.c:579
#define pgaio_debug(elevel, msg,...)
#define pgaio_debug_io(elevel, ioh, msg,...)
bool pgaio_io_uses_fd(PgAioHandle *ioh, int fd)
Definition aio_io.c:197
uint64_t uint64
Definition c.h:547
#define DEBUG2
Definition elog.h:29
#define dclist_container(type, membername, ptr)
Definition ilist.h:947
static uint32 dclist_count(const dclist_head *head)
Definition ilist.h:932
static bool dclist_is_empty(const dclist_head *head)
Definition ilist.h:682
#define dclist_foreach(iter, lhead)
Definition ilist.h:970
static int fd(const char *x, int i)
bool wait_on_fd_before_close
dclist_head in_flight_ios
dlist_node * cur
Definition ilist.h:179

References dlist_iter::cur, dclist_container, dclist_count(), dclist_foreach, dclist_is_empty(), DEBUG2, fb(), fd(), PgAioBackend::in_flight_ios, PgAioBackend::num_staged_ios, pgaio_debug, pgaio_debug_io, pgaio_io_uses_fd(), pgaio_io_wait(), pgaio_method_ops, pgaio_my_backend, pgaio_submit_staged(), and IoMethodOps::wait_on_fd_before_close.

Referenced by CloseTransientFile(), FileClose(), FreeDesc(), and LruDelete().

◆ pgaio_enter_batchmode()

void pgaio_enter_batchmode ( void  )

Definition at line 1081 of file aio.c.

1082{
1084 elog(ERROR, "starting batch while batch already in progress");
1086}
#define ERROR
Definition elog.h:39

References elog, ERROR, PgAioBackend::in_batchmode, and pgaio_my_backend.

Referenced by batch_start(), read_rel_block_ll(), and read_stream_look_ahead().

◆ pgaio_error_cleanup()

void pgaio_error_cleanup ( void  )

Definition at line 1165 of file aio.c.

1166{
1167 /*
1168 * It is possible that code errored out after pgaio_enter_batchmode() but
1169 * before pgaio_exit_batchmode() was called. In that case we need to
1170 * submit the IO now.
1171 */
1173 {
1175
1177 }
1178
1179 /*
1180 * As we aren't in batchmode, there shouldn't be any unsubmitted IOs.
1181 */
1183}

References Assert, PgAioBackend::in_batchmode, PgAioBackend::num_staged_ios, pgaio_my_backend, and pgaio_submit_staged().

Referenced by AbortSubTransaction(), AbortTransaction(), AtEOXact_Aio(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), pgarch_archiveXlog(), WalSndErrorCleanup(), WalSummarizerMain(), and WalWriterMain().

◆ pgaio_exit_batchmode()

void pgaio_exit_batchmode ( void  )

◆ pgaio_have_staged()

bool pgaio_have_staged ( void  )

◆ pgaio_io_acquire()

PgAioHandle * pgaio_io_acquire ( struct ResourceOwnerData * resowner,
PgAioReturn * ret 
)

Definition at line 162 of file aio.c.

163{
164 PgAioHandle *h;
165
166 while (true)
167 {
168 h = pgaio_io_acquire_nb(resowner, ret);
169
170 if (h != NULL)
171 return h;
172
173 /*
174 * Evidently all handles by this backend are in use. Just wait for
175 * some to complete.
176 */
178 }
179}
static void pgaio_io_wait_for_free(void)
Definition aio.c:761
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition aio.c:188

References fb(), pgaio_io_acquire_nb(), and pgaio_io_wait_for_free().

Referenced by AsyncReadBuffers(), handle_get(), handle_get_and_error(), handle_get_release(), handle_get_twice(), and read_rel_block_ll().

◆ pgaio_io_acquire_nb()

PgAioHandle * pgaio_io_acquire_nb ( struct ResourceOwnerData * resowner,
PgAioReturn * ret 
)

Definition at line 188 of file aio.c.

189{
191
193 {
196 }
197
199 elog(ERROR, "API violation: Only one IO can be handed out");
200
201 /*
202 * Probably not needed today, as interrupts should not process this IO,
203 * but...
204 */
206
208 {
210
212
213 Assert(ioh->state == PGAIO_HS_IDLE);
214 Assert(ioh->owner_procno == MyProcNumber);
215
218
219 if (resowner)
221
222 if (ret)
223 {
224 ioh->report_return = ret;
226 }
227 }
228
230
231 return ioh;
232}
static void pgaio_io_update_state(PgAioHandle *ioh, PgAioHandleState new_state)
Definition aio.c:386
static void pgaio_io_resowner_register(PgAioHandle *ioh, struct ResourceOwnerData *resowner)
Definition aio.c:409
@ PGAIO_HS_IDLE
@ PGAIO_HS_HANDED_OUT
#define PGAIO_SUBMIT_BATCH_SIZE
@ PGAIO_RS_UNKNOWN
Definition aio_types.h:80
ProcNumber MyProcNumber
Definition globals.c:90
static dlist_node * dclist_pop_head_node(dclist_head *head)
Definition ilist.h:789
#define RESUME_INTERRUPTS()
Definition miscadmin.h:136
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
dclist_head idle_ios
PgAioHandle * handed_out_io
uint32 status
Definition aio_types.h:108
PgAioResult result
Definition aio_types.h:132

References Assert, dclist_container, dclist_is_empty(), dclist_pop_head_node(), elog, ERROR, fb(), PgAioBackend::handed_out_io, HOLD_INTERRUPTS, PgAioBackend::idle_ios, MyProcNumber, PgAioBackend::num_staged_ios, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, pgaio_io_resowner_register(), pgaio_io_update_state(), pgaio_my_backend, PGAIO_RS_UNKNOWN, PGAIO_SUBMIT_BATCH_SIZE, pgaio_submit_staged(), PgAioReturn::result, RESUME_INTERRUPTS, and PgAioResult::status.

Referenced by AsyncReadBuffers(), and pgaio_io_acquire().

◆ pgaio_io_from_wref()

static PgAioHandle * pgaio_io_from_wref ( PgAioWaitRef * iow,
uint64 * ref_generation 
)
static

Definition at line 891 of file aio.c.

892{
894
895 Assert(iow->aio_index < pgaio_ctl->io_handle_count);
896
897 ioh = &pgaio_ctl->io_handles[iow->aio_index];
898
899 *ref_generation = ((uint64) iow->generation_upper) << 32 |
900 iow->generation_lower;
901
902 Assert(*ref_generation != 0);
903
904 return ioh;
905}
PgAioCtl * pgaio_ctl
Definition aio.c:78
PgAioHandle * io_handles
uint32 io_handle_count

References Assert, fb(), PgAioCtl::io_handle_count, PgAioCtl::io_handles, and pgaio_ctl.

Referenced by pgaio_wref_check_done(), and pgaio_wref_wait().

◆ pgaio_io_get_id()

◆ pgaio_io_get_owner()

ProcNumber pgaio_io_get_owner ( PgAioHandle * ioh)

Definition at line 355 of file aio.c.

356{
357 return ioh->owner_procno;
358}

References fb(), and PgAioHandle::owner_procno.

Referenced by buffer_readv_complete(), and smgr_aio_reopen().

◆ pgaio_io_get_state_name()

const char * pgaio_io_get_state_name ( PgAioHandle * ioh)

Definition at line 928 of file aio.c.

929{
930 return pgaio_io_state_get_name(ioh->state);
931}
static const char * pgaio_io_state_get_name(PgAioHandleState s)
Definition aio.c:908

References fb(), and pgaio_io_state_get_name().

Referenced by pg_get_aios(), and pgaio_io_wait().

◆ pgaio_io_get_wref()

void pgaio_io_get_wref ( PgAioHandle * ioh,
PgAioWaitRef * iow 
)

Definition at line 366 of file aio.c.

367{
368 Assert(ioh->state == PGAIO_HS_HANDED_OUT ||
369 ioh->state == PGAIO_HS_DEFINED ||
370 ioh->state == PGAIO_HS_STAGED);
371 Assert(ioh->generation != 0);
372
373 iow->aio_index = ioh - pgaio_ctl->io_handles;
374 iow->generation_upper = (uint32) (ioh->generation >> 32);
375 iow->generation_lower = (uint32) ioh->generation;
376}
@ PGAIO_HS_STAGED
@ PGAIO_HS_DEFINED
uint32_t uint32
Definition c.h:546
uint64 generation

References Assert, fb(), PgAioHandle::generation, PgAioCtl::io_handles, pgaio_ctl, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, and PGAIO_HS_STAGED.

Referenced by AsyncReadBuffers(), buffer_stage_common(), and read_rel_block_ll().

◆ pgaio_io_needs_synchronous_execution()

bool pgaio_io_needs_synchronous_execution ( PgAioHandle * ioh)

Definition at line 483 of file aio.c.

484{
485 /*
486 * If the caller said to execute the IO synchronously, do so.
487 *
488 * XXX: We could optimize the logic when to execute synchronously by first
489 * checking if there are other IOs in flight and only synchronously
490 * executing if not. Unclear whether that'll be sufficiently common to be
491 * worth worrying about.
492 */
493 if (ioh->flags & PGAIO_HF_SYNCHRONOUS)
494 return true;
495
496 /* Check if the IO method requires synchronous execution of IO */
499
500 return false;
501}
@ PGAIO_HF_SYNCHRONOUS
Definition aio.h:70
bool(* needs_synchronous_execution)(PgAioHandle *ioh)

References fb(), IoMethodOps::needs_synchronous_execution, PGAIO_HF_SYNCHRONOUS, and pgaio_method_ops.

Referenced by pgaio_io_stage().

◆ pgaio_io_prepare_submit()

void pgaio_io_prepare_submit ( PgAioHandle * ioh)

Definition at line 510 of file aio.c.

511{
513
515}
@ PGAIO_HS_SUBMITTED
static void dclist_push_tail(dclist_head *head, dlist_node *node)
Definition ilist.h:709

References dclist_push_tail(), fb(), PgAioBackend::in_flight_ios, PGAIO_HS_SUBMITTED, pgaio_io_update_state(), and pgaio_my_backend.

Referenced by pgaio_io_stage(), and pgaio_worker_submit().

◆ pgaio_io_process_completion()

void pgaio_io_process_completion ( PgAioHandle * ioh,
int  result 
)

Definition at line 528 of file aio.c.

529{
530 Assert(ioh->state == PGAIO_HS_SUBMITTED);
531
533
534 ioh->result = result;
535
537
538 INJECTION_POINT("aio-process-completion-before-shared", ioh);
539
541
543
544 /* condition variable broadcast ensures state is visible before wakeup */
546
547 /* contains call to pgaio_io_call_complete_local() */
548 if (ioh->owner_procno == MyProcNumber)
550}
static void pgaio_io_reclaim(PgAioHandle *ioh)
Definition aio.c:675
void pgaio_io_call_complete_shared(PgAioHandle *ioh)
@ PGAIO_HS_COMPLETED_SHARED
@ PGAIO_HS_COMPLETED_IO
void ConditionVariableBroadcast(ConditionVariable *cv)
volatile uint32 CritSectionCount
Definition globals.c:45
#define INJECTION_POINT(name, arg)

References Assert, ConditionVariableBroadcast(), CritSectionCount, fb(), INJECTION_POINT, MyProcNumber, PGAIO_HS_COMPLETED_IO, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_SUBMITTED, pgaio_io_call_complete_shared(), pgaio_io_reclaim(), and pgaio_io_update_state().

Referenced by IoWorkerMain(), and pgaio_io_perform_synchronously().

◆ pgaio_io_reclaim()

static void pgaio_io_reclaim ( PgAioHandle * ioh)
static

Definition at line 675 of file aio.c.

676{
677 /* This is only ok if it's our IO */
678 Assert(ioh->owner_procno == MyProcNumber);
679 Assert(ioh->state != PGAIO_HS_IDLE);
680
681 /* see comment in function header */
683
684 /*
685 * It's a bit ugly, but right now the easiest place to put the execution
686 * of local completion callbacks is this function, as we need to execute
687 * local callbacks just before reclaiming at multiple callsites.
688 */
689 if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
690 {
692
695
696 if (ioh->report_return)
697 {
698 ioh->report_return->result = local_result;
699 ioh->report_return->target_data = ioh->target_data;
700 }
701 }
702
704 "reclaiming: distilled_result: (status %s, id %u, error_data %d), raw_result: %d",
705 pgaio_result_status_string(ioh->distilled_result.status),
706 ioh->distilled_result.id,
707 ioh->distilled_result.error_data,
708 ioh->result);
709
710 /* if the IO has been defined, it's on the in-flight list, remove */
711 if (ioh->state != PGAIO_HS_HANDED_OUT)
713
714 if (ioh->resowner)
715 {
716 ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node);
717 ioh->resowner = NULL;
718 }
719
720 Assert(!ioh->resowner);
721
722 /*
723 * Update generation & state first, before resetting the IO's fields,
724 * otherwise a concurrent "viewer" could think the fields are valid, even
725 * though they are being reset. Increment the generation first, so that
726 * we can assert elsewhere that we never wait for an IDLE IO. While it's
727 * a bit weird for the state to go backwards for a generation, it's OK
728 * here, as there cannot be references to the "reborn" IO yet. Can't
729 * update both at once, so something has to give.
730 */
731 ioh->generation++;
733
734 /* ensure the state update is visible before we reset fields */
736
737 ioh->op = PGAIO_OP_INVALID;
738 ioh->target = PGAIO_TID_INVALID;
739 ioh->flags = 0;
740 ioh->num_callbacks = 0;
741 ioh->handle_data_len = 0;
742 ioh->report_return = NULL;
743 ioh->result = 0;
744 ioh->distilled_result.status = PGAIO_RS_UNKNOWN;
745
746 /*
747 * We push the IO to the head of the idle IO list, that seems more cache
748 * efficient in cases where only a few IOs are used.
749 */
751
753}
const char * pgaio_result_status_string(PgAioResultStatus rs)
Definition aio.c:934
@ PGAIO_TID_INVALID
Definition aio.h:119
@ PGAIO_OP_INVALID
Definition aio.h:90
PgAioResult pgaio_io_call_complete_local(PgAioHandle *ioh)
@ PGAIO_HS_COMPLETED_LOCAL
#define pg_write_barrier()
Definition atomics.h:155
#define DEBUG4
Definition elog.h:27
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition ilist.h:763
static void dclist_push_head(dclist_head *head, dlist_node *node)
Definition ilist.h:693
void ResourceOwnerForgetAioHandle(ResourceOwner owner, struct dlist_node *ioh_node)
Definition resowner.c:1107

References Assert, dclist_delete_from(), dclist_push_head(), DEBUG4, fb(), HOLD_INTERRUPTS, PgAioBackend::idle_ios, PgAioBackend::in_flight_ios, MyProcNumber, pg_write_barrier, pgaio_debug_io, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, pgaio_io_call_complete_local(), pgaio_io_update_state(), pgaio_my_backend, PGAIO_OP_INVALID, pgaio_result_status_string(), PGAIO_RS_UNKNOWN, PGAIO_TID_INVALID, ResourceOwnerForgetAioHandle(), and RESUME_INTERRUPTS.

Referenced by pgaio_io_process_completion(), pgaio_io_release(), pgaio_io_release_resowner(), pgaio_io_wait(), pgaio_io_wait_for_free(), and pgaio_wref_check_done().

◆ pgaio_io_release()

void pgaio_io_release ( PgAioHandle * ioh)

Definition at line 240 of file aio.c.

241{
243 {
244 Assert(ioh->state == PGAIO_HS_HANDED_OUT);
245 Assert(ioh->resowner);
246
248
249 /*
250 * Note that no interrupts are processed between the handed_out_io
251 * check and the call to reclaim - that's important as otherwise an
252 * interrupt could have already reclaimed the handle.
253 */
255 }
256 else
257 {
258 elog(ERROR, "release in unexpected state");
259 }
260}

References Assert, elog, ERROR, fb(), PgAioBackend::handed_out_io, PGAIO_HS_HANDED_OUT, pgaio_io_reclaim(), and pgaio_my_backend.

Referenced by AsyncReadBuffers(), handle_get_release(), and handle_release_last().

◆ pgaio_io_release_resowner()

void pgaio_io_release_resowner ( dlist_node * ioh_node,
bool  on_error 
)

Definition at line 266 of file aio.c.

267{
269
270 Assert(ioh->resowner);
271
272 /*
273 * Otherwise an interrupt, in the middle of releasing the IO, could end up
274 * trying to wait for the IO, leading to state confusion.
275 */
277
278 ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node);
279 ioh->resowner = NULL;
280
281 switch ((PgAioHandleState) ioh->state)
282 {
283 case PGAIO_HS_IDLE:
284 elog(ERROR, "unexpected");
285 break;
288
290 {
292 if (!on_error)
293 elog(WARNING, "leaked AIO handle");
294 }
295
297 break;
298 case PGAIO_HS_DEFINED:
299 case PGAIO_HS_STAGED:
300 if (!on_error)
301 elog(WARNING, "AIO handle was not submitted");
303 break;
308 /* this is expected to happen */
309 break;
310 }
311
312 /*
313 * Need to unregister the reporting of the IO's result, the memory it's
314 * referencing likely has gone away.
315 */
316 if (ioh->report_return)
317 ioh->report_return = NULL;
318
320}
PgAioHandleState
#define dlist_container(type, membername, ptr)
Definition ilist.h:593

References Assert, dlist_container, elog, ERROR, fb(), PgAioBackend::handed_out_io, HOLD_INTERRUPTS, PGAIO_HS_COMPLETED_IO, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, PGAIO_HS_STAGED, PGAIO_HS_SUBMITTED, pgaio_io_reclaim(), pgaio_my_backend, pgaio_submit_staged(), ResourceOwnerForgetAioHandle(), RESUME_INTERRUPTS, and WARNING.

Referenced by ResourceOwnerReleaseInternal().

◆ pgaio_io_resowner_register()

static void pgaio_io_resowner_register ( PgAioHandle * ioh,
struct ResourceOwnerData * resowner 
)
static

Definition at line 409 of file aio.c.

410{
411 Assert(!ioh->resowner);
412 Assert(resowner);
413
414 ResourceOwnerRememberAioHandle(resowner, &ioh->resowner_node);
415 ioh->resowner = resowner;
416}
void ResourceOwnerRememberAioHandle(ResourceOwner owner, struct dlist_node *ioh_node)
Definition resowner.c:1101

References Assert, fb(), and ResourceOwnerRememberAioHandle().

Referenced by pgaio_io_acquire_nb().

◆ pgaio_io_set_flag()

void pgaio_io_set_flag ( PgAioHandle * ioh,
PgAioHandleFlags  flag 
)

Definition at line 330 of file aio.c.

331{
332 Assert(ioh->state == PGAIO_HS_HANDED_OUT);
333
334 ioh->flags |= flag;
335}
char * flag(int b)
Definition test-ctype.c:33

References Assert, fb(), flag(), and PGAIO_HS_HANDED_OUT.

Referenced by AsyncReadBuffers(), mdstartreadv(), and read_rel_block_ll().

◆ pgaio_io_stage()

void pgaio_io_stage ( PgAioHandle * ioh,
PgAioOp  op 
)

Definition at line 424 of file aio.c.

425{
427
428 Assert(ioh->state == PGAIO_HS_HANDED_OUT);
431
432 /*
433 * Otherwise an interrupt, in the middle of staging and possibly executing
434 * the IO, could end up trying to wait for the IO, leading to state
435 * confusion.
436 */
438
439 ioh->op = op;
440 ioh->result = 0;
441
443
444 /* allow a new IO to be staged */
446
448
450
451 /*
452 * Synchronous execution has to be executed, well, synchronously, so check
453 * that first.
454 */
456
458 "staged (synchronous: %d, in_batch: %d)",
460
462 {
465
466 /*
467 * Unless code explicitly opted into batching IOs, submit the IO
468 * immediately.
469 */
472 }
473 else
474 {
477 }
478
480}
bool pgaio_io_needs_synchronous_execution(PgAioHandle *ioh)
Definition aio.c:483
void pgaio_io_prepare_submit(PgAioHandle *ioh)
Definition aio.c:510
void pgaio_io_call_stage(PgAioHandle *ioh)
void pgaio_io_perform_synchronously(PgAioHandle *ioh)
Definition aio_io.c:116
bool pgaio_io_has_target(PgAioHandle *ioh)
Definition aio_target.c:40
#define DEBUG3
Definition elog.h:28
PgAioHandle * staged_ios[PGAIO_SUBMIT_BATCH_SIZE]

References Assert, DEBUG3, fb(), PgAioBackend::handed_out_io, HOLD_INTERRUPTS, PgAioBackend::in_batchmode, PgAioBackend::num_staged_ios, pgaio_debug_io, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, PGAIO_HS_STAGED, pgaio_io_call_stage(), pgaio_io_has_target(), pgaio_io_needs_synchronous_execution(), pgaio_io_perform_synchronously(), pgaio_io_prepare_submit(), pgaio_io_update_state(), pgaio_my_backend, PGAIO_SUBMIT_BATCH_SIZE, pgaio_submit_staged(), RESUME_INTERRUPTS, and PgAioBackend::staged_ios.

Referenced by pgaio_io_start_readv(), and pgaio_io_start_writev().

◆ pgaio_io_state_get_name()

static const char * pgaio_io_state_get_name ( PgAioHandleState  s)
static

Definition at line 908 of file aio.c.

909{
910#define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym
911 switch (s)
912 {
921 }
922#undef PGAIO_HS_TOSTR_CASE
923
924 return NULL; /* silence compiler */
925}
#define PGAIO_HS_TOSTR_CASE(sym)

References fb(), and PGAIO_HS_TOSTR_CASE.

Referenced by pgaio_io_get_state_name(), and pgaio_io_update_state().

◆ pgaio_io_update_state()

static void pgaio_io_update_state ( PgAioHandle * ioh,
PgAioHandleState  new_state 
)
inline static

Definition at line 386 of file aio.c.

387{
388 /*
389 * All callers need to have held interrupts in some form, otherwise
390 * interrupt processing could wait for the IO to complete, while in an
391 * intermediary state.
392 */
394
396 "updating state to %s",
398
399 /*
400 * Ensure the changes signified by the new state are visible before the
401 * new state becomes visible.
402 */
404
405 ioh->state = new_state;
406}
#define DEBUG5
Definition elog.h:26
#define INTERRUPTS_CAN_BE_PROCESSED()
Definition miscadmin.h:130

References Assert, DEBUG5, fb(), INTERRUPTS_CAN_BE_PROCESSED, pg_write_barrier, pgaio_debug_io, and pgaio_io_state_get_name().

Referenced by pgaio_io_acquire_nb(), pgaio_io_prepare_submit(), pgaio_io_process_completion(), pgaio_io_reclaim(), and pgaio_io_stage().

◆ pgaio_io_wait()

static void pgaio_io_wait ( PgAioHandle * ioh,
uint64  ref_generation 
)
static

Definition at line 579 of file aio.c.

580{
582 bool am_owner;
583
584 am_owner = ioh->owner_procno == MyProcNumber;
585
587 return;
588
589 if (am_owner)
590 {
595 {
596 elog(PANIC, "waiting for own IO %d in wrong state: %s",
598 }
599 }
600
601 while (true)
602 {
604 return;
605
606 switch (state)
607 {
608 case PGAIO_HS_IDLE:
610 elog(ERROR, "IO in wrong state: %d", state);
611 break;
612
614
615 /*
616 * If we need to wait via the IO method, do so now. Don't
617 * check via the IO method if the issuing backend is executing
618 * the IO synchronously.
619 */
621 {
623 continue;
624 }
625 /* fallthrough */
626
627 /* waiting for owner to submit */
628 case PGAIO_HS_DEFINED:
629 case PGAIO_HS_STAGED:
630 /* waiting for reaper to complete */
631 /* fallthrough */
633 /* shouldn't be able to hit this otherwise */
635 /* ensure we're going to get woken up */
637
639 {
642 break;
644 }
645
647 break;
648
651
652 /*
653 * Note that no interrupts are processed between
654 * pgaio_io_was_recycled() and this check - that's important
655 * as otherwise an interrupt could have already reclaimed the
656 * handle.
657 */
658 if (am_owner)
660 return;
661 }
662 }
663}
int pgaio_io_get_id(PgAioHandle *ioh)
Definition aio.c:342
const char * pgaio_io_get_state_name(PgAioHandle *ioh)
Definition aio.c:928
bool pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
Definition aio.c:559
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define PANIC
Definition elog.h:42
bool IsUnderPostmaster
Definition globals.c:120
void(* wait_one)(PgAioHandle *ioh, uint64 ref_generation)

References Assert, ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), elog, ERROR, fb(), IsUnderPostmaster, MyProcNumber, PANIC, PGAIO_HF_SYNCHRONOUS, PGAIO_HS_COMPLETED_IO, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, PGAIO_HS_STAGED, PGAIO_HS_SUBMITTED, pgaio_io_get_id(), pgaio_io_get_state_name(), pgaio_io_reclaim(), pgaio_io_was_recycled(), pgaio_method_ops, and IoMethodOps::wait_one.

Referenced by pgaio_closing_fd(), pgaio_io_wait_for_free(), pgaio_shutdown(), and pgaio_wref_wait().

◆ pgaio_io_wait_for_free()

static void pgaio_io_wait_for_free ( void  )
static

Definition at line 761 of file aio.c.

762{
763 int reclaimed = 0;
764
765 pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u in-flight, %u idle IOs",
769
770 /*
771 * First check if any of our IOs actually have completed - when using
772 * worker, that'll often be the case. We could do so as part of the loop
773 * below, but that'd potentially lead us to wait for some IO submitted
774 * before.
775 */
776 for (int i = 0; i < io_max_concurrency; i++)
777 {
779
780 if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
781 {
782 /*
783 * Note that no interrupts are processed between the state check
784 * and the call to reclaim - that's important as otherwise an
785 * interrupt could have already reclaimed the handle.
786 *
787 * Need to ensure that there's no reordering, in the more common
788 * paths, where we wait for IO, that's done by
789 * pgaio_io_was_recycled().
790 */
793 reclaimed++;
794 }
795 }
796
797 if (reclaimed > 0)
798 return;
799
800 /*
801 * If we have any unsubmitted IOs, submit them now. We'll start waiting in
802 * a second, so it's better they're in flight. This also addresses the
803 * edge-case that all IOs are unsubmitted.
804 */
807
808 /* possibly some IOs finished during submission */
810 return;
811
814 errmsg_internal("no free IOs despite no in-flight IOs"),
815 errdetail_internal("%d pending, %u in-flight, %u idle IOs",
819
820 /*
821 * Wait for the oldest in-flight IO to complete.
822 *
823 * XXX: Reusing the general IO wait is suboptimal, we don't need to wait
824 * for that specific IO to complete, we just need *any* IO to complete.
825 */
826 {
829 uint64 generation = ioh->generation;
830
831 switch ((PgAioHandleState) ioh->state)
832 {
833 /* should not be in in-flight list */
834 case PGAIO_HS_IDLE:
835 case PGAIO_HS_DEFINED:
837 case PGAIO_HS_STAGED:
839 elog(ERROR, "shouldn't get here with io:%d in state %d",
840 pgaio_io_get_id(ioh), ioh->state);
841 break;
842
846 "waiting for free io with %u in flight",
848
849 /*
850 * In a more general case this would be racy, because the
851 * generation could increase after we read ioh->state above.
852 * But we are only looking at IOs by the current backend and
853 * the IO can only be recycled by this backend. Even this is
854 * only OK because we get the handle's generation before
855 * potentially processing interrupts, e.g. as part of
856 * pgaio_debug_io().
857 */
858 pgaio_io_wait(ioh, generation);
859 break;
860
862
863 /*
864 * It's possible that another backend just finished this IO.
865 *
866 * Note that no interrupts are processed between the state
867 * check and the call to reclaim - that's important as
868 * otherwise an interrupt could have already reclaimed the
869 * handle.
870 *
871 * Need to ensure that there's no reordering, in the more
872 * common paths, where we wait for IO, that's done by
873 * pgaio_io_was_recycled().
874 */
877 break;
878 }
879
881 elog(PANIC, "no idle IO after waiting for IO to terminate");
882 return;
883 }
884}
int io_max_concurrency
Definition aio.c:75
#define pg_read_barrier()
Definition atomics.h:154
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
int errdetail_internal(const char *fmt,...)
Definition elog.c:1243
#define ereport(elevel,...)
Definition elog.h:150
#define dclist_head_element(type, membername, lhead)
Definition ilist.h:955
int i
Definition isn.c:77
uint32 io_handle_off

References dclist_count(), dclist_head_element, dclist_is_empty(), DEBUG2, elog, ereport, errdetail_internal(), errmsg_internal(), ERROR, fb(), i, PgAioBackend::idle_ios, PgAioBackend::in_flight_ios, PgAioBackend::io_handle_off, PgAioCtl::io_handles, io_max_concurrency, PgAioBackend::num_staged_ios, PANIC, pg_read_barrier, pgaio_ctl, pgaio_debug, pgaio_debug_io, PGAIO_HS_COMPLETED_IO, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, PGAIO_HS_STAGED, PGAIO_HS_SUBMITTED, pgaio_io_get_id(), pgaio_io_reclaim(), pgaio_io_wait(), pgaio_my_backend, and pgaio_submit_staged().

Referenced by pgaio_io_acquire().

◆ pgaio_io_was_recycled()

bool pgaio_io_was_recycled ( PgAioHandle *ioh,
uint64  ref_generation,
PgAioHandleState *state
)

Definition at line 559 of file aio.c.

560{
561 *state = ioh->state;
562
563 /*
564 * Ensure that we don't see an earlier state of the handle than ioh->state
565 * due to compiler or CPU reordering. This protects both ->generation as
566 * directly used here, and other fields in the handle accessed in the
567 * caller if the handle was not reused.
568 */
569 pg_read_barrier();
570
571 return ioh->generation != ref_generation;
572}

References fb(), and pg_read_barrier.

Referenced by pgaio_io_wait(), and pgaio_wref_check_done().

◆ pgaio_result_status_string()

const char * pgaio_result_status_string ( PgAioResultStatus  rs)

Definition at line 934 of file aio.c.

935{
936 switch (rs)
937 {
938 case PGAIO_RS_UNKNOWN:
939 return "UNKNOWN";
940 case PGAIO_RS_OK:
941 return "OK";
942 case PGAIO_RS_WARNING:
943 return "WARNING";
944 case PGAIO_RS_PARTIAL:
945 return "PARTIAL";
946 case PGAIO_RS_ERROR:
947 return "ERROR";
948 }
949
950 return NULL; /* silence compiler */
951}
@ PGAIO_RS_OK
Definition aio_types.h:81
@ PGAIO_RS_PARTIAL
Definition aio_types.h:82
@ PGAIO_RS_ERROR
Definition aio_types.h:84
@ PGAIO_RS_WARNING
Definition aio_types.h:83

References fb(), PGAIO_RS_ERROR, PGAIO_RS_OK, PGAIO_RS_PARTIAL, PGAIO_RS_UNKNOWN, and PGAIO_RS_WARNING.

Referenced by pg_get_aios(), pgaio_io_call_complete_local(), pgaio_io_call_complete_shared(), and pgaio_io_reclaim().

◆ pgaio_shutdown()

void pgaio_shutdown ( int  code,
Datum  arg 
)

Definition at line 1288 of file aio.c.

1289{
1292
1293 /* first clean up resources as we would at a transaction boundary */
1294 AtEOXact_Aio(code == 0);
1295
1296 /*
1297 * Before exiting, make sure that all IOs are finished. That has two main
1298 * purposes:
1299 *
1300 * - Some kernel-level AIO mechanisms don't deal well with the issuer of
1301 * an AIO exiting before IO completed
1302 *
1303 * - It'd be confusing to see partially finished IOs in stats views etc
1304 */
1306 {
1308 uint64 generation = ioh->generation;
1309
1311 "waiting for IO to complete during shutdown, %u in-flight IOs",
1313
1314 /* see comment in pgaio_io_wait_for_free() about raciness */
1315 pgaio_io_wait(ioh, generation);
1316 }
1317
1319}
void AtEOXact_Aio(bool is_commit)
Definition aio.c:1193

References Assert, AtEOXact_Aio(), dclist_count(), dclist_head_element, dclist_is_empty(), DEBUG2, fb(), PgAioBackend::handed_out_io, PgAioBackend::in_flight_ios, pgaio_debug_io, pgaio_io_wait(), and pgaio_my_backend.

Referenced by pgaio_init_backend().

◆ pgaio_submit_staged()

void pgaio_submit_staged ( void  )

◆ pgaio_wref_check_done()

bool pgaio_wref_check_done ( PgAioWaitRef *iow)

Definition at line 1005 of file aio.c.

1006{
1009 bool am_owner;
1011
1013
1015 return true;
1016
1017 if (state == PGAIO_HS_IDLE)
1018 return true;
1019
1020 am_owner = ioh->owner_procno == MyProcNumber;
1021
1024 {
1025 /*
1026 * Note that no interrupts are processed between
1027 * pgaio_io_was_recycled() and this check - that's important as
1028 * otherwise an interrupt could have already reclaimed the handle.
1029 */
1030 if (am_owner)
1032 return true;
1033 }
1034
1035 /*
1036 * XXX: It likely would be worth checking in with the io method, to give
1037 * the IO method a chance to check if there are completion events queued.
1038 */
1039
1040 return false;
1041}
static PgAioHandle * pgaio_io_from_wref(PgAioWaitRef *iow, uint64 *ref_generation)
Definition aio.c:891

References fb(), MyProcNumber, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_IDLE, pgaio_io_from_wref(), pgaio_io_reclaim(), and pgaio_io_was_recycled().

Referenced by WaitReadBuffers().

◆ pgaio_wref_clear()

void pgaio_wref_clear ( PgAioWaitRef *iow)

Definition at line 964 of file aio.c.

965{
966 iow->aio_index = PG_UINT32_MAX;
967}
#define PG_UINT32_MAX
Definition c.h:604

References fb(), and PG_UINT32_MAX.

Referenced by AsyncReadBuffers(), BufferManagerShmemInit(), InitLocalBuffers(), StartReadBuffersImpl(), TerminateBufferIO(), and TerminateLocalBufferIO().

◆ pgaio_wref_get_id()

int pgaio_wref_get_id ( PgAioWaitRef *iow)

Definition at line 980 of file aio.c.

981{
982 Assert(pgaio_wref_valid(iow));
983 return iow->aio_index;
984}
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition aio.c:971

References Assert, fb(), and pgaio_wref_valid().

◆ pgaio_wref_valid()

bool pgaio_wref_valid ( PgAioWaitRef *iow)

Definition at line 971 of file aio.c.

972{
973 return iow->aio_index != PG_UINT32_MAX;
974}

References fb(), and PG_UINT32_MAX.

Referenced by InvalidateLocalBuffer(), pgaio_wref_get_id(), ProcessReadBuffersResult(), StartLocalBufferIO(), WaitIO(), and WaitReadBuffers().

◆ pgaio_wref_wait()

◆ StaticAssertDecl()

StaticAssertDecl ( lengthof(io_method_options) == lengthof(pgaio_method_ops_table)+1,
"io_method_options out of sync with pgaio_method_ops_table"   
)

Variable Documentation

◆ io_max_concurrency

int io_max_concurrency = -1

◆ io_method

◆ io_method_options

const struct config_enum_entry io_method_options[]
Initial value:
= {
{"sync", IOMETHOD_SYNC, false},
{"worker", IOMETHOD_WORKER, false},
{NULL, 0, false}
}
@ IOMETHOD_WORKER
Definition aio.h:35
@ IOMETHOD_SYNC
Definition aio.h:34

Definition at line 64 of file aio.c.

64 {
65 {"sync", IOMETHOD_SYNC, false},
66 {"worker", IOMETHOD_WORKER, false},
67#ifdef IOMETHOD_IO_URING_ENABLED
68 {"io_uring", IOMETHOD_IO_URING, false},
69#endif
70 {NULL, 0, false}
71};

◆ pgaio_ctl

◆ pgaio_method_ops

◆ pgaio_method_ops_table

const IoMethodOps* const pgaio_method_ops_table[]
static
Initial value:
= {
}
const IoMethodOps pgaio_sync_ops
Definition method_sync.c:28
const IoMethodOps pgaio_worker_ops

Definition at line 84 of file aio.c.

84 {
87#ifdef IOMETHOD_IO_URING_ENABLED
89#endif
90};

Referenced by assign_io_method().

◆ pgaio_my_backend