PostgreSQL Source Code git master
pg_test_fsync.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_test_fsync --- tests all supported fsync() methods
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 *
7 * src/bin/pg_test_fsync/pg_test_fsync.c
8 *
9 *-------------------------------------------------------------------------
10 */
11
12#include "postgres_fe.h"
13
14#include <limits.h>
15#include <sys/stat.h>
16#include <sys/time.h>
17#include <fcntl.h>
18#include <time.h>
19#include <unistd.h>
20#include <signal.h>
21
22#include "common/logging.h"
23#include "common/pg_prng.h"
24#include "getopt_long.h"
25
26/*
27 * put the temp files in the local directory
28 * unless the user specifies otherwise
29 */
30#define FSYNC_FILENAME "./pg_test_fsync.out"
31
32#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
33
34#define LABEL_FORMAT " %-30s"
35#define NA_FORMAT "%21s\n"
36/* translator: maintain alignment with NA_FORMAT */
37#define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
38#define USECS_SEC 1000000
39
/*
 * These are macros to avoid timing the function call overhead.
 *
 * START_TIMER clears alarm_triggered, requests that it be set again after
 * secs_per_test seconds, and records the start time in start_t.
 *
 * STOP_TIMER records the stop time in stop_t and reports the result through
 * print_elapse(); it expects an int variable named "ops" in the caller's
 * scope holding the number of completed operations.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL (not
 * INVALID_HANDLE_VALUE).  The handle is closed right away because nothing
 * waits on it; closing the handle does not stop the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread_; \
	\
	alarm_triggered = false; \
	alarm_thread_ = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread_ == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread_); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
65
66
67static const char *progname;
68
69static unsigned int secs_per_test = 5;
70static int needs_unlink = 0;
72static char *filename = FSYNC_FILENAME;
73static struct timeval start_t,
75static sig_atomic_t alarm_triggered = false;
76
77
78static void handle_args(int argc, char *argv[]);
79static void prepare_buf(void);
80static void test_open(void);
81static void test_non_sync(void);
82static void test_sync(int writes_per_op);
83static void test_open_syncs(void);
84static void test_open_sync(const char *msg, int writes_size);
85static void test_file_descriptor_sync(void);
86
87#ifndef WIN32
88static void process_alarm(SIGNAL_ARGS);
89#else
90static DWORD WINAPI process_alarm(LPVOID param);
91#endif
92static void signal_cleanup(SIGNAL_ARGS);
93
94#ifdef HAVE_FSYNC_WRITETHROUGH
95static int pg_fsync_writethrough(int fd);
96#endif
97static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
98
99#define die(msg) pg_fatal("%s: %m", _(msg))
100
101
102int
103main(int argc, char *argv[])
104{
105 pg_logging_init(argv[0]);
106 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
107 progname = get_progname(argv[0]);
108
109 handle_args(argc, argv);
110
111 /* Prevent leaving behind the test file */
112 pqsignal(SIGINT, signal_cleanup);
113 pqsignal(SIGTERM, signal_cleanup);
114
115 /* the following are not valid on Windows */
116#ifndef WIN32
119#endif
120
122
123 prepare_buf();
124
125 test_open();
126
127 /* Test using 1 XLOG_BLCKSZ write */
128 test_sync(1);
129
130 /* Test using 2 XLOG_BLCKSZ writes */
131 test_sync(2);
132
134
136
138
139 unlink(filename);
140
141 return 0;
142}
143
144static void
145handle_args(int argc, char *argv[])
146{
147 static struct option long_options[] = {
148 {"filename", required_argument, NULL, 'f'},
149 {"secs-per-test", required_argument, NULL, 's'},
150 {NULL, 0, NULL, 0}
151 };
152
153 int option; /* Command line option */
154 int optindex = 0; /* used by getopt_long */
155 unsigned long optval; /* used for option parsing */
156 char *endptr;
157
158 if (argc > 1)
159 {
160 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
161 {
162 printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
163 exit(0);
164 }
165 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
166 {
167 puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
168 exit(0);
169 }
170 }
171
172 while ((option = getopt_long(argc, argv, "f:s:",
173 long_options, &optindex)) != -1)
174 {
175 switch (option)
176 {
177 case 'f':
179 break;
180
181 case 's':
182 errno = 0;
183 optval = strtoul(optarg, &endptr, 10);
184
185 if (endptr == optarg || *endptr != '\0' ||
186 errno != 0 || optval != (unsigned int) optval)
187 {
188 pg_log_error("invalid argument for option %s", "--secs-per-test");
189 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
190 exit(1);
191 }
192
193 secs_per_test = (unsigned int) optval;
194 if (secs_per_test == 0)
195 pg_fatal("%s must be in range %u..%u",
196 "--secs-per-test", 1, UINT_MAX);
197 break;
198
199 default:
200 /* getopt_long already emitted a complaint */
201 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
202 exit(1);
203 }
204 }
205
206 if (argc > optind)
207 {
208 pg_log_error("too many command-line arguments (first is \"%s\")",
209 argv[optind]);
210 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
211 exit(1);
212 }
213
214 printf(ngettext("%u second per test\n",
215 "%u seconds per test\n",
218#if defined(O_DIRECT)
219 printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
220#elif defined(F_NOCACHE)
221 printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
222#else
223 printf(_("Direct I/O is not supported on this platform.\n"));
224#endif
225}
226
227static void
229{
230 int ops;
231
232 /* write random data into buffer */
233 for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
234 buf[ops] = (char) pg_prng_int32(&pg_global_prng_state);
235}
236
237static void
239{
240 int tmpfile;
241
242 /*
243 * test if we can open the target file
244 */
245 if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
246 die("could not open output file");
247 needs_unlink = 1;
248 if (write(tmpfile, buf, DEFAULT_XLOG_SEG_SIZE) !=
250 die("write failed");
251
252 /* fsync now so that dirty buffers don't skew later tests */
253 if (fsync(tmpfile) != 0)
254 die("fsync failed");
255
256 close(tmpfile);
257}
258
/*
 * Open "path" with direct I/O requested where the platform has a way to ask
 * for it: O_DIRECT is added to the open flags when defined; otherwise, when
 * F_NOCACHE is defined, it is applied with fcntl() after a successful open.
 *
 * Returns the new file descriptor, or -1 with errno set.  If the fcntl()
 * step fails, the descriptor is closed and fcntl()'s errno is preserved.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
	int			open_flags = flags;
	int			result;

#ifdef O_DIRECT
	open_flags |= O_DIRECT;
#endif

	result = open(path, open_flags, mode);

#if !defined(O_DIRECT) && defined(F_NOCACHE)
	if (result < 0)
		return result;

	if (fcntl(result, F_NOCACHE, 1) < 0)
	{
		/* close() may overwrite errno, so hold on to fcntl()'s value */
		int			fcntl_errno = errno;

		close(result);
		errno = fcntl_errno;
		result = -1;
	}
#endif

	return result;
}
283
284static void
285test_sync(int writes_per_op)
286{
287 int tmpfile,
288 ops,
289 writes;
290 bool fs_warning = false;
291
292 if (writes_per_op == 1)
293 printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
294 else
295 printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
296 printf(_("(in \"wal_sync_method\" preference order, except fdatasync is Linux's default)\n"));
297
298 /*
299 * Test open_datasync if available
300 */
301 printf(LABEL_FORMAT, "open_datasync");
302 fflush(stdout);
303
304#ifdef O_DSYNC
305 if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
306 {
307 printf(NA_FORMAT, _("n/a*"));
308 fs_warning = true;
309 }
310 else
311 {
313 for (ops = 0; alarm_triggered == false; ops++)
314 {
315 for (writes = 0; writes < writes_per_op; writes++)
316 if (pg_pwrite(tmpfile,
317 buf,
318 XLOG_BLCKSZ,
319 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
320 die("write failed");
321 }
323 close(tmpfile);
324 }
325#else
326 printf(NA_FORMAT, _("n/a"));
327#endif
328
329/*
330 * Test fdatasync if available
331 */
332 printf(LABEL_FORMAT, "fdatasync");
333 fflush(stdout);
334
335 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
336 die("could not open output file");
338 for (ops = 0; alarm_triggered == false; ops++)
339 {
340 for (writes = 0; writes < writes_per_op; writes++)
341 if (pg_pwrite(tmpfile,
342 buf,
343 XLOG_BLCKSZ,
344 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
345 die("write failed");
346 fdatasync(tmpfile);
347 }
349 close(tmpfile);
350
351/*
352 * Test fsync
353 */
354 printf(LABEL_FORMAT, "fsync");
355 fflush(stdout);
356
357 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
358 die("could not open output file");
360 for (ops = 0; alarm_triggered == false; ops++)
361 {
362 for (writes = 0; writes < writes_per_op; writes++)
363 if (pg_pwrite(tmpfile,
364 buf,
365 XLOG_BLCKSZ,
366 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
367 die("write failed");
368 if (fsync(tmpfile) != 0)
369 die("fsync failed");
370 }
372 close(tmpfile);
373
374/*
375 * If fsync_writethrough is available, test as well
376 */
377 printf(LABEL_FORMAT, "fsync_writethrough");
378 fflush(stdout);
379
380#ifdef HAVE_FSYNC_WRITETHROUGH
381 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
382 die("could not open output file");
384 for (ops = 0; alarm_triggered == false; ops++)
385 {
386 for (writes = 0; writes < writes_per_op; writes++)
387 if (pg_pwrite(tmpfile,
388 buf,
389 XLOG_BLCKSZ,
390 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
391 die("write failed");
392 if (pg_fsync_writethrough(tmpfile) != 0)
393 die("fsync failed");
394 }
396 close(tmpfile);
397#else
398 printf(NA_FORMAT, _("n/a"));
399#endif
400
401/*
402 * Test open_sync if available
403 */
404 printf(LABEL_FORMAT, "open_sync");
405 fflush(stdout);
406
407#ifdef O_SYNC
408 if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
409 {
410 printf(NA_FORMAT, _("n/a*"));
411 fs_warning = true;
412 }
413 else
414 {
416 for (ops = 0; alarm_triggered == false; ops++)
417 {
418 for (writes = 0; writes < writes_per_op; writes++)
419 if (pg_pwrite(tmpfile,
420 buf,
421 XLOG_BLCKSZ,
422 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
423
424 /*
425 * This can generate write failures if the filesystem has
426 * a large block size, e.g. 4k, and there is no support
427 * for O_DIRECT writes smaller than the file system block
428 * size, e.g. XFS.
429 */
430 die("write failed");
431 }
433 close(tmpfile);
434 }
435#else
436 printf(NA_FORMAT, _("n/a"));
437#endif
438
439 if (fs_warning)
440 {
441 printf(_("* This file system and its mount options do not support direct\n"
442 " I/O, e.g. ext4 in journaled mode.\n"));
443 }
444}
445
/*
 * Run test_open_sync() for five ways of splitting 16kB into equal-sized
 * writes (1 x 16kB down to 16 x 1kB), after printing a heading for the
 * group.
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
	test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
	test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
	test_open_sync(_("16 *  1kB open_sync writes"), 1);
}
459
460/*
461 * Test open_sync with different size files
462 */
463static void
464test_open_sync(const char *msg, int writes_size)
465{
466#ifdef O_SYNC
467 int tmpfile,
468 ops,
469 writes;
470#endif
471
472 printf(LABEL_FORMAT, msg);
473 fflush(stdout);
474
475#ifdef O_SYNC
476 if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
477 printf(NA_FORMAT, _("n/a*"));
478 else
479 {
481 for (ops = 0; alarm_triggered == false; ops++)
482 {
483 for (writes = 0; writes < 16 / writes_size; writes++)
484 if (pg_pwrite(tmpfile,
485 buf,
486 writes_size * 1024,
487 writes * writes_size * 1024) !=
488 writes_size * 1024)
489 die("write failed");
490 }
492 close(tmpfile);
493 }
494#else
495 printf(NA_FORMAT, _("n/a"));
496#endif
497}
498
499static void
501{
502 int tmpfile,
503 ops;
504
505 /*
506 * Test whether fsync can sync data written on a different descriptor for
507 * the same file. This checks the efficiency of multi-process fsyncs
508 * against the same file. Possibly this should be done with writethrough
509 * on platforms which support it.
510 */
511 printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
512 printf(_("(If the times are similar, fsync() can sync data written on a different\n"
513 "descriptor.)\n"));
514
515 /*
516 * first write, fsync and close, which is the normal behavior without
517 * multiple descriptors
518 */
519 printf(LABEL_FORMAT, "write, fsync, close");
520 fflush(stdout);
521
523 for (ops = 0; alarm_triggered == false; ops++)
524 {
525 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
526 die("could not open output file");
527 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
528 die("write failed");
529 if (fsync(tmpfile) != 0)
530 die("fsync failed");
531 close(tmpfile);
532
533 /*
534 * open and close the file again to be consistent with the following
535 * test
536 */
537 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
538 die("could not open output file");
539 close(tmpfile);
540 }
542
543 /*
544 * Now open, write, close, open again and fsync This simulates processes
545 * fsyncing each other's writes.
546 */
547 printf(LABEL_FORMAT, "write, close, fsync");
548 fflush(stdout);
549
551 for (ops = 0; alarm_triggered == false; ops++)
552 {
553 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
554 die("could not open output file");
555 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
556 die("write failed");
557 close(tmpfile);
558 /* reopen file */
559 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
560 die("could not open output file");
561 if (fsync(tmpfile) != 0)
562 die("fsync failed");
563 close(tmpfile);
564 }
566}
567
568static void
570{
571 int tmpfile,
572 ops;
573
574 /*
575 * Test a simple write without fsync
576 */
577 printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
578 printf(LABEL_FORMAT, "write");
579 fflush(stdout);
580
581 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
582 die("could not open output file");
584 for (ops = 0; alarm_triggered == false; ops++)
585 {
586 if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
587 die("write failed");
588 }
590 close(tmpfile);
591}
592
593static void
595{
596 int rc;
597
598 /* Delete the file if it exists. Ignore errors */
599 if (needs_unlink)
600 unlink(filename);
601 /* Finish incomplete line on stdout */
602 rc = write(STDOUT_FILENO, "\n", 1);
603 (void) rc; /* silence compiler warnings */
604 _exit(1);
605}
606
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using fcntl(F_FULLFSYNC) when that command is defined.
 *
 * Returns 0 on success and -1 on failure.  When F_FULLFSYNC is not defined,
 * always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
620
621/*
622 * print out the writes per second for tests
623 */
624static void
625print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
626{
627 double total_time = (stop_t.tv_sec - start_t.tv_sec) +
628 (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
629 double per_second = ops / total_time;
630 double avg_op_time_us = (total_time / ops) * USECS_SEC;
631
632 printf(_(OPS_FORMAT), per_second, avg_op_time_us);
633}
634
635#ifndef WIN32
636static void
638{
639 alarm_triggered = true;
640}
641#else
642static DWORD WINAPI
643process_alarm(LPVOID param)
644{
645 /* WIN32 doesn't support alarm, so we create a thread and sleep here */
646 Sleep(secs_per_test * 1000);
647 alarm_triggered = true;
648 ExitThread(0);
649}
650#endif
#define ngettext(s, p, n)
Definition: c.h:1179
struct PGAlignedXLogBlock PGAlignedXLogBlock
#define SIGNAL_ARGS
Definition: c.h:1347
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1212
#define PG_BINARY
Definition: c.h:1271
uint64_t uint64
Definition: c.h:553
int fdatasync(int fildes)
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:430
#define _(x)
Definition: elog.c:91
int pg_fsync_writethrough(int fd)
Definition: fd.c:458
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define required_argument
Definition: getopt_long.h:26
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_fatal(...)
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define DEFAULT_XLOG_SEG_SIZE
PGDLLIMPORT int optind
Definition: getopt.c:51
PGDLLIMPORT char * optarg
Definition: getopt.c:53
int32 pg_prng_int32(pg_prng_state *state)
Definition: pg_prng.c:243
void pg_prng_seed(pg_prng_state *state, uint64 seed)
Definition: pg_prng.c:89
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
static void test_file_descriptor_sync(void)
int main(int argc, char *argv[])
static void prepare_buf(void)
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
#define NA_FORMAT
Definition: pg_test_fsync.c:35
static void test_open_syncs(void)
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:30
static void handle_args(int argc, char *argv[])
static int open_direct(const char *path, int flags, mode_t mode)
#define START_TIMER
Definition: pg_test_fsync.c:42
#define STOP_TIMER
Definition: pg_test_fsync.c:60
#define LABEL_FORMAT
Definition: pg_test_fsync.c:34
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
static int needs_unlink
Definition: pg_test_fsync.c:70
static void test_sync(int writes_per_op)
static void test_open(void)
static void test_non_sync(void)
#define die(msg)
Definition: pg_test_fsync.c:99
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:73
static void test_open_sync(const char *msg, int writes_size)
static const char * progname
Definition: pg_test_fsync.c:67
static void process_alarm(SIGNAL_ARGS)
#define USECS_SEC
Definition: pg_test_fsync.c:38
static sig_atomic_t alarm_triggered
Definition: pg_test_fsync.c:75
#define OPS_FORMAT
Definition: pg_test_fsync.c:37
static unsigned int secs_per_test
Definition: pg_test_fsync.c:69
static char * filename
Definition: pg_test_fsync.c:72
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:32
static void signal_cleanup(SIGNAL_ARGS)
#define pqsignal
Definition: port.h:551
#define pg_pwrite
Definition: port.h:248
const char * get_progname(const char *argv0)
Definition: path.c:652
#define printf(...)
Definition: port.h:266
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define STDOUT_FILENO
Definition: unistd.h:8
#define fsync(fd)
Definition: win32_port.h:83
#define SIGHUP
Definition: win32_port.h:158
#define S_IRUSR
Definition: win32_port.h:279
#define SIGALRM
Definition: win32_port.h:164
#define O_DIRECT
Definition: win32_port.h:345
#define S_IWUSR
Definition: win32_port.h:282
#define O_DSYNC
Definition: win32_port.h:346