PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_test_fsync.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_test_fsync --- tests all supported fsync() methods
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 *
7 * src/bin/pg_test_fsync/pg_test_fsync.c
8 *
9 *-------------------------------------------------------------------------
10 */
11
12#include "postgres_fe.h"
13
14#include <limits.h>
15#include <sys/stat.h>
16#include <sys/time.h>
17#include <fcntl.h>
18#include <time.h>
19#include <unistd.h>
20#include <signal.h>
21
22#include "common/logging.h"
23#include "common/pg_prng.h"
24#include "getopt_long.h"
25
26/*
27 * put the temp files in the local directory
28 * unless the user specifies otherwise
29 */
30#define FSYNC_FILENAME "./pg_test_fsync.out"
31
32#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
33
34#define LABEL_FORMAT " %-30s"
35#define NA_FORMAT "%21s\n"
36/* translator: maintain alignment with NA_FORMAT */
37#define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
38#define USECS_SEC 1000000
39
/*
 * Timing macros.
 *
 * These are macros, not functions, to avoid timing the function call
 * overhead.  START_TIMER clears alarm_triggered, requests a wakeup after
 * secs_per_test seconds and records the start time in start_t.  STOP_TIMER
 * records the stop time in stop_t and reports the rate for the "ops" counter
 * of the enclosing scope through print_elapse().
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE, so that is what must be tested.  Nothing ever waits
 * on the thread, so its handle is closed immediately to avoid leaking one
 * handle per test; closing the handle does not terminate the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER \
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
65
66
67static const char *progname;
68
69static unsigned int secs_per_test = 5;
70static int needs_unlink = 0;
74static struct timeval start_t,
76static sig_atomic_t alarm_triggered = false;
77
78
79static void handle_args(int argc, char *argv[]);
80static void prepare_buf(void);
81static void test_open(void);
82static void test_non_sync(void);
83static void test_sync(int writes_per_op);
84static void test_open_syncs(void);
85static void test_open_sync(const char *msg, int writes_size);
86static void test_file_descriptor_sync(void);
87
88#ifndef WIN32
89static void process_alarm(SIGNAL_ARGS);
90#else
91static DWORD WINAPI process_alarm(LPVOID param);
92#endif
93static void signal_cleanup(SIGNAL_ARGS);
94
95#ifdef HAVE_FSYNC_WRITETHROUGH
96static int pg_fsync_writethrough(int fd);
97#endif
98static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
99
100#define die(msg) pg_fatal("%s: %m", _(msg))
101
102
/*
 * Program entry point.
 *
 * Initializes logging and the locale, parses the command line with
 * handle_args(), installs signal_cleanup for SIGINT and SIGTERM, then calls
 * prepare_buf(), test_open() and test_sync() with one and two writes per
 * operation.  The test file is unlinked before returning 0.
 *
 * NOTE(review): the embedded listing numbers jump (117 -> 120, 121 -> 123,
 * 132 -> 140), so some statements of the original function are not shown
 * in this listing -- confirm against the real file.
 */
103int
104main(int argc, char *argv[])
105{
106 pg_logging_init(argv[0]);
107 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
108 progname = get_progname(argv[0]);
109
110 handle_args(argc, argv);
111
112 /* Prevent leaving behind the test file */
113 pqsignal(SIGINT, signal_cleanup);
114 pqsignal(SIGTERM, signal_cleanup);
115
116 /* the following are not valid on Windows */
117#ifndef WIN32
120#endif
121
123
124 prepare_buf();
125
126 test_open();
127
128 /* Test using 1 XLOG_BLCKSZ write */
129 test_sync(1);
130
131 /* Test using 2 XLOG_BLCKSZ writes */
132 test_sync(2);
133
135
137
139
 /* normal exit: remove the test file (return value deliberately unchecked) */
140 unlink(filename);
141
142 return 0;
143}
144
/*
 * Parse the command line.
 *
 * Recognizes -f/--filename and -s/--secs-per-test (both take an argument).
 * --help/-? and --version/-V are honored only as the first argument and
 * exit(0) after printing.  Any invalid option, invalid --secs-per-test value
 * or leftover non-option argument is reported and ends the program with
 * exit status 1.  On success the function reports which direct I/O
 * mechanism, if any, was available at compile time.
 */
145static void
146handle_args(int argc, char *argv[])
147{
148 static struct option long_options[] = {
149 {"filename", required_argument, NULL, 'f'},
150 {"secs-per-test", required_argument, NULL, 's'},
151 {NULL, 0, NULL, 0}
152 };
153
154 int option; /* Command line option */
155 int optindex = 0; /* used by getopt_long */
156 unsigned long optval; /* used for option parsing */
157 char *endptr;
158
159 if (argc > 1)
160 {
161 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
162 {
163 printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
164 exit(0);
165 }
166 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
167 {
168 puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
169 exit(0);
170 }
171 }
172
173 while ((option = getopt_long(argc, argv, "f:s:",
174 long_options, &optindex)) != -1)
175 {
176 switch (option)
177 {
178 case 'f':
 /*
  * NOTE(review): listing line 179 is missing here; as shown, -f does
  * nothing.  Presumably the original stores optarg as the test file
  * name -- confirm against the real file.
  */
180 break;
181
182 case 's':
 /* clear errno so a range error from strtoul() can be detected */
183 errno = 0;
184 optval = strtoul(optarg, &endptr, 10);
185
 /*
  * Reject empty input, trailing garbage, conversion errors, and
  * values that do not survive a round trip through unsigned int.
  *
  * NOTE(review): strtoul() accepts a leading minus sign and returns
  * the negated value as unsigned long; the round-trip test only
  * rejects that where unsigned long is wider than unsigned int.
  */
186 if (endptr == optarg || *endptr != '\0' ||
187 errno != 0 || optval != (unsigned int) optval)
188 {
189 pg_log_error("invalid argument for option %s", "--secs-per-test");
190 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
191 exit(1);
192 }
193
194 secs_per_test = (unsigned int) optval;
195 if (secs_per_test == 0)
196 pg_fatal("%s must be in range %u..%u",
197 "--secs-per-test", 1, UINT_MAX);
198 break;
199
200 default:
201 /* getopt_long already emitted a complaint */
202 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
203 exit(1);
204 }
205 }
206
 /* anything left after option parsing is an unexpected argument */
207 if (argc > optind)
208 {
209 pg_log_error("too many command-line arguments (first is \"%s\")",
210 argv[optind]);
211 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
212 exit(1);
213 }
214
215 printf(ngettext("%u second per test\n",
216 "%u seconds per test\n",
 /*
  * NOTE(review): listing lines 217-218, which should finish the call
  * above, are missing from this listing.
  */
219#if defined(O_DIRECT)
220 printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
221#elif defined(F_NOCACHE)
222 printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
223#else
224 printf(_("Direct I/O is not supported on this platform.\n"));
225#endif
226}
227
/*
 * Fill the test buffer and derive the aligned write pointer.
 *
 * "buf" is set to full_buf rounded up with TYPEALIGN to an XLOG_BLCKSZ
 * boundary.
 *
 * NOTE(review): listing line 229 (the function name, presumably
 * prepare_buf) and line 235 (the body of the fill loop) are missing from
 * this listing; as printed, the assignment to buf would become the loop
 * body.  Confirm against the real file.
 */
228static void
230{
231 int ops;
232
233 /* write random data into buffer */
234 for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
236
237 buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
238}
239
/*
 * Create (or open) the test file and fill it with DEFAULT_XLOG_SEG_SIZE
 * bytes from full_buf, then fsync and close it.  Any failure is fatal via
 * die().  needs_unlink is set once the file is open so that signal_cleanup
 * knows there is a file to remove.
 *
 * NOTE(review): listing line 241 (the function name, presumably test_open)
 * and line 252 (the right-hand side of the write() comparison) are missing
 * from this listing.
 */
240static void
242{
243 int tmpfile;
244
245 /*
246 * test if we can open the target file
247 */
248 if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
249 die("could not open output file");
250 needs_unlink = 1;
251 if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
253 die("write failed");
254
255 /* fsync now so that dirty buffers don't skew later tests */
256 if (fsync(tmpfile) != 0)
257 die("fsync failed");
258
259 close(tmpfile);
260}
261
/*
 * Open "path" with direct I/O requested where the platform offers it.
 *
 * When O_DIRECT exists it is added to the caller's flags.  Otherwise, when
 * F_NOCACHE exists, it is switched on with fcntl() after a successful open;
 * if that fcntl() fails the descriptor is closed again and the fcntl()
 * errno is preserved for the caller.
 *
 * Returns the open file descriptor, or -1 with errno set on failure.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
	int			handle;

#ifdef O_DIRECT
	handle = open(path, flags | O_DIRECT, mode);
#else
	handle = open(path, flags, mode);

#if defined(F_NOCACHE)
	if (handle >= 0)
	{
		if (fcntl(handle, F_NOCACHE, 1) < 0)
		{
			/* close() may overwrite errno, so remember the real cause */
			int			fcntl_errno = errno;

			close(handle);
			errno = fcntl_errno;
			handle = -1;
		}
	}
#endif
#endif

	return handle;
}
286
/*
 * Compare the file sync methods.
 *
 * Each operation consists of writes_per_op writes of XLOG_BLCKSZ bytes from
 * "buf" at consecutive block offsets starting at the beginning of the test
 * file.  The methods are exercised in this order: open_datasync, fdatasync,
 * fsync, fsync_writethrough, open_sync.  Methods that are not compiled in
 * print "n/a"; open_datasync and open_sync print "n/a*" when open_direct()
 * fails, and a footnote explaining the asterisk is printed at the end.
 *
 * NOTE(review): the embedded listing numbers skip one line before and after
 * every timing loop below (e.g. 315 and 325), so no START_TIMER/STOP_TIMER
 * use is visible in this listing; the original presumably has them --
 * confirm against the real file.
 */
287static void
288test_sync(int writes_per_op)
289{
290 int tmpfile,
291 ops,
292 writes;
293 bool fs_warning = false;
294
295 if (writes_per_op == 1)
296 printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
297 else
298 printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
299 printf(_("(in \"wal_sync_method\" preference order, except fdatasync is Linux's default)\n"));
300
301 /*
302 * Test open_datasync if available
303 */
304 printf(LABEL_FORMAT, "open_datasync");
305 fflush(stdout);
306
307#ifdef O_DSYNC
308 if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
309 {
310 printf(NA_FORMAT, _("n/a*"));
311 fs_warning = true;
312 }
313 else
314 {
 /* no explicit sync call: the file was opened with O_DSYNC */
316 for (ops = 0; alarm_triggered == false; ops++)
317 {
318 for (writes = 0; writes < writes_per_op; writes++)
319 if (pg_pwrite(tmpfile,
320 buf,
321 XLOG_BLCKSZ,
322 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
323 die("write failed");
324 }
326 close(tmpfile);
327 }
328#else
329 printf(NA_FORMAT, _("n/a"));
330#endif
331
332/*
333 * Test fdatasync if available
334 */
335 printf(LABEL_FORMAT, "fdatasync");
336 fflush(stdout);
337
338 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
339 die("could not open output file");
341 for (ops = 0; alarm_triggered == false; ops++)
342 {
343 for (writes = 0; writes < writes_per_op; writes++)
344 if (pg_pwrite(tmpfile,
345 buf,
346 XLOG_BLCKSZ,
347 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
348 die("write failed");
 /*
  * NOTE(review): the fdatasync() result is ignored here, whereas the
  * fsync and fsync_writethrough tests below die() on failure; a
  * failing fdatasync() would be timed as if it had succeeded.
  */
349 fdatasync(tmpfile);
350 }
352 close(tmpfile);
353
354/*
355 * Test fsync
356 */
357 printf(LABEL_FORMAT, "fsync");
358 fflush(stdout);
359
360 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
361 die("could not open output file");
363 for (ops = 0; alarm_triggered == false; ops++)
364 {
365 for (writes = 0; writes < writes_per_op; writes++)
366 if (pg_pwrite(tmpfile,
367 buf,
368 XLOG_BLCKSZ,
369 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
370 die("write failed");
371 if (fsync(tmpfile) != 0)
372 die("fsync failed");
373 }
375 close(tmpfile);
376
377/*
378 * If fsync_writethrough is available, test as well
379 */
380 printf(LABEL_FORMAT, "fsync_writethrough");
381 fflush(stdout);
382
383#ifdef HAVE_FSYNC_WRITETHROUGH
384 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
385 die("could not open output file");
387 for (ops = 0; alarm_triggered == false; ops++)
388 {
389 for (writes = 0; writes < writes_per_op; writes++)
390 if (pg_pwrite(tmpfile,
391 buf,
392 XLOG_BLCKSZ,
393 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
394 die("write failed");
395 if (pg_fsync_writethrough(tmpfile) != 0)
396 die("fsync failed");
397 }
399 close(tmpfile);
400#else
401 printf(NA_FORMAT, _("n/a"));
402#endif
403
404/*
405 * Test open_sync if available
406 */
407 printf(LABEL_FORMAT, "open_sync");
408 fflush(stdout);
409
410#ifdef O_SYNC
411 if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
412 {
413 printf(NA_FORMAT, _("n/a*"));
414 fs_warning = true;
415 }
416 else
417 {
 /* no explicit sync call: the file was opened with O_SYNC */
419 for (ops = 0; alarm_triggered == false; ops++)
420 {
421 for (writes = 0; writes < writes_per_op; writes++)
422 if (pg_pwrite(tmpfile,
423 buf,
424 XLOG_BLCKSZ,
425 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
426
427 /*
428 * This can generate write failures if the filesystem has
429 * a large block size, e.g. 4k, and there is no support
430 * for O_DIRECT writes smaller than the file system block
431 * size, e.g. XFS.
432 */
433 die("write failed");
434 }
436 close(tmpfile);
437 }
438#else
439 printf(NA_FORMAT, _("n/a"));
440#endif
441
 /* explain the "n/a*" marker if either direct-I/O open failed */
442 if (fs_warning)
443 {
444 printf(_("* This file system and its mount options do not support direct\n"
445 " I/O, e.g. ext4 in journaled mode.\n"));
446 }
447}
448
/*
 * Print the heading for the open_sync write-size comparison and run
 * test_open_sync() for 16, 8, 4, 2 and 1 kB writes; each label states how
 * many writes of that size make up 16kB.
 *
 * NOTE(review): listing line 450 (the function name, presumably
 * test_open_syncs) is missing from this listing.
 */
449static void
451{
452 printf(_("\nCompare open_sync with different write sizes:\n"));
453 printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
454 "open_sync sizes.)\n"));
455
456 test_open_sync(_(" 1 * 16kB open_sync write"), 16);
457 test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
458 test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
459 test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
460 test_open_sync(_("16 * 1kB open_sync writes"), 1);
461}
462
463/*
464 * Test open_sync with different write sizes
465 */
/*
 * msg is the label printed for this run; writes_size is the size of each
 * write in kB.  One operation issues 16 / writes_size writes of
 * writes_size kB each at consecutive offsets, i.e. 16kB in total when
 * writes_size divides 16.  Prints "n/a*" when open_direct() fails and "n/a"
 * when O_SYNC is not defined.
 *
 * NOTE(review): listing lines 483 and 494 around the timing loop are
 * missing from this listing (presumably the START_TIMER/STOP_TIMER uses).
 */
466static void
467test_open_sync(const char *msg, int writes_size)
468{
469#ifdef O_SYNC
470 int tmpfile,
471 ops,
472 writes;
473#endif
474
475 printf(LABEL_FORMAT, msg);
476 fflush(stdout);
477
478#ifdef O_SYNC
479 if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
480 printf(NA_FORMAT, _("n/a*"));
481 else
482 {
484 for (ops = 0; alarm_triggered == false; ops++)
485 {
486 for (writes = 0; writes < 16 / writes_size; writes++)
487 if (pg_pwrite(tmpfile,
488 buf,
489 writes_size * 1024,
490 writes * writes_size * 1024) !=
491 writes_size * 1024)
492 die("write failed");
493 }
495 close(tmpfile);
496 }
497#else
498 printf(NA_FORMAT, _("n/a"));
499#endif
500}
501
/*
 * Compare fsync on the descriptor that wrote the data with fsync on a
 * freshly opened descriptor for the same file.  Both loops perform two
 * open/close pairs per operation so that their costs are comparable.
 *
 * NOTE(review): listing line 503 (the function name, presumably
 * test_file_descriptor_sync) and the lines around both timing loops (525,
 * 544, 553, 568) are missing from this listing.
 */
502static void
504{
505 int tmpfile,
506 ops;
507
508 /*
509 * Test whether fsync can sync data written on a different descriptor for
510 * the same file. This checks the efficiency of multi-process fsyncs
511 * against the same file. Possibly this should be done with writethrough
512 * on platforms which support it.
513 */
514 printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
515 printf(_("(If the times are similar, fsync() can sync data written on a different\n"
516 "descriptor.)\n"));
517
518 /*
519 * first write, fsync and close, which is the normal behavior without
520 * multiple descriptors
521 */
522 printf(LABEL_FORMAT, "write, fsync, close");
523 fflush(stdout);
524
526 for (ops = 0; alarm_triggered == false; ops++)
527 {
528 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
529 die("could not open output file");
530 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
531 die("write failed");
532 if (fsync(tmpfile) != 0)
533 die("fsync failed");
534 close(tmpfile);
535
536 /*
537 * open and close the file again to be consistent with the following
538 * test
539 */
540 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
541 die("could not open output file");
542 close(tmpfile);
543 }
545
546 /*
547 * Now open, write, close, open again and fsync This simulates processes
548 * fsyncing each other's writes.
549 */
550 printf(LABEL_FORMAT, "write, close, fsync");
551 fflush(stdout);
552
554 for (ops = 0; alarm_triggered == false; ops++)
555 {
556 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
557 die("could not open output file");
558 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
559 die("write failed");
560 close(tmpfile);
561 /* reopen file */
562 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
563 die("could not open output file");
564 if (fsync(tmpfile) != 0)
565 die("fsync failed");
566 close(tmpfile);
567 }
569}
570
/*
 * Baseline measurement: repeatedly write one XLOG_BLCKSZ block at offset 0
 * of the test file without any sync call.
 *
 * NOTE(review): listing line 572 (the function name, presumably
 * test_non_sync) and lines 586 and 592 around the timing loop are missing
 * from this listing.
 */
571static void
573{
574 int tmpfile,
575 ops;
576
577 /*
578 * Test a simple write without fsync
579 */
580 printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
581 printf(LABEL_FORMAT, "write");
582 fflush(stdout);
583
584 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
585 die("could not open output file");
587 for (ops = 0; alarm_triggered == false; ops++)
588 {
589 if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
590 die("write failed");
591 }
593 close(tmpfile);
594}
595
/*
 * Remove the test file (only if needs_unlink says one was created), emit a
 * newline on stdout and terminate with status 1 via _exit().
 *
 * NOTE(review): listing line 597 (the function name) is missing from this
 * listing; presumably this is signal_cleanup, the SIGINT/SIGTERM handler,
 * which would explain the use of write() and _exit() instead of stdio and
 * exit() -- confirm against the real file.
 */
596static void
598{
599 int rc;
600
601 /* Delete the file if it exists. Ignore errors */
602 if (needs_unlink)
603 unlink(filename);
604 /* Finish incomplete line on stdout */
605 rc = write(STDOUT_FILENO, "\n", 1);
606 (void) rc; /* silence compiler warnings */
607 _exit(1);
608}
609
/*
 * Write-through sync of descriptor "fd", compiled only when
 * HAVE_FSYNC_WRITETHROUGH is defined.
 *
 * With F_FULLFSYNC available this issues fcntl(fd, F_FULLFSYNC, 0) and
 * returns 0 on success or -1 on failure.  Otherwise it sets errno to ENOSYS
 * and returns -1.
 *
 * NOTE(review): listing line 613 (the function name and parameter list,
 * presumably pg_fsync_writethrough(int fd)) is missing from this listing.
 */
610#ifdef HAVE_FSYNC_WRITETHROUGH
611
612static int
614{
615#if defined(F_FULLFSYNC)
616 return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
617#else
618 errno = ENOSYS;
619 return -1;
620#endif
621}
622#endif
623
624/*
625 * print out the writes per second for tests
626 */
627static void
628print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
629{
630 double total_time = (stop_t.tv_sec - start_t.tv_sec) +
631 (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
632 double per_second = ops / total_time;
633 double avg_op_time_us = (total_time / ops) * USECS_SEC;
634
635 printf(_(OPS_FORMAT), per_second, avg_op_time_us);
636}
637
/*
 * Timer expiry: set alarm_triggered so that the timing loops stop.
 *
 * The WIN32 variant is the thread routine started by START_TIMER; it sleeps
 * for secs_per_test seconds, sets the flag and ends the thread.
 *
 * NOTE(review): listing line 640 (the name line of the non-WIN32 variant,
 * presumably process_alarm(SIGNAL_ARGS)) is missing from this listing.
 */
638#ifndef WIN32
639static void
641{
642 alarm_triggered = true;
643}
644#else
645static DWORD WINAPI
646process_alarm(LPVOID param)
647{
648 /* WIN32 doesn't support alarm, so we create a thread and sleep here */
 /*
  * NOTE(review): secs_per_test is an unsigned int that handle_args accepts
  * up to UINT_MAX, so this multiplication wraps around for values above
  * UINT_MAX / 1000 and the sleep becomes shorter than requested.
  */
649 Sleep(secs_per_test * 1000);
650 alarm_triggered = true;
651 ExitThread(0);
652}
653#endif
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:775
#define ngettext(s, p, n)
Definition: c.h:1152
#define SIGNAL_ARGS
Definition: c.h:1320
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1185
#define PG_BINARY
Definition: c.h:1244
uint64_t uint64
Definition: c.h:503
int fdatasync(int fildes)
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:429
#define _(x)
Definition: elog.c:91
int pg_fsync_writethrough(int fd)
Definition: fd.c:461
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define required_argument
Definition: getopt_long.h:26
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_fatal(...)
static PgChecksumMode mode
Definition: pg_checksums.c:55
#define DEFAULT_XLOG_SEG_SIZE
PGDLLIMPORT int optind
Definition: getopt.c:51
PGDLLIMPORT char * optarg
Definition: getopt.c:53
int32 pg_prng_int32(pg_prng_state *state)
Definition: pg_prng.c:243
void pg_prng_seed(pg_prng_state *state, uint64 seed)
Definition: pg_prng.c:89
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
static void test_file_descriptor_sync(void)
int main(int argc, char *argv[])
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
static void prepare_buf(void)
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
#define NA_FORMAT
Definition: pg_test_fsync.c:35
static void test_open_syncs(void)
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:30
static char * filename
Definition: pg_test_fsync.c:73
static void handle_args(int argc, char *argv[])
static int open_direct(const char *path, int flags, mode_t mode)
#define START_TIMER
Definition: pg_test_fsync.c:42
#define STOP_TIMER
Definition: pg_test_fsync.c:60
#define LABEL_FORMAT
Definition: pg_test_fsync.c:34
static int needs_unlink
Definition: pg_test_fsync.c:70
static void test_sync(int writes_per_op)
static void test_open(void)
static void test_non_sync(void)
#define die(msg)
static char * buf
Definition: pg_test_fsync.c:72
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:74
static void test_open_sync(const char *msg, int writes_size)
static const char * progname
Definition: pg_test_fsync.c:67
static void process_alarm(SIGNAL_ARGS)
#define USECS_SEC
Definition: pg_test_fsync.c:38
static sig_atomic_t alarm_triggered
Definition: pg_test_fsync.c:76
#define OPS_FORMAT
Definition: pg_test_fsync.c:37
static unsigned int secs_per_test
Definition: pg_test_fsync.c:69
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:32
static void signal_cleanup(SIGNAL_ARGS)
#define pqsignal
Definition: port.h:531
#define pg_pwrite
Definition: port.h:227
const char * get_progname(const char *argv0)
Definition: path.c:652
#define printf(...)
Definition: port.h:245
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define STDOUT_FILENO
Definition: unistd.h:8
#define fsync(fd)
Definition: win32_port.h:83
#define SIGHUP
Definition: win32_port.h:158
#define S_IRUSR
Definition: win32_port.h:279
#define SIGALRM
Definition: win32_port.h:164
#define S_IWUSR
Definition: win32_port.h:282
#define O_DSYNC
Definition: win32_port.h:342