PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_test_fsync.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * pg_test_fsync --- tests all supported fsync() methods
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 *
 * src/bin/pg_test_fsync/pg_test_fsync.c
 *
 *-------------------------------------------------------------------------
 */
11
12#include "postgres_fe.h"
13
14#include <limits.h>
15#include <sys/stat.h>
16#include <sys/time.h>
17#include <fcntl.h>
18#include <time.h>
19#include <unistd.h>
20#include <signal.h>
21
22#include "common/logging.h"
23#include "common/pg_prng.h"
24#include "getopt_long.h"
25
/*
 * put the temp files in the local directory
 * unless the user specifies otherwise
 */
#define FSYNC_FILENAME	"./pg_test_fsync.out"

/* WAL block size expressed in kB, used only in messages */
#define XLOG_BLCKSZ_K	(XLOG_BLCKSZ / 1024)

/*
 * Output formats: a left-justified test label, then either the "n/a"
 * marker or the measured rate.
 *
 * NOTE(review): this listing came through a whitespace-collapsing
 * extraction; verify the runs of spaces inside these literals against the
 * upstream file before relying on the column alignment.
 */
#define LABEL_FORMAT	" %-30s"
#define NA_FORMAT		"%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT		gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
#define USECS_SEC		1000000
39
/*
 * START_TIMER arms a one-shot timeout of secs_per_test seconds that will set
 * alarm_triggered, then records the start time in start_t.
 *
 * These are macros to avoid timing the function call overhead.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL (not
 * INVALID_HANDLE_VALUE).  The handle is not needed afterwards, so close it
 * right away; that does not stop the thread, it only avoids leaking one
 * handle per timed test.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif
59
/*
 * STOP_TIMER records the stop time in stop_t and prints the rate for the
 * test just finished.  It expects an int variable named "ops" holding the
 * completed operation count to be in scope at the point of use.
 */
#define STOP_TIMER \
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
65
66
67static const char *progname;
68
69static unsigned int secs_per_test = 5;
70static int needs_unlink = 0;
74static struct timeval start_t,
76static sig_atomic_t alarm_triggered = false;
77
78
/* Forward declarations of the file-local routines. */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);
static void test_open(void);
static void test_non_sync(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);

/* The timeout callback is a signal handler on POSIX, a thread on WIN32. */
#ifndef WIN32
static void process_alarm(SIGNAL_ARGS);
#else
static DWORD WINAPI process_alarm(LPVOID param);
#endif
static void signal_cleanup(SIGNAL_ARGS);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int	pg_fsync_writethrough(int fd);
#endif
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
99
/* Report a translated message followed by the current errno text, and exit. */
#define die(msg) pg_fatal("%s: %m", _(msg))
101
102
103int
104main(int argc, char *argv[])
105{
106 pg_logging_init(argv[0]);
107 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
108 progname = get_progname(argv[0]);
109
110 handle_args(argc, argv);
111
112 /* Prevent leaving behind the test file */
113 pqsignal(SIGINT, signal_cleanup);
114 pqsignal(SIGTERM, signal_cleanup);
115#ifndef WIN32
117#endif
118#ifdef SIGHUP
119 /* Not defined on win32 */
121#endif
122
124
125 prepare_buf();
126
127 test_open();
128
129 /* Test using 1 XLOG_BLCKSZ write */
130 test_sync(1);
131
132 /* Test using 2 XLOG_BLCKSZ writes */
133 test_sync(2);
134
136
138
140
141 unlink(filename);
142
143 return 0;
144}
145
146static void
147handle_args(int argc, char *argv[])
148{
149 static struct option long_options[] = {
150 {"filename", required_argument, NULL, 'f'},
151 {"secs-per-test", required_argument, NULL, 's'},
152 {NULL, 0, NULL, 0}
153 };
154
155 int option; /* Command line option */
156 int optindex = 0; /* used by getopt_long */
157 unsigned long optval; /* used for option parsing */
158 char *endptr;
159
160 if (argc > 1)
161 {
162 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
163 {
164 printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
165 exit(0);
166 }
167 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
168 {
169 puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
170 exit(0);
171 }
172 }
173
174 while ((option = getopt_long(argc, argv, "f:s:",
175 long_options, &optindex)) != -1)
176 {
177 switch (option)
178 {
179 case 'f':
181 break;
182
183 case 's':
184 errno = 0;
185 optval = strtoul(optarg, &endptr, 10);
186
187 if (endptr == optarg || *endptr != '\0' ||
188 errno != 0 || optval != (unsigned int) optval)
189 {
190 pg_log_error("invalid argument for option %s", "--secs-per-test");
191 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
192 exit(1);
193 }
194
195 secs_per_test = (unsigned int) optval;
196 if (secs_per_test == 0)
197 pg_fatal("%s must be in range %u..%u",
198 "--secs-per-test", 1, UINT_MAX);
199 break;
200
201 default:
202 /* getopt_long already emitted a complaint */
203 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
204 exit(1);
205 }
206 }
207
208 if (argc > optind)
209 {
210 pg_log_error("too many command-line arguments (first is \"%s\")",
211 argv[optind]);
212 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
213 exit(1);
214 }
215
216 printf(ngettext("%u second per test\n",
217 "%u seconds per test\n",
220#if defined(O_DIRECT)
221 printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
222#elif defined(F_NOCACHE)
223 printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
224#else
225 printf(_("Direct I/O is not supported on this platform.\n"));
226#endif
227}
228
229static void
231{
232 int ops;
233
234 /* write random data into buffer */
235 for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
237
238 buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
239}
240
241static void
243{
244 int tmpfile;
245
246 /*
247 * test if we can open the target file
248 */
249 if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
250 die("could not open output file");
251 needs_unlink = 1;
252 if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
254 die("write failed");
255
256 /* fsync now so that dirty buffers don't skew later tests */
257 if (fsync(tmpfile) != 0)
258 die("fsync failed");
259
260 close(tmpfile);
261}
262
/*
 * Open a file requesting direct (uncached) I/O where the platform has a way
 * to ask for it.
 *
 * With O_DIRECT the flag is simply added to "flags".  Without O_DIRECT but
 * with F_NOCACHE, the file is opened normally and caching is then switched
 * off with fcntl().  With neither, this is a plain open().
 *
 * Returns the file descriptor, or -1 with errno set.  If the F_NOCACHE
 * step fails, the descriptor is closed and errno reflects the fcntl()
 * failure rather than anything close() did.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
	int			fd;

#ifdef O_DIRECT
	flags |= O_DIRECT;
#endif

	fd = open(path, flags, mode);

#if !defined(O_DIRECT) && defined(F_NOCACHE)
	if (fd >= 0 && fcntl(fd, F_NOCACHE, 1) < 0)
	{
		int			save_errno = errno;

		close(fd);
		errno = save_errno;
		return -1;
	}
#endif

	return fd;
}
287
288static void
289test_sync(int writes_per_op)
290{
291 int tmpfile,
292 ops,
293 writes;
294 bool fs_warning = false;
295
296 if (writes_per_op == 1)
297 printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
298 else
299 printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
300 printf(_("(in \"wal_sync_method\" preference order, except fdatasync is Linux's default)\n"));
301
302 /*
303 * Test open_datasync if available
304 */
305 printf(LABEL_FORMAT, "open_datasync");
306 fflush(stdout);
307
308#ifdef O_DSYNC
309 if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
310 {
311 printf(NA_FORMAT, _("n/a*"));
312 fs_warning = true;
313 }
314 else
315 {
317 for (ops = 0; alarm_triggered == false; ops++)
318 {
319 for (writes = 0; writes < writes_per_op; writes++)
320 if (pg_pwrite(tmpfile,
321 buf,
322 XLOG_BLCKSZ,
323 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
324 die("write failed");
325 }
327 close(tmpfile);
328 }
329#else
330 printf(NA_FORMAT, _("n/a"));
331#endif
332
333/*
334 * Test fdatasync if available
335 */
336 printf(LABEL_FORMAT, "fdatasync");
337 fflush(stdout);
338
339 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
340 die("could not open output file");
342 for (ops = 0; alarm_triggered == false; ops++)
343 {
344 for (writes = 0; writes < writes_per_op; writes++)
345 if (pg_pwrite(tmpfile,
346 buf,
347 XLOG_BLCKSZ,
348 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
349 die("write failed");
350 fdatasync(tmpfile);
351 }
353 close(tmpfile);
354
355/*
356 * Test fsync
357 */
358 printf(LABEL_FORMAT, "fsync");
359 fflush(stdout);
360
361 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
362 die("could not open output file");
364 for (ops = 0; alarm_triggered == false; ops++)
365 {
366 for (writes = 0; writes < writes_per_op; writes++)
367 if (pg_pwrite(tmpfile,
368 buf,
369 XLOG_BLCKSZ,
370 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
371 die("write failed");
372 if (fsync(tmpfile) != 0)
373 die("fsync failed");
374 }
376 close(tmpfile);
377
378/*
379 * If fsync_writethrough is available, test as well
380 */
381 printf(LABEL_FORMAT, "fsync_writethrough");
382 fflush(stdout);
383
384#ifdef HAVE_FSYNC_WRITETHROUGH
385 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
386 die("could not open output file");
388 for (ops = 0; alarm_triggered == false; ops++)
389 {
390 for (writes = 0; writes < writes_per_op; writes++)
391 if (pg_pwrite(tmpfile,
392 buf,
393 XLOG_BLCKSZ,
394 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
395 die("write failed");
396 if (pg_fsync_writethrough(tmpfile) != 0)
397 die("fsync failed");
398 }
400 close(tmpfile);
401#else
402 printf(NA_FORMAT, _("n/a"));
403#endif
404
405/*
406 * Test open_sync if available
407 */
408 printf(LABEL_FORMAT, "open_sync");
409 fflush(stdout);
410
411#ifdef O_SYNC
412 if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
413 {
414 printf(NA_FORMAT, _("n/a*"));
415 fs_warning = true;
416 }
417 else
418 {
420 for (ops = 0; alarm_triggered == false; ops++)
421 {
422 for (writes = 0; writes < writes_per_op; writes++)
423 if (pg_pwrite(tmpfile,
424 buf,
425 XLOG_BLCKSZ,
426 writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
427
428 /*
429 * This can generate write failures if the filesystem has
430 * a large block size, e.g. 4k, and there is no support
431 * for O_DIRECT writes smaller than the file system block
432 * size, e.g. XFS.
433 */
434 die("write failed");
435 }
437 close(tmpfile);
438 }
439#else
440 printf(NA_FORMAT, _("n/a"));
441#endif
442
443 if (fs_warning)
444 {
445 printf(_("* This file system and its mount options do not support direct\n"
446 " I/O, e.g. ext4 in journaled mode.\n"));
447 }
448}
449
/*
 * Run the open_sync test once for each way of splitting 16kB into
 * equal-sized writes (16, 8, 4, 2 and 1 kB), printing a heading first.
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
	test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
	test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
	test_open_sync(_("16 * 1kB open_sync writes"), 1);
}
463
464/*
465 * Test open_sync with different size files
466 */
467static void
468test_open_sync(const char *msg, int writes_size)
469{
470#ifdef O_SYNC
471 int tmpfile,
472 ops,
473 writes;
474#endif
475
476 printf(LABEL_FORMAT, msg);
477 fflush(stdout);
478
479#ifdef O_SYNC
480 if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
481 printf(NA_FORMAT, _("n/a*"));
482 else
483 {
485 for (ops = 0; alarm_triggered == false; ops++)
486 {
487 for (writes = 0; writes < 16 / writes_size; writes++)
488 if (pg_pwrite(tmpfile,
489 buf,
490 writes_size * 1024,
491 writes * writes_size * 1024) !=
492 writes_size * 1024)
493 die("write failed");
494 }
496 close(tmpfile);
497 }
498#else
499 printf(NA_FORMAT, _("n/a"));
500#endif
501}
502
503static void
505{
506 int tmpfile,
507 ops;
508
509 /*
510 * Test whether fsync can sync data written on a different descriptor for
511 * the same file. This checks the efficiency of multi-process fsyncs
512 * against the same file. Possibly this should be done with writethrough
513 * on platforms which support it.
514 */
515 printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
516 printf(_("(If the times are similar, fsync() can sync data written on a different\n"
517 "descriptor.)\n"));
518
519 /*
520 * first write, fsync and close, which is the normal behavior without
521 * multiple descriptors
522 */
523 printf(LABEL_FORMAT, "write, fsync, close");
524 fflush(stdout);
525
527 for (ops = 0; alarm_triggered == false; ops++)
528 {
529 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
530 die("could not open output file");
531 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
532 die("write failed");
533 if (fsync(tmpfile) != 0)
534 die("fsync failed");
535 close(tmpfile);
536
537 /*
538 * open and close the file again to be consistent with the following
539 * test
540 */
541 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
542 die("could not open output file");
543 close(tmpfile);
544 }
546
547 /*
548 * Now open, write, close, open again and fsync This simulates processes
549 * fsyncing each other's writes.
550 */
551 printf(LABEL_FORMAT, "write, close, fsync");
552 fflush(stdout);
553
555 for (ops = 0; alarm_triggered == false; ops++)
556 {
557 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
558 die("could not open output file");
559 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
560 die("write failed");
561 close(tmpfile);
562 /* reopen file */
563 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
564 die("could not open output file");
565 if (fsync(tmpfile) != 0)
566 die("fsync failed");
567 close(tmpfile);
568 }
570}
571
572static void
574{
575 int tmpfile,
576 ops;
577
578 /*
579 * Test a simple write without fsync
580 */
581 printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
582 printf(LABEL_FORMAT, "write");
583 fflush(stdout);
584
585 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
586 die("could not open output file");
588 for (ops = 0; alarm_triggered == false; ops++)
589 {
590 if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
591 die("write failed");
592 }
594 close(tmpfile);
595}
596
597static void
599{
600 int rc;
601
602 /* Delete the file if it exists. Ignore errors */
603 if (needs_unlink)
604 unlink(filename);
605 /* Finish incomplete line on stdout */
606 rc = write(STDOUT_FILENO, "\n", 1);
607 (void) rc; /* silence compiler warnings */
608 _exit(1);
609}
610
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using fcntl(F_FULLFSYNC) where that command exists.
 *
 * Returns 0 on success and -1 on failure.  Where F_FULLFSYNC is not
 * defined, always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
624
625/*
626 * print out the writes per second for tests
627 */
628static void
629print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
630{
631 double total_time = (stop_t.tv_sec - start_t.tv_sec) +
632 (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
633 double per_second = ops / total_time;
634 double avg_op_time_us = (total_time / ops) * USECS_SEC;
635
636 printf(_(OPS_FORMAT), per_second, avg_op_time_us);
637}
638
639#ifndef WIN32
640static void
642{
643 alarm_triggered = true;
644}
645#else
646static DWORD WINAPI
647process_alarm(LPVOID param)
648{
649 /* WIN32 doesn't support alarm, so we create a thread and sleep here */
650 Sleep(secs_per_test * 1000);
651 alarm_triggered = true;
652 ExitThread(0);
653}
654#endif
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:758
#define ngettext(s, p, n)
Definition: c.h:1135
#define SIGNAL_ARGS
Definition: c.h:1303
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1168
#define PG_BINARY
Definition: c.h:1227
uint64_t uint64
Definition: c.h:486
int fdatasync(int fildes)
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:429
#define _(x)
Definition: elog.c:90
int pg_fsync_writethrough(int fd)
Definition: fd.c:460
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define required_argument
Definition: getopt_long.h:25
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
static void const char fflush(stdout)
exit(1)
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_fatal(...)
static PgChecksumMode mode
Definition: pg_checksums.c:55
#define DEFAULT_XLOG_SEG_SIZE
PGDLLIMPORT int optind
Definition: getopt.c:51
PGDLLIMPORT char * optarg
Definition: getopt.c:53
int32 pg_prng_int32(pg_prng_state *state)
Definition: pg_prng.c:243
void pg_prng_seed(pg_prng_state *state, uint64 seed)
Definition: pg_prng.c:89
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
static void test_file_descriptor_sync(void)
int main(int argc, char *argv[])
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
static void prepare_buf(void)
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
#define NA_FORMAT
Definition: pg_test_fsync.c:35
static void test_open_syncs(void)
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:30
static char * filename
Definition: pg_test_fsync.c:73
static void handle_args(int argc, char *argv[])
static int open_direct(const char *path, int flags, mode_t mode)
#define START_TIMER
Definition: pg_test_fsync.c:42
#define STOP_TIMER
Definition: pg_test_fsync.c:60
#define LABEL_FORMAT
Definition: pg_test_fsync.c:34
static int needs_unlink
Definition: pg_test_fsync.c:70
static void test_sync(int writes_per_op)
static void test_open(void)
static void test_non_sync(void)
#define die(msg)
static char * buf
Definition: pg_test_fsync.c:72
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:74
static void test_open_sync(const char *msg, int writes_size)
static const char * progname
Definition: pg_test_fsync.c:67
static void process_alarm(SIGNAL_ARGS)
#define USECS_SEC
Definition: pg_test_fsync.c:38
static sig_atomic_t alarm_triggered
Definition: pg_test_fsync.c:76
#define OPS_FORMAT
Definition: pg_test_fsync.c:37
static unsigned int secs_per_test
Definition: pg_test_fsync.c:69
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:32
static void signal_cleanup(SIGNAL_ARGS)
#define pg_pwrite
Definition: port.h:226
pqsigfunc pqsignal(int signo, pqsigfunc func)
const char * get_progname(const char *argv0)
Definition: path.c:575
#define printf(...)
Definition: port.h:244
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define STDOUT_FILENO
Definition: unistd.h:8
#define fsync(fd)
Definition: win32_port.h:85
#define SIGHUP
Definition: win32_port.h:168
#define S_IRUSR
Definition: win32_port.h:289
#define SIGALRM
Definition: win32_port.h:174
#define S_IWUSR
Definition: win32_port.h:292
#define O_DSYNC
Definition: win32_port.h:352