PostgreSQL Source Code  git master
pg_test_fsync.c
Go to the documentation of this file.
1 /*
2  * pg_test_fsync.c
3  * tests all supported fsync() methods
4  */
5 
6 #include "postgres_fe.h"
7 
8 #include <sys/stat.h>
9 #include <sys/time.h>
10 #include <fcntl.h>
11 #include <time.h>
12 #include <unistd.h>
13 #include <signal.h>
14 
15 #include "access/xlogdefs.h"
16 #include "common/logging.h"
17 #include "getopt_long.h"
18 
19 /*
20  * put the temp files in the local directory
21  * unless the user specifies otherwise
22  */
23 #define FSYNC_FILENAME "./pg_test_fsync.out"
24 
25 #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
26 
27 #define LABEL_FORMAT " %-30s"
28 #define NA_FORMAT "%21s\n"
29 /* translator: maintain alignment with NA_FORMAT */
30 #define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
31 #define USECS_SEC 1000000
32 
33 /* These are macros to avoid timing the function call overhead. */
34 #ifndef WIN32
35 #define START_TIMER \
36 do { \
37  alarm_triggered = false; \
38  alarm(secs_per_test); \
39  gettimeofday(&start_t, NULL); \
40 } while (0)
41 #else
42 /* WIN32 doesn't support alarm, so we create a thread and sleep there */
43 #define START_TIMER \
44 do { \
45  alarm_triggered = false; \
46  if (CreateThread(NULL, 0, process_alarm, NULL, 0, NULL) == \
47  INVALID_HANDLE_VALUE) \
48  { \
49  pg_log_error("could not create thread for alarm"); \
50  exit(1); \
51  } \
52  gettimeofday(&start_t, NULL); \
53 } while (0)
54 #endif
55 
56 #define STOP_TIMER \
57 do { \
58  gettimeofday(&stop_t, NULL); \
59  print_elapse(start_t, stop_t, ops); \
60 } while (0)
61 
62 
63 static const char *progname;
64 
65 static int secs_per_test = 5;
66 static int needs_unlink = 0;
68  *buf,
70 static struct timeval start_t,
71  stop_t;
72 static bool alarm_triggered = false;
73 
74 
75 static void handle_args(int argc, char *argv[]);
76 static void prepare_buf(void);
77 static void test_open(void);
78 static void test_non_sync(void);
79 static void test_sync(int writes_per_op);
80 static void test_open_syncs(void);
81 static void test_open_sync(const char *msg, int writes_size);
82 static void test_file_descriptor_sync(void);
83 
84 #ifndef WIN32
85 static void process_alarm(int sig);
86 #else
87 static DWORD WINAPI process_alarm(LPVOID param);
88 #endif
89 static void signal_cleanup(int sig);
90 
91 #ifdef HAVE_FSYNC_WRITETHROUGH
92 static int pg_fsync_writethrough(int fd);
93 #endif
94 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
95 
96 #define die(msg) do { pg_log_error("%s: %m", _(msg)); exit(1); } while(0)
97 
98 
99 int
100 main(int argc, char *argv[])
101 {
102  pg_logging_init(argv[0]);
103  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
104  progname = get_progname(argv[0]);
105 
106  handle_args(argc, argv);
107 
108  /* Prevent leaving behind the test file */
109  pqsignal(SIGINT, signal_cleanup);
110  pqsignal(SIGTERM, signal_cleanup);
111 #ifndef WIN32
113 #endif
114 #ifdef SIGHUP
115  /* Not defined on win32 */
117 #endif
118 
119  prepare_buf();
120 
121  test_open();
122 
123  /* Test using 1 XLOG_BLCKSZ write */
124  test_sync(1);
125 
126  /* Test using 2 XLOG_BLCKSZ writes */
127  test_sync(2);
128 
129  test_open_syncs();
130 
132 
133  test_non_sync();
134 
135  unlink(filename);
136 
137  return 0;
138 }
139 
140 static void
141 handle_args(int argc, char *argv[])
142 {
143  static struct option long_options[] = {
144  {"filename", required_argument, NULL, 'f'},
145  {"secs-per-test", required_argument, NULL, 's'},
146  {NULL, 0, NULL, 0}
147  };
148 
149  int option; /* Command line option */
150  int optindex = 0; /* used by getopt_long */
151 
152  if (argc > 1)
153  {
154  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
155  {
156  printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
157  exit(0);
158  }
159  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
160  {
161  puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
162  exit(0);
163  }
164  }
165 
166  while ((option = getopt_long(argc, argv, "f:s:",
167  long_options, &optindex)) != -1)
168  {
169  switch (option)
170  {
171  case 'f':
173  break;
174 
175  case 's':
176  secs_per_test = atoi(optarg);
177  break;
178 
179  default:
180  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
181  progname);
182  exit(1);
183  break;
184  }
185  }
186 
187  if (argc > optind)
188  {
189  pg_log_error("too many command-line arguments (first is \"%s\")",
190  argv[optind]);
191  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
192  progname);
193  exit(1);
194  }
195 
196  printf(ngettext("%d second per test\n",
197  "%d seconds per test\n",
198  secs_per_test),
199  secs_per_test);
200 #if PG_O_DIRECT != 0
201  printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
202 #else
203  printf(_("Direct I/O is not supported on this platform.\n"));
204 #endif
205 }
206 
207 static void
209 {
210  int ops;
211 
212  /* write random data into buffer */
213  for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
214  full_buf[ops] = random();
215 
216  buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
217 }
218 
219 static void
221 {
222  int tmpfile;
223 
224  /*
225  * test if we can open the target file
226  */
227  if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
228  die("could not open output file");
229  needs_unlink = 1;
230  if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
232  die("write failed");
233 
234  /* fsync now so that dirty buffers don't skew later tests */
235  if (fsync(tmpfile) != 0)
236  die("fsync failed");
237 
238  close(tmpfile);
239 }
240 
241 static void
242 test_sync(int writes_per_op)
243 {
244  int tmpfile,
245  ops,
246  writes;
247  bool fs_warning = false;
248 
249  if (writes_per_op == 1)
250  printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
251  else
252  printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
253  printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
254 
255  /*
256  * Test open_datasync if available
257  */
258  printf(LABEL_FORMAT, "open_datasync");
259  fflush(stdout);
260 
261 #ifdef OPEN_DATASYNC_FLAG
262  if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
263  {
264  printf(NA_FORMAT, _("n/a*"));
265  fs_warning = true;
266  }
267  else
268  {
269  START_TIMER;
270  for (ops = 0; alarm_triggered == false; ops++)
271  {
272  for (writes = 0; writes < writes_per_op; writes++)
273  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
274  die("write failed");
275  if (lseek(tmpfile, 0, SEEK_SET) == -1)
276  die("seek failed");
277  }
278  STOP_TIMER;
279  close(tmpfile);
280  }
281 #else
282  printf(NA_FORMAT, _("n/a"));
283 #endif
284 
285 /*
286  * Test fdatasync if available
287  */
288  printf(LABEL_FORMAT, "fdatasync");
289  fflush(stdout);
290 
291 #ifdef HAVE_FDATASYNC
292  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
293  die("could not open output file");
294  START_TIMER;
295  for (ops = 0; alarm_triggered == false; ops++)
296  {
297  for (writes = 0; writes < writes_per_op; writes++)
298  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
299  die("write failed");
300  fdatasync(tmpfile);
301  if (lseek(tmpfile, 0, SEEK_SET) == -1)
302  die("seek failed");
303  }
304  STOP_TIMER;
305  close(tmpfile);
306 #else
307  printf(NA_FORMAT, _("n/a"));
308 #endif
309 
310 /*
311  * Test fsync
312  */
313  printf(LABEL_FORMAT, "fsync");
314  fflush(stdout);
315 
316  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
317  die("could not open output file");
318  START_TIMER;
319  for (ops = 0; alarm_triggered == false; ops++)
320  {
321  for (writes = 0; writes < writes_per_op; writes++)
322  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
323  die("write failed");
324  if (fsync(tmpfile) != 0)
325  die("fsync failed");
326  if (lseek(tmpfile, 0, SEEK_SET) == -1)
327  die("seek failed");
328  }
329  STOP_TIMER;
330  close(tmpfile);
331 
332 /*
333  * If fsync_writethrough is available, test as well
334  */
335  printf(LABEL_FORMAT, "fsync_writethrough");
336  fflush(stdout);
337 
338 #ifdef HAVE_FSYNC_WRITETHROUGH
339  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
340  die("could not open output file");
341  START_TIMER;
342  for (ops = 0; alarm_triggered == false; ops++)
343  {
344  for (writes = 0; writes < writes_per_op; writes++)
345  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
346  die("write failed");
347  if (pg_fsync_writethrough(tmpfile) != 0)
348  die("fsync failed");
349  if (lseek(tmpfile, 0, SEEK_SET) == -1)
350  die("seek failed");
351  }
352  STOP_TIMER;
353  close(tmpfile);
354 #else
355  printf(NA_FORMAT, _("n/a"));
356 #endif
357 
358 /*
359  * Test open_sync if available
360  */
361  printf(LABEL_FORMAT, "open_sync");
362  fflush(stdout);
363 
364 #ifdef OPEN_SYNC_FLAG
365  if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
366  {
367  printf(NA_FORMAT, _("n/a*"));
368  fs_warning = true;
369  }
370  else
371  {
372  START_TIMER;
373  for (ops = 0; alarm_triggered == false; ops++)
374  {
375  for (writes = 0; writes < writes_per_op; writes++)
376  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
377 
378  /*
379  * This can generate write failures if the filesystem has
380  * a large block size, e.g. 4k, and there is no support
381  * for O_DIRECT writes smaller than the file system block
382  * size, e.g. XFS.
383  */
384  die("write failed");
385  if (lseek(tmpfile, 0, SEEK_SET) == -1)
386  die("seek failed");
387  }
388  STOP_TIMER;
389  close(tmpfile);
390  }
391 #else
392  printf(NA_FORMAT, _("n/a"));
393 #endif
394 
395  if (fs_warning)
396  {
397  printf(_("* This file system and its mount options do not support direct\n"
398  " I/O, e.g. ext4 in journaled mode.\n"));
399  }
400 }
401 
402 static void
404 {
405  printf(_("\nCompare open_sync with different write sizes:\n"));
406  printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
407  "open_sync sizes.)\n"));
408 
409  test_open_sync(_(" 1 * 16kB open_sync write"), 16);
410  test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
411  test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
412  test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
413  test_open_sync(_("16 * 1kB open_sync writes"), 1);
414 }
415 
416 /*
417  * Test open_sync with different size files
418  */
419 static void
420 test_open_sync(const char *msg, int writes_size)
421 {
422 #ifdef OPEN_SYNC_FLAG
423  int tmpfile,
424  ops,
425  writes;
426 #endif
427 
428  printf(LABEL_FORMAT, msg);
429  fflush(stdout);
430 
431 #ifdef OPEN_SYNC_FLAG
432  if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
433  printf(NA_FORMAT, _("n/a*"));
434  else
435  {
436  START_TIMER;
437  for (ops = 0; alarm_triggered == false; ops++)
438  {
439  for (writes = 0; writes < 16 / writes_size; writes++)
440  if (write(tmpfile, buf, writes_size * 1024) !=
441  writes_size * 1024)
442  die("write failed");
443  if (lseek(tmpfile, 0, SEEK_SET) == -1)
444  die("seek failed");
445  }
446  STOP_TIMER;
447  close(tmpfile);
448  }
449 #else
450  printf(NA_FORMAT, _("n/a"));
451 #endif
452 }
453 
454 static void
456 {
457  int tmpfile,
458  ops;
459 
460  /*
461  * Test whether fsync can sync data written on a different descriptor for
462  * the same file. This checks the efficiency of multi-process fsyncs
463  * against the same file. Possibly this should be done with writethrough
464  * on platforms which support it.
465  */
466  printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
467  printf(_("(If the times are similar, fsync() can sync data written on a different\n"
468  "descriptor.)\n"));
469 
470  /*
471  * first write, fsync and close, which is the normal behavior without
472  * multiple descriptors
473  */
474  printf(LABEL_FORMAT, "write, fsync, close");
475  fflush(stdout);
476 
477  START_TIMER;
478  for (ops = 0; alarm_triggered == false; ops++)
479  {
480  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
481  die("could not open output file");
482  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
483  die("write failed");
484  if (fsync(tmpfile) != 0)
485  die("fsync failed");
486  close(tmpfile);
487 
488  /*
489  * open and close the file again to be consistent with the following
490  * test
491  */
492  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
493  die("could not open output file");
494  close(tmpfile);
495  }
496  STOP_TIMER;
497 
498  /*
499  * Now open, write, close, open again and fsync This simulates processes
500  * fsyncing each other's writes.
501  */
502  printf(LABEL_FORMAT, "write, close, fsync");
503  fflush(stdout);
504 
505  START_TIMER;
506  for (ops = 0; alarm_triggered == false; ops++)
507  {
508  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
509  die("could not open output file");
510  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
511  die("write failed");
512  close(tmpfile);
513  /* reopen file */
514  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
515  die("could not open output file");
516  if (fsync(tmpfile) != 0)
517  die("fsync failed");
518  close(tmpfile);
519  }
520  STOP_TIMER;
521 }
522 
523 static void
525 {
526  int tmpfile,
527  ops;
528 
529  /*
530  * Test a simple write without fsync
531  */
532  printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
533  printf(LABEL_FORMAT, "write");
534  fflush(stdout);
535 
536  START_TIMER;
537  for (ops = 0; alarm_triggered == false; ops++)
538  {
539  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
540  die("could not open output file");
541  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
542  die("write failed");
543  close(tmpfile);
544  }
545  STOP_TIMER;
546 }
547 
548 static void
549 signal_cleanup(int signum)
550 {
551  /* Delete the file if it exists. Ignore errors */
552  if (needs_unlink)
553  unlink(filename);
554  /* Finish incomplete line on stdout */
555  puts("");
556  exit(signum);
557 }
558 
559 #ifdef HAVE_FSYNC_WRITETHROUGH
560 
561 static int
563 {
564 #ifdef WIN32
565  return _commit(fd);
566 #elif defined(F_FULLFSYNC)
567  return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
568 #else
569  errno = ENOSYS;
570  return -1;
571 #endif
572 }
573 #endif
574 
575 /*
576  * print out the writes per second for tests
577  */
578 static void
579 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
580 {
581  double total_time = (stop_t.tv_sec - start_t.tv_sec) +
582  (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
583  double per_second = ops / total_time;
584  double avg_op_time_us = (total_time / ops) * USECS_SEC;
585 
586  printf(_(OPS_FORMAT), per_second, avg_op_time_us);
587 }
588 
589 #ifndef WIN32
590 static void
592 {
593  alarm_triggered = true;
594 }
595 #else
596 static DWORD WINAPI
597 process_alarm(LPVOID param)
598 {
599  /* WIN32 doesn't support alarm, so we create a thread and sleep here */
600  Sleep(secs_per_test * 1000);
601  alarm_triggered = true;
602  ExitThread(0);
603 }
604 #endif
#define PG_O_DIRECT
Definition: xlogdefs.h:72
static void test_sync(int writes_per_op)
static int secs_per_test
Definition: pg_test_fsync.c:65
static char * filename
Definition: pg_test_fsync.c:69
#define write(a, b, c)
Definition: win32.h:14
const char * get_progname(const char *argv0)
Definition: path.c:453
#define pg_log_error(...)
Definition: logging.h:79
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
long random(void)
Definition: random.c:22
int pg_fsync_writethrough(int fd)
Definition: fd.c:359
void pg_logging_init(const char *argv0)
Definition: logging.c:39
#define USECS_SEC
Definition: pg_test_fsync.c:31
#define DEFAULT_XLOG_SEG_SIZE
#define printf(...)
Definition: port.h:198
#define fprintf
Definition: port.h:196
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:67
static void test_open(void)
static bool alarm_triggered
Definition: pg_test_fsync.c:72
static void prepare_buf(void)
#define required_argument
Definition: getopt_long.h:25
#define fsync(fd)
Definition: win32_port.h:63
int optind
Definition: getopt.c:50
#define S_IWUSR
Definition: win32_port.h:263
static const char * progname
Definition: pg_test_fsync.c:63
static char * buf
Definition: pg_test_fsync.c:67
#define SIGHUP
Definition: win32_port.h:154
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
#define NA_FORMAT
Definition: pg_test_fsync.c:28
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:25
#define STOP_TIMER
Definition: pg_test_fsync.c:56
#define ngettext(s, p, n)
Definition: c.h:1128
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1160
#define O_DSYNC
Definition: win32_port.h:308
#define LABEL_FORMAT
Definition: pg_test_fsync.c:27
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
static void test_open_syncs(void)
static void test_non_sync(void)
static int sig
Definition: pg_ctl.c:84
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:70
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:679
static void process_alarm(int sig)
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:170
static void signal_cleanup(int sig)
int main(int argc, char *argv[])
#define START_TIMER
Definition: pg_test_fsync.c:35
#define OPS_FORMAT
Definition: pg_test_fsync.c:30
#define SIGALRM
Definition: win32_port.h:160
#define S_IRUSR
Definition: win32_port.h:260
static int needs_unlink
Definition: pg_test_fsync.c:66
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:565
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:23
char * optarg
Definition: getopt.c:52
static void test_file_descriptor_sync(void)
#define close(a)
Definition: win32.h:12
#define _(x)
Definition: elog.c:87
#define die(msg)
Definition: pg_test_fsync.c:96
static void test_open_sync(const char *msg, int writes_size)
static void handle_args(int argc, char *argv[])