PostgreSQL Source Code  git master
pg_test_fsync.c
Go to the documentation of this file.
1 /*
2  * pg_test_fsync.c
3  * tests all supported fsync() methods
4  */
5 
#include "postgres_fe.h"

#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <limits.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>

#include "getopt_long.h"
#include "access/xlogdefs.h"
17 
18 
/*
 * put the temp files in the local directory
 * unless the user specifies otherwise
 */
#define FSYNC_FILENAME "./pg_test_fsync.out"

/* WAL block size in kB; used in the headings printed before each test group */
#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)

/* printf format for the left-justified label that starts each result line */
#define LABEL_FORMAT " %-30s"
/* printf format for the "n/a" marker printed when a method cannot be tested */
#define NA_FORMAT "%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
/* microseconds per second, for converting seconds-per-op to usecs-per-op */
#define USECS_SEC 1000000
32 
/*
 * These are macros to avoid timing the function call overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for process_alarm to run
 * after secs_per_test seconds, and records the start time in start_t.
 * The test loops spin until alarm_triggered becomes true.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE, so that is what must be tested.  The handle is not
 * needed afterwards; closing it immediately does not stop the thread but
 * avoids leaking one handle per timed test.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
	{ \
		fprintf(stderr, _("Could not create thread for alarm\n")); \
		exit(1); \
	} \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif
55 
/*
 * STOP_TIMER records the end time in stop_t and prints the throughput for
 * the test that just ran.  It expects a local variable named "ops" holding
 * the number of completed operations to be in scope at the point of use.
 */
#define STOP_TIMER \
do { \
 gettimeofday(&stop_t, NULL); \
 print_elapse(start_t, stop_t, ops); \
} while (0)
61 
62 
63 static const char *progname;
64 
65 static int secs_per_test = 5;
66 static int needs_unlink = 0;
68  *buf,
70 static struct timeval start_t,
71  stop_t;
72 static bool alarm_triggered = false;
73 
74 
/* command-line handling and test setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);
static void test_open(void);

/* the individual timed test groups */
static void test_non_sync(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);

/* timer expiry callback: a signal handler, or a thread entry point on WIN32 */
#ifndef WIN32
static void process_alarm(int sig);
#else
static DWORD WINAPI process_alarm(LPVOID param);
#endif
/* signal handler that removes the test file and exits */
static void signal_cleanup(int sig);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int pg_fsync_writethrough(int fd);
#endif
/* result reporting and fatal-error helper */
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
static void die(const char *str);
96 
97 
98 int
99 main(int argc, char *argv[])
100 {
101  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
102  progname = get_progname(argv[0]);
103 
104  handle_args(argc, argv);
105 
106  /* Prevent leaving behind the test file */
107  pqsignal(SIGINT, signal_cleanup);
108  pqsignal(SIGTERM, signal_cleanup);
109 #ifndef WIN32
111 #endif
112 #ifdef SIGHUP
113  /* Not defined on win32 */
115 #endif
116 
117  prepare_buf();
118 
119  test_open();
120 
121  /* Test using 1 XLOG_BLCKSZ write */
122  test_sync(1);
123 
124  /* Test using 2 XLOG_BLCKSZ writes */
125  test_sync(2);
126 
127  test_open_syncs();
128 
130 
131  test_non_sync();
132 
133  unlink(filename);
134 
135  return 0;
136 }
137 
138 static void
139 handle_args(int argc, char *argv[])
140 {
141  static struct option long_options[] = {
142  {"filename", required_argument, NULL, 'f'},
143  {"secs-per-test", required_argument, NULL, 's'},
144  {NULL, 0, NULL, 0}
145  };
146 
147  int option; /* Command line option */
148  int optindex = 0; /* used by getopt_long */
149 
150  if (argc > 1)
151  {
152  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
153  {
154  printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
155  exit(0);
156  }
157  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
158  {
159  puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
160  exit(0);
161  }
162  }
163 
164  while ((option = getopt_long(argc, argv, "f:s:",
165  long_options, &optindex)) != -1)
166  {
167  switch (option)
168  {
169  case 'f':
170  filename = strdup(optarg);
171  break;
172 
173  case 's':
174  secs_per_test = atoi(optarg);
175  break;
176 
177  default:
178  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
179  progname);
180  exit(1);
181  break;
182  }
183  }
184 
185  if (argc > optind)
186  {
187  fprintf(stderr,
188  _("%s: too many command-line arguments (first is \"%s\")\n"),
189  progname, argv[optind]);
190  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
191  progname);
192  exit(1);
193  }
194 
195  printf(ngettext("%d second per test\n",
196  "%d seconds per test\n",
197  secs_per_test),
198  secs_per_test);
199 #if PG_O_DIRECT != 0
200  printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
201 #else
202  printf(_("Direct I/O is not supported on this platform.\n"));
203 #endif
204 }
205 
206 static void
208 {
209  int ops;
210 
211  /* write random data into buffer */
212  for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
213  full_buf[ops] = random();
214 
215  buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
216 }
217 
218 static void
220 {
221  int tmpfile;
222 
223  /*
224  * test if we can open the target file
225  */
226  if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
227  die("could not open output file");
228  needs_unlink = 1;
229  if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
231  die("write failed");
232 
233  /* fsync now so that dirty buffers don't skew later tests */
234  if (fsync(tmpfile) != 0)
235  die("fsync failed");
236 
237  close(tmpfile);
238 }
239 
240 static void
241 test_sync(int writes_per_op)
242 {
243  int tmpfile,
244  ops,
245  writes;
246  bool fs_warning = false;
247 
248  if (writes_per_op == 1)
249  printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
250  else
251  printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
252  printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
253 
254  /*
255  * Test open_datasync if available
256  */
257  printf(LABEL_FORMAT, "open_datasync");
258  fflush(stdout);
259 
260 #ifdef OPEN_DATASYNC_FLAG
261  if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
262  {
263  printf(NA_FORMAT, _("n/a*"));
264  fs_warning = true;
265  }
266  else
267  {
268  START_TIMER;
269  for (ops = 0; alarm_triggered == false; ops++)
270  {
271  for (writes = 0; writes < writes_per_op; writes++)
272  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
273  die("write failed");
274  if (lseek(tmpfile, 0, SEEK_SET) == -1)
275  die("seek failed");
276  }
277  STOP_TIMER;
278  close(tmpfile);
279  }
280 #else
281  printf(NA_FORMAT, _("n/a"));
282 #endif
283 
284 /*
285  * Test fdatasync if available
286  */
287  printf(LABEL_FORMAT, "fdatasync");
288  fflush(stdout);
289 
290 #ifdef HAVE_FDATASYNC
291  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
292  die("could not open output file");
293  START_TIMER;
294  for (ops = 0; alarm_triggered == false; ops++)
295  {
296  for (writes = 0; writes < writes_per_op; writes++)
297  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
298  die("write failed");
299  fdatasync(tmpfile);
300  if (lseek(tmpfile, 0, SEEK_SET) == -1)
301  die("seek failed");
302  }
303  STOP_TIMER;
304  close(tmpfile);
305 #else
306  printf(NA_FORMAT, _("n/a"));
307 #endif
308 
309 /*
310  * Test fsync
311  */
312  printf(LABEL_FORMAT, "fsync");
313  fflush(stdout);
314 
315  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
316  die("could not open output file");
317  START_TIMER;
318  for (ops = 0; alarm_triggered == false; ops++)
319  {
320  for (writes = 0; writes < writes_per_op; writes++)
321  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
322  die("write failed");
323  if (fsync(tmpfile) != 0)
324  die("fsync failed");
325  if (lseek(tmpfile, 0, SEEK_SET) == -1)
326  die("seek failed");
327  }
328  STOP_TIMER;
329  close(tmpfile);
330 
331 /*
332  * If fsync_writethrough is available, test as well
333  */
334  printf(LABEL_FORMAT, "fsync_writethrough");
335  fflush(stdout);
336 
337 #ifdef HAVE_FSYNC_WRITETHROUGH
338  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
339  die("could not open output file");
340  START_TIMER;
341  for (ops = 0; alarm_triggered == false; ops++)
342  {
343  for (writes = 0; writes < writes_per_op; writes++)
344  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
345  die("write failed");
346  if (pg_fsync_writethrough(tmpfile) != 0)
347  die("fsync failed");
348  if (lseek(tmpfile, 0, SEEK_SET) == -1)
349  die("seek failed");
350  }
351  STOP_TIMER;
352  close(tmpfile);
353 #else
354  printf(NA_FORMAT, _("n/a"));
355 #endif
356 
357 /*
358  * Test open_sync if available
359  */
360  printf(LABEL_FORMAT, "open_sync");
361  fflush(stdout);
362 
363 #ifdef OPEN_SYNC_FLAG
364  if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
365  {
366  printf(NA_FORMAT, _("n/a*"));
367  fs_warning = true;
368  }
369  else
370  {
371  START_TIMER;
372  for (ops = 0; alarm_triggered == false; ops++)
373  {
374  for (writes = 0; writes < writes_per_op; writes++)
375  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
376 
377  /*
378  * This can generate write failures if the filesystem has
379  * a large block size, e.g. 4k, and there is no support
380  * for O_DIRECT writes smaller than the file system block
381  * size, e.g. XFS.
382  */
383  die("write failed");
384  if (lseek(tmpfile, 0, SEEK_SET) == -1)
385  die("seek failed");
386  }
387  STOP_TIMER;
388  close(tmpfile);
389  }
390 #else
391  printf(NA_FORMAT, _("n/a"));
392 #endif
393 
394  if (fs_warning)
395  {
396  printf(_("* This file system and its mount options do not support direct\n"
397  " I/O, e.g. ext4 in journaled mode.\n"));
398  }
399 }
400 
/*
 * Run the open_sync test with five different write sizes, each adding up
 * to 16kB per operation, and print one result line per size.
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
	test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
	test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
	test_open_sync(_("16 * 1kB open_sync writes"), 1);
}
414 
415 /*
416  * Test open_sync with different size files
417  */
418 static void
419 test_open_sync(const char *msg, int writes_size)
420 {
421 #ifdef OPEN_SYNC_FLAG
422  int tmpfile,
423  ops,
424  writes;
425 #endif
426 
427  printf(LABEL_FORMAT, msg);
428  fflush(stdout);
429 
430 #ifdef OPEN_SYNC_FLAG
431  if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
432  printf(NA_FORMAT, _("n/a*"));
433  else
434  {
435  START_TIMER;
436  for (ops = 0; alarm_triggered == false; ops++)
437  {
438  for (writes = 0; writes < 16 / writes_size; writes++)
439  if (write(tmpfile, buf, writes_size * 1024) !=
440  writes_size * 1024)
441  die("write failed");
442  if (lseek(tmpfile, 0, SEEK_SET) == -1)
443  die("seek failed");
444  }
445  STOP_TIMER;
446  close(tmpfile);
447  }
448 #else
449  printf(NA_FORMAT, _("n/a"));
450 #endif
451 }
452 
453 static void
455 {
456  int tmpfile,
457  ops;
458 
459  /*
460  * Test whether fsync can sync data written on a different descriptor for
461  * the same file. This checks the efficiency of multi-process fsyncs
462  * against the same file. Possibly this should be done with writethrough
463  * on platforms which support it.
464  */
465  printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
466  printf(_("(If the times are similar, fsync() can sync data written on a different\n"
467  "descriptor.)\n"));
468 
469  /*
470  * first write, fsync and close, which is the normal behavior without
471  * multiple descriptors
472  */
473  printf(LABEL_FORMAT, "write, fsync, close");
474  fflush(stdout);
475 
476  START_TIMER;
477  for (ops = 0; alarm_triggered == false; ops++)
478  {
479  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
480  die("could not open output file");
481  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
482  die("write failed");
483  if (fsync(tmpfile) != 0)
484  die("fsync failed");
485  close(tmpfile);
486 
487  /*
488  * open and close the file again to be consistent with the following
489  * test
490  */
491  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
492  die("could not open output file");
493  close(tmpfile);
494  }
495  STOP_TIMER;
496 
497  /*
498  * Now open, write, close, open again and fsync This simulates processes
499  * fsyncing each other's writes.
500  */
501  printf(LABEL_FORMAT, "write, close, fsync");
502  fflush(stdout);
503 
504  START_TIMER;
505  for (ops = 0; alarm_triggered == false; ops++)
506  {
507  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
508  die("could not open output file");
509  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
510  die("write failed");
511  close(tmpfile);
512  /* reopen file */
513  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
514  die("could not open output file");
515  if (fsync(tmpfile) != 0)
516  die("fsync failed");
517  close(tmpfile);
518  }
519  STOP_TIMER;
520 }
521 
522 static void
524 {
525  int tmpfile,
526  ops;
527 
528  /*
529  * Test a simple write without fsync
530  */
531  printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
532  printf(LABEL_FORMAT, "write");
533  fflush(stdout);
534 
535  START_TIMER;
536  for (ops = 0; alarm_triggered == false; ops++)
537  {
538  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
539  die("could not open output file");
540  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
541  die("write failed");
542  close(tmpfile);
543  }
544  STOP_TIMER;
545 }
546 
547 static void
548 signal_cleanup(int signum)
549 {
550  /* Delete the file if it exists. Ignore errors */
551  if (needs_unlink)
552  unlink(filename);
553  /* Finish incomplete line on stdout */
554  puts("");
555  exit(signum);
556 }
557 
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using the platform's write-through primitive.
 *
 * Uses _commit() on WIN32 and fcntl(F_FULLFSYNC) where that is defined.
 * Returns 0 on success and -1 on failure; on platforms with neither
 * primitive it fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
573 
574 /*
575  * print out the writes per second for tests
576  */
577 static void
578 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
579 {
580  double total_time = (stop_t.tv_sec - start_t.tv_sec) +
581  (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
582  double per_second = ops / total_time;
583  double avg_op_time_us = (total_time / ops) * USECS_SEC;
584 
585  printf(_(OPS_FORMAT), per_second, avg_op_time_us);
586 }
587 
588 #ifndef WIN32
589 static void
591 {
592  alarm_triggered = true;
593 }
594 #else
595 static DWORD WINAPI
596 process_alarm(LPVOID param)
597 {
598  /* WIN32 doesn't support alarm, so we create a thread and sleep here */
599  Sleep(secs_per_test * 1000);
600  alarm_triggered = true;
601  ExitThread(0);
602 }
603 #endif
604 
/*
 * Report a fatal error and terminate.
 *
 * str: untranslated description of the operation that failed; it is passed
 *		through _() here and printed together with the message for the
 *		current errno.  Never returns; the exit status is 1.
 */
static void
die(const char *str)
{
	const char *reason = strerror(errno);

	fprintf(stderr, _("%s: %s\n"), _(str), reason);
	exit(1);
}
#define PG_O_DIRECT
Definition: xlogdefs.h:65
static void test_sync(int writes_per_op)
static int secs_per_test
Definition: pg_test_fsync.c:65
static char * filename
Definition: pg_test_fsync.c:69
#define write(a, b, c)
Definition: win32.h:14
const char * get_progname(const char *argv0)
Definition: path.c:453
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
long random(void)
Definition: random.c:22
int pg_fsync_writethrough(int fd)
Definition: fd.c:367
#define USECS_SEC
Definition: pg_test_fsync.c:31
#define DEFAULT_XLOG_SEG_SIZE
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:67
static void test_open(void)
static void die(const char *str)
static bool alarm_triggered
Definition: pg_test_fsync.c:72
static void prepare_buf(void)
#define required_argument
Definition: getopt_long.h:25
#define fsync(fd)
Definition: win32_port.h:63
int optind
Definition: getopt.c:51
#define S_IWUSR
Definition: win32_port.h:274
static const char * progname
Definition: pg_test_fsync.c:63
static char * buf
Definition: pg_test_fsync.c:67
#define SIGHUP
Definition: win32_port.h:163
#define NA_FORMAT
Definition: pg_test_fsync.c:28
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:25
#define STOP_TIMER
Definition: pg_test_fsync.c:56
#define ngettext(s, p, n)
Definition: c.h:967
#define PG_TEXTDOMAIN(domain)
Definition: c.h:999
#define O_DSYNC
Definition: win32_port.h:319
#define LABEL_FORMAT
Definition: pg_test_fsync.c:27
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
static void test_open_syncs(void)
static void test_non_sync(void)
static int sig
Definition: pg_ctl.c:80
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:70
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:616
static void process_alarm(int sig)
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:168
static void signal_cleanup(int sig)
int main(int argc, char *argv[])
Definition: pg_test_fsync.c:99
#define START_TIMER
Definition: pg_test_fsync.c:35
#define OPS_FORMAT
Definition: pg_test_fsync.c:30
#define SIGALRM
Definition: win32_port.h:169
#define S_IRUSR
Definition: win32_port.h:271
static int needs_unlink
Definition: pg_test_fsync.c:66
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:550
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:23
char * optarg
Definition: getopt.c:53
const char * strerror(int errnum)
Definition: strerror.c:19
static void test_file_descriptor_sync(void)
#define close(a)
Definition: win32.h:12
#define _(x)
Definition: elog.c:84
static void test_open_sync(const char *msg, int writes_size)
static void handle_args(int argc, char *argv[])