PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
pg_test_fsync.c
Go to the documentation of this file.
1 /*
2  * pg_test_fsync.c
3  * tests all supported fsync() methods
4  */
5 
6 #include "postgres_fe.h"
7 
8 #include <sys/stat.h>
9 #include <sys/time.h>
10 #include <fcntl.h>
11 #include <time.h>
12 #include <unistd.h>
13 #include <signal.h>
14 
15 #include "getopt_long.h"
16 #include "access/xlogdefs.h"
17 
18 
19 /*
20  * put the temp files in the local directory
21  * unless the user specifies otherwise
22  */
23 #define FSYNC_FILENAME "./pg_test_fsync.out"
24 
25 #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
26 
27 #define LABEL_FORMAT " %-30s"
28 #define NA_FORMAT "%20s"
29 #define OPS_FORMAT "%13.3f ops/sec %6.0f usecs/op"
30 #define USECS_SEC 1000000
31 
/*
 * These are macros to avoid timing the function call overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and records the start time in start_t.
 * STOP_TIMER records the stop time in stop_t and prints the result for the
 * caller's local "ops" counter via print_elapse().
 */
#ifndef WIN32
#define START_TIMER \
do { \
    alarm_triggered = false; \
    alarm(secs_per_test); \
    gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 * CreateThread() reports failure by returning NULL.  The handle is closed
 * right away because nothing waits on it; closing the handle does not stop
 * the thread.
 */
#define START_TIMER \
do { \
    HANDLE alarm_thread; \
    alarm_triggered = false; \
    alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
    if (alarm_thread == NULL) \
    { \
        fprintf(stderr, _("Cannot create thread for alarm\n")); \
        exit(1); \
    } \
    CloseHandle(alarm_thread); \
    gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER \
do { \
    gettimeofday(&stop_t, NULL); \
    print_elapse(start_t, stop_t, ops); \
} while (0)
60 
61 
62 static const char *progname;
63 
64 static int secs_per_test = 5;
65 static int needs_unlink = 0;
66 static char full_buf[XLOG_SEG_SIZE],
67  *buf,
69 static struct timeval start_t,
70  stop_t;
71 static bool alarm_triggered = false;
72 
73 
74 static void handle_args(int argc, char *argv[]);
75 static void prepare_buf(void);
76 static void test_open(void);
77 static void test_non_sync(void);
78 static void test_sync(int writes_per_op);
79 static void test_open_syncs(void);
80 static void test_open_sync(const char *msg, int writes_size);
81 static void test_file_descriptor_sync(void);
82 
83 #ifndef WIN32
84 static void process_alarm(int sig);
85 #else
86 static DWORD WINAPI process_alarm(LPVOID param);
87 #endif
88 static void signal_cleanup(int sig);
89 
90 #ifdef HAVE_FSYNC_WRITETHROUGH
91 static int pg_fsync_writethrough(int fd);
92 #endif
93 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
94 static void die(const char *str);
95 
96 
97 int
98 main(int argc, char *argv[])
99 {
100  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
101  progname = get_progname(argv[0]);
102 
103  handle_args(argc, argv);
104 
105  /* Prevent leaving behind the test file */
106  pqsignal(SIGINT, signal_cleanup);
107  pqsignal(SIGTERM, signal_cleanup);
108 #ifndef WIN32
110 #endif
111 #ifdef SIGHUP
112  /* Not defined on win32 */
114 #endif
115 
116  prepare_buf();
117 
118  test_open();
119 
120  /* Test using 1 XLOG_BLCKSZ write */
121  test_sync(1);
122 
123  /* Test using 2 XLOG_BLCKSZ writes */
124  test_sync(2);
125 
126  test_open_syncs();
127 
129 
130  test_non_sync();
131 
132  unlink(filename);
133 
134  return 0;
135 }
136 
137 static void
138 handle_args(int argc, char *argv[])
139 {
140  static struct option long_options[] = {
141  {"filename", required_argument, NULL, 'f'},
142  {"secs-per-test", required_argument, NULL, 's'},
143  {NULL, 0, NULL, 0}
144  };
145 
146  int option; /* Command line option */
147  int optindex = 0; /* used by getopt_long */
148 
149  if (argc > 1)
150  {
151  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
152  {
153  printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
154  exit(0);
155  }
156  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
157  {
158  puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
159  exit(0);
160  }
161  }
162 
163  while ((option = getopt_long(argc, argv, "f:s:",
164  long_options, &optindex)) != -1)
165  {
166  switch (option)
167  {
168  case 'f':
169  filename = strdup(optarg);
170  break;
171 
172  case 's':
173  secs_per_test = atoi(optarg);
174  break;
175 
176  default:
177  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
178  progname);
179  exit(1);
180  break;
181  }
182  }
183 
184  if (argc > optind)
185  {
186  fprintf(stderr,
187  _("%s: too many command-line arguments (first is \"%s\")\n"),
188  progname, argv[optind]);
189  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
190  progname);
191  exit(1);
192  }
193 
194  printf(_("%d seconds per test\n"), secs_per_test);
195 #if PG_O_DIRECT != 0
196  printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
197 #else
198  printf(_("Direct I/O is not supported on this platform.\n"));
199 #endif
200 }
201 
202 static void
204 {
205  int ops;
206 
207  /* write random data into buffer */
208  for (ops = 0; ops < XLOG_SEG_SIZE; ops++)
209  full_buf[ops] = random();
210 
211  buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
212 }
213 
214 static void
216 {
217  int tmpfile;
218 
219  /*
220  * test if we can open the target file
221  */
222  if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
223  die("could not open output file");
224  needs_unlink = 1;
225  if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE)
226  die("write failed");
227 
228  /* fsync now so that dirty buffers don't skew later tests */
229  if (fsync(tmpfile) != 0)
230  die("fsync failed");
231 
232  close(tmpfile);
233 }
234 
235 static void
236 test_sync(int writes_per_op)
237 {
238  int tmpfile,
239  ops,
240  writes;
241  bool fs_warning = false;
242 
243  if (writes_per_op == 1)
244  printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
245  else
246  printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
247  printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
248 
249  /*
250  * Test open_datasync if available
251  */
252  printf(LABEL_FORMAT, "open_datasync");
253  fflush(stdout);
254 
255 #ifdef OPEN_DATASYNC_FLAG
256  if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
257  {
258  printf(NA_FORMAT, _("n/a*\n"));
259  fs_warning = true;
260  }
261  else
262  {
263  START_TIMER;
264  for (ops = 0; alarm_triggered == false; ops++)
265  {
266  for (writes = 0; writes < writes_per_op; writes++)
267  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
268  die("write failed");
269  if (lseek(tmpfile, 0, SEEK_SET) == -1)
270  die("seek failed");
271  }
272  STOP_TIMER;
273  close(tmpfile);
274  }
275 #else
276  printf(NA_FORMAT, _("n/a\n"));
277 #endif
278 
279 /*
280  * Test fdatasync if available
281  */
282  printf(LABEL_FORMAT, "fdatasync");
283  fflush(stdout);
284 
285 #ifdef HAVE_FDATASYNC
286  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
287  die("could not open output file");
288  START_TIMER;
289  for (ops = 0; alarm_triggered == false; ops++)
290  {
291  for (writes = 0; writes < writes_per_op; writes++)
292  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
293  die("write failed");
294  fdatasync(tmpfile);
295  if (lseek(tmpfile, 0, SEEK_SET) == -1)
296  die("seek failed");
297  }
298  STOP_TIMER;
299  close(tmpfile);
300 #else
301  printf(NA_FORMAT, _("n/a\n"));
302 #endif
303 
304 /*
305  * Test fsync
306  */
307  printf(LABEL_FORMAT, "fsync");
308  fflush(stdout);
309 
310  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
311  die("could not open output file");
312  START_TIMER;
313  for (ops = 0; alarm_triggered == false; ops++)
314  {
315  for (writes = 0; writes < writes_per_op; writes++)
316  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
317  die("write failed");
318  if (fsync(tmpfile) != 0)
319  die("fsync failed");
320  if (lseek(tmpfile, 0, SEEK_SET) == -1)
321  die("seek failed");
322  }
323  STOP_TIMER;
324  close(tmpfile);
325 
326 /*
327  * If fsync_writethrough is available, test as well
328  */
329  printf(LABEL_FORMAT, "fsync_writethrough");
330  fflush(stdout);
331 
332 #ifdef HAVE_FSYNC_WRITETHROUGH
333  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
334  die("could not open output file");
335  START_TIMER;
336  for (ops = 0; alarm_triggered == false; ops++)
337  {
338  for (writes = 0; writes < writes_per_op; writes++)
339  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
340  die("write failed");
341  if (pg_fsync_writethrough(tmpfile) != 0)
342  die("fsync failed");
343  if (lseek(tmpfile, 0, SEEK_SET) == -1)
344  die("seek failed");
345  }
346  STOP_TIMER;
347  close(tmpfile);
348 #else
349  printf(NA_FORMAT, _("n/a\n"));
350 #endif
351 
352 /*
353  * Test open_sync if available
354  */
355  printf(LABEL_FORMAT, "open_sync");
356  fflush(stdout);
357 
358 #ifdef OPEN_SYNC_FLAG
359  if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
360  {
361  printf(NA_FORMAT, _("n/a*\n"));
362  fs_warning = true;
363  }
364  else
365  {
366  START_TIMER;
367  for (ops = 0; alarm_triggered == false; ops++)
368  {
369  for (writes = 0; writes < writes_per_op; writes++)
370  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
371 
372  /*
373  * This can generate write failures if the filesystem has
374  * a large block size, e.g. 4k, and there is no support
375  * for O_DIRECT writes smaller than the file system block
376  * size, e.g. XFS.
377  */
378  die("write failed");
379  if (lseek(tmpfile, 0, SEEK_SET) == -1)
380  die("seek failed");
381  }
382  STOP_TIMER;
383  close(tmpfile);
384  }
385 #else
386  printf(NA_FORMAT, _("n/a\n"));
387 #endif
388 
389  if (fs_warning)
390  {
391  printf(_("* This file system and its mount options do not support direct\n"
392  " I/O, e.g. ext4 in journaled mode.\n"));
393  }
394 }
395 
/*
 * Run test_open_sync() once for each write size (16, 8, 4, 2 and 1 kB),
 * so that every variant writes 16kB per operation in total.
 */
static void
test_open_syncs(void)
{
    printf(_("\nCompare open_sync with different write sizes:\n"));
    printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
             "open_sync sizes.)\n"));

    test_open_sync(_(" 1 * 16kB open_sync write"), 16);
    test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
    test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
    test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
    test_open_sync(_("16 *  1kB open_sync writes"), 1);
}
409 
410 /*
411  * Test open_sync with different size files
412  */
413 static void
414 test_open_sync(const char *msg, int writes_size)
415 {
416 #ifdef OPEN_SYNC_FLAG
417  int tmpfile,
418  ops,
419  writes;
420 #endif
421 
422  printf(LABEL_FORMAT, msg);
423  fflush(stdout);
424 
425 #ifdef OPEN_SYNC_FLAG
426  if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
427  printf(NA_FORMAT, _("n/a*\n"));
428  else
429  {
430  START_TIMER;
431  for (ops = 0; alarm_triggered == false; ops++)
432  {
433  for (writes = 0; writes < 16 / writes_size; writes++)
434  if (write(tmpfile, buf, writes_size * 1024) !=
435  writes_size * 1024)
436  die("write failed");
437  if (lseek(tmpfile, 0, SEEK_SET) == -1)
438  die("seek failed");
439  }
440  STOP_TIMER;
441  close(tmpfile);
442  }
443 #else
444  printf(NA_FORMAT, _("n/a\n"));
445 #endif
446 }
447 
448 static void
450 {
451  int tmpfile,
452  ops;
453 
454  /*
455  * Test whether fsync can sync data written on a different descriptor for
456  * the same file. This checks the efficiency of multi-process fsyncs
457  * against the same file. Possibly this should be done with writethrough
458  * on platforms which support it.
459  */
460  printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
461  printf(_("(If the times are similar, fsync() can sync data written on a different\n"
462  "descriptor.)\n"));
463 
464  /*
465  * first write, fsync and close, which is the normal behavior without
466  * multiple descriptors
467  */
468  printf(LABEL_FORMAT, "write, fsync, close");
469  fflush(stdout);
470 
471  START_TIMER;
472  for (ops = 0; alarm_triggered == false; ops++)
473  {
474  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
475  die("could not open output file");
476  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
477  die("write failed");
478  if (fsync(tmpfile) != 0)
479  die("fsync failed");
480  close(tmpfile);
481 
482  /*
483  * open and close the file again to be consistent with the following
484  * test
485  */
486  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
487  die("could not open output file");
488  close(tmpfile);
489  }
490  STOP_TIMER;
491 
492  /*
493  * Now open, write, close, open again and fsync This simulates processes
494  * fsyncing each other's writes.
495  */
496  printf(LABEL_FORMAT, "write, close, fsync");
497  fflush(stdout);
498 
499  START_TIMER;
500  for (ops = 0; alarm_triggered == false; ops++)
501  {
502  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
503  die("could not open output file");
504  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
505  die("write failed");
506  close(tmpfile);
507  /* reopen file */
508  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
509  die("could not open output file");
510  if (fsync(tmpfile) != 0)
511  die("fsync failed");
512  close(tmpfile);
513  }
514  STOP_TIMER;
515 }
516 
517 static void
519 {
520  int tmpfile,
521  ops;
522 
523  /*
524  * Test a simple write without fsync
525  */
526  printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
527  printf(LABEL_FORMAT, "write");
528  fflush(stdout);
529 
530  START_TIMER;
531  for (ops = 0; alarm_triggered == false; ops++)
532  {
533  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
534  die("could not open output file");
535  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
536  die("write failed");
537  close(tmpfile);
538  }
539  STOP_TIMER;
540 }
541 
542 static void
543 signal_cleanup(int signum)
544 {
545  /* Delete the file if it exists. Ignore errors */
546  if (needs_unlink)
547  unlink(filename);
548  /* Finish incomplete line on stdout */
549  puts("");
550  exit(signum);
551 }
552 
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using the platform's write-through primitive: _commit() on
 * WIN32, fcntl(F_FULLFSYNC) where that is defined.
 *
 * Returns 0 on success and -1 on failure.  Where neither primitive exists
 * it fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
    return _commit(fd);
#elif defined(F_FULLFSYNC)
    return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
    errno = ENOSYS;
    return -1;
#endif
}
#endif
568 
569 /*
570  * print out the writes per second for tests
571  */
572 static void
573 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
574 {
575  double total_time = (stop_t.tv_sec - start_t.tv_sec) +
576  (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
577  double per_second = ops / total_time;
578  double avg_op_time_us = (total_time / ops) * USECS_SEC;
579 
580  printf(OPS_FORMAT "\n", per_second, avg_op_time_us);
581 }
582 
583 #ifndef WIN32
584 static void
586 {
587  alarm_triggered = true;
588 }
589 #else
590 static DWORD WINAPI
591 process_alarm(LPVOID param)
592 {
593  /* WIN32 doesn't support alarm, so we create a thread and sleep here */
594  Sleep(secs_per_test * 1000);
595  alarm_triggered = true;
596  ExitThread(0);
597 }
598 #endif
599 
/*
 * Report a fatal error and terminate.
 *
 * Prints the translated form of str followed by the description of the
 * current errno to stderr, then exits with status 1.
 */
static void
die(const char *str)
{
    const char *reason = strerror(errno);
    const char *what = _(str);

    fprintf(stderr, _("%s: %s\n"), what, reason);
    exit(1);
}
#define PG_O_DIRECT
Definition: xlogdefs.h:65
static void test_sync(int writes_per_op)
static char full_buf[XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:66
static int secs_per_test
Definition: pg_test_fsync.c:64
static char * filename
Definition: pg_test_fsync.c:68
#define write(a, b, c)
Definition: win32.h:14
const char * get_progname(const char *argv0)
Definition: path.c:453
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
long random(void)
Definition: random.c:22
int pg_fsync_writethrough(int fd)
Definition: fd.c:362
#define USECS_SEC
Definition: pg_test_fsync.c:30
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static void test_open(void)
static void die(const char *str)
static bool alarm_triggered
Definition: pg_test_fsync.c:71
static void prepare_buf(void)
#define required_argument
Definition: getopt_long.h:25
int optind
Definition: getopt.c:51
static const char * progname
Definition: pg_test_fsync.c:62
#define O_DSYNC
Definition: win32.h:270
static char * buf
Definition: pg_test_fsync.c:66
#define NA_FORMAT
Definition: pg_test_fsync.c:28
int unlink(const char *filename)
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:25
#define fsync(fd)
Definition: win32.h:62
#define STOP_TIMER
Definition: pg_test_fsync.c:55
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1012
#define LABEL_FORMAT
Definition: pg_test_fsync.c:27
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
static void test_open_syncs(void)
static void test_non_sync(void)
static int sig
Definition: pg_ctl.c:88
#define SIGHUP
Definition: win32.h:188
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:69
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:581
static void process_alarm(int sig)
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:168
static void signal_cleanup(int sig)
#define NULL
Definition: c.h:229
int main(int argc, char *argv[])
Definition: pg_test_fsync.c:98
#define START_TIMER
Definition: pg_test_fsync.c:34
#define OPS_FORMAT
Definition: pg_test_fsync.c:29
static int needs_unlink
Definition: pg_test_fsync.c:65
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:550
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:23
char * optarg
Definition: getopt.c:53
const char * strerror(int errnum)
Definition: strerror.c:19
static void test_file_descriptor_sync(void)
#define close(a)
Definition: win32.h:12
#define SIGALRM
Definition: win32.h:194
#define _(x)
Definition: elog.c:84
static void test_open_sync(const char *msg, int writes_size)
static void handle_args(int argc, char *argv[])