PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
filemap.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * filemap.c
4  * A data structure for keeping track of files that have changed.
5  *
6  * Copyright (c) 2013-2017, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 
11 #include "postgres_fe.h"
12 
13 #include <sys/stat.h>
14 #include <unistd.h>
15 
16 #include "datapagemap.h"
17 #include "filemap.h"
18 #include "logging.h"
19 #include "pg_rewind.h"
20 
21 #include "common/string.h"
22 #include "catalog/pg_tablespace.h"
23 #include "storage/fd.h"
24 
26 
27 static bool isRelDataFile(const char *path);
28 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
29  BlockNumber segno);
30 static int path_cmp(const void *a, const void *b);
31 static int final_filemap_cmp(const void *a, const void *b);
32 static void filemap_list_to_array(filemap_t *map);
33 
34 /*
35  * Create a new file map (stored in the global pointer "filemap").
36  */
37 void
39 {
40  filemap_t *map;
41 
42  map = pg_malloc(sizeof(filemap_t));
43  map->first = map->last = NULL;
44  map->nlist = 0;
45  map->array = NULL;
46  map->narray = 0;
47 
48  Assert(filemap == NULL);
49  filemap = map;
50 }
51 
52 /*
53  * Callback for processing source file list.
54  *
55  * This is called once for every file in the source server. We decide what
56  * action needs to be taken for the file, depending on whether the file
57  * exists in the target and whether the size matches.
58  */
59 void
60 process_source_file(const char *path, file_type_t type, size_t newsize,
61  const char *link_target)
62 {
63  bool exists;
64  char localpath[MAXPGPATH];
65  struct stat statbuf;
66  filemap_t *map = filemap;
68  size_t oldsize = 0;
69  file_entry_t *entry;
70 
71  Assert(map->array == NULL);
72 
73  /*
74  * Completely ignore some special files in source and destination.
75  */
76  if (strcmp(path, "postmaster.pid") == 0 ||
77  strcmp(path, "postmaster.opts") == 0)
78  return;
79 
80  /*
81  * Pretend that pg_wal is a directory, even if it's really a symlink. We
82  * don't want to mess with the symlink itself, nor complain if it's a
83  * symlink in source but not in target or vice versa.
84  */
85  if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
86  type = FILE_TYPE_DIRECTORY;
87 
88  /*
89  * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
90  * This has the effect that all temporary files in the destination will be
91  * removed.
92  */
93  if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
94  return;
95  if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
96  return;
97 
98  /*
99  * sanity check: a filename that looks like a data file better be a
100  * regular file
101  */
102  if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
103  pg_fatal("data file \"%s\" in source is not a regular file\n", path);
104 
105  snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
106 
107  /* Does the corresponding file exist in the target data dir? */
108  if (lstat(localpath, &statbuf) < 0)
109  {
110  if (errno != ENOENT)
111  pg_fatal("could not stat file \"%s\": %s\n",
112  localpath, strerror(errno));
113 
114  exists = false;
115  }
116  else
117  exists = true;
118 
119  switch (type)
120  {
121  case FILE_TYPE_DIRECTORY:
122  if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
123  {
124  /* it's a directory in source, but not in target. Strange.. */
125  pg_fatal("\"%s\" is not a directory\n", localpath);
126  }
127 
128  if (!exists)
129  action = FILE_ACTION_CREATE;
130  else
131  action = FILE_ACTION_NONE;
132  oldsize = 0;
133  break;
134 
135  case FILE_TYPE_SYMLINK:
136  if (exists &&
137 #ifndef WIN32
138  !S_ISLNK(statbuf.st_mode)
139 #else
140  !pgwin32_is_junction(localpath)
141 #endif
142  )
143  {
144  /*
145  * It's a symbolic link in source, but not in target.
146  * Strange..
147  */
148  pg_fatal("\"%s\" is not a symbolic link\n", localpath);
149  }
150 
151  if (!exists)
152  action = FILE_ACTION_CREATE;
153  else
154  action = FILE_ACTION_NONE;
155  oldsize = 0;
156  break;
157 
158  case FILE_TYPE_REGULAR:
159  if (exists && !S_ISREG(statbuf.st_mode))
160  pg_fatal("\"%s\" is not a regular file\n", localpath);
161 
162  if (!exists || !isRelDataFile(path))
163  {
164  /*
165  * File exists in source, but not in target. Or it's a
166  * non-data file that we have no special processing for. Copy
167  * it in toto.
168  *
169  * An exception: PG_VERSIONs should be identical, but avoid
170  * overwriting it for paranoia.
171  */
172  if (pg_str_endswith(path, "PG_VERSION"))
173  {
174  action = FILE_ACTION_NONE;
175  oldsize = statbuf.st_size;
176  }
177  else
178  {
179  action = FILE_ACTION_COPY;
180  oldsize = 0;
181  }
182  }
183  else
184  {
185  /*
186  * It's a data file that exists in both.
187  *
188  * If it's larger in target, we can truncate it. There will
189  * also be a WAL record of the truncation in the source
190  * system, so WAL replay would eventually truncate the target
191  * too, but we might as well do it now.
192  *
193  * If it's smaller in the target, it means that it has been
194  * truncated in the target, or enlarged in the source, or
195  * both. If it was truncated in the target, we need to copy
196  * the missing tail from the source system. If it was enlarged
197  * in the source system, there will be WAL records in the
198  * source system for the new blocks, so we wouldn't need to
199  * copy them here. But we don't know which scenario we're
200  * dealing with, and there's no harm in copying the missing
201  * blocks now, so do it now.
202  *
203  * If it's the same size, do nothing here. Any blocks modified
204  * in the target will be copied based on parsing the target
205  * system's WAL, and any blocks modified in the source will be
206  * updated after rewinding, when the source system's WAL is
207  * replayed.
208  */
209  oldsize = statbuf.st_size;
210  if (oldsize < newsize)
211  action = FILE_ACTION_COPY_TAIL;
212  else if (oldsize > newsize)
213  action = FILE_ACTION_TRUNCATE;
214  else
215  action = FILE_ACTION_NONE;
216  }
217  break;
218  }
219 
220  /* Create a new entry for this file */
221  entry = pg_malloc(sizeof(file_entry_t));
222  entry->path = pg_strdup(path);
223  entry->type = type;
224  entry->action = action;
225  entry->oldsize = oldsize;
226  entry->newsize = newsize;
227  entry->link_target = link_target ? pg_strdup(link_target) : NULL;
228  entry->next = NULL;
229  entry->pagemap.bitmap = NULL;
230  entry->pagemap.bitmapsize = 0;
231  entry->isrelfile = isRelDataFile(path);
232 
233  if (map->last)
234  {
235  map->last->next = entry;
236  map->last = entry;
237  }
238  else
239  map->first = map->last = entry;
240  map->nlist++;
241 }
242 
243 /*
244  * Callback for processing target file list.
245  *
246  * All source files must be already processed before calling this. This only
247  * marks target data directory's files that didn't exist in the source for
248  * deletion.
249  */
250 void
251 process_target_file(const char *path, file_type_t type, size_t oldsize,
252  const char *link_target)
253 {
254  bool exists;
255  char localpath[MAXPGPATH];
256  struct stat statbuf;
257  file_entry_t key;
258  file_entry_t *key_ptr;
259  filemap_t *map = filemap;
260  file_entry_t *entry;
261 
262  snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
263  if (lstat(localpath, &statbuf) < 0)
264  {
265  if (errno != ENOENT)
266  pg_fatal("could not stat file \"%s\": %s\n",
267  localpath, strerror(errno));
268 
269  exists = false;
270  }
271 
272  if (map->array == NULL)
273  {
274  /* on first call, initialize lookup array */
275  if (map->nlist == 0)
276  {
277  /* should not happen */
278  pg_fatal("source file list is empty\n");
279  }
280 
282 
283  Assert(map->array != NULL);
284 
285  qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
286  }
287 
288  /*
289  * Completely ignore some special files
290  */
291  if (strcmp(path, "postmaster.pid") == 0 ||
292  strcmp(path, "postmaster.opts") == 0)
293  return;
294 
295  /*
296  * Like in process_source_file, pretend that xlog is always a directory.
297  */
298  if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
299  type = FILE_TYPE_DIRECTORY;
300 
301  key.path = (char *) path;
302  key_ptr = &key;
303  exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
304  path_cmp) != NULL);
305 
306  /* Remove any file or folder that doesn't exist in the source system. */
307  if (!exists)
308  {
309  entry = pg_malloc(sizeof(file_entry_t));
310  entry->path = pg_strdup(path);
311  entry->type = type;
312  entry->action = FILE_ACTION_REMOVE;
313  entry->oldsize = oldsize;
314  entry->newsize = 0;
315  entry->link_target = link_target ? pg_strdup(link_target) : NULL;
316  entry->next = NULL;
317  entry->pagemap.bitmap = NULL;
318  entry->pagemap.bitmapsize = 0;
319  entry->isrelfile = isRelDataFile(path);
320 
321  if (map->last == NULL)
322  map->first = entry;
323  else
324  map->last->next = entry;
325  map->last = entry;
326  map->nlist++;
327  }
328  else
329  {
330  /*
331  * We already handled all files that exist in the source system in
332  * process_source_file().
333  */
334  }
335 }
336 
337 /*
338  * This callback gets called while we read the WAL in the target, for every
339  * block that have changed in the target system. It makes note of all the
340  * changed blocks in the pagemap of the file.
341  */
342 void
344 {
345  char *path;
346  file_entry_t key;
347  file_entry_t *key_ptr;
348  file_entry_t *entry;
349  BlockNumber blkno_inseg;
350  int segno;
351  filemap_t *map = filemap;
352  file_entry_t **e;
353 
354  Assert(map->array);
355 
356  segno = blkno / RELSEG_SIZE;
357  blkno_inseg = blkno % RELSEG_SIZE;
358 
359  path = datasegpath(rnode, forknum, segno);
360 
361  key.path = (char *) path;
362  key_ptr = &key;
363 
364  e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
365  path_cmp);
366  if (e)
367  entry = *e;
368  else
369  entry = NULL;
370  pfree(path);
371 
372  if (entry)
373  {
374  Assert(entry->isrelfile);
375 
376  switch (entry->action)
377  {
378  case FILE_ACTION_NONE:
380  /* skip if we're truncating away the modified block anyway */
381  if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
382  datapagemap_add(&entry->pagemap, blkno_inseg);
383  break;
384 
386 
387  /*
388  * skip the modified block if it is part of the "tail" that
389  * we're copying anyway.
390  */
391  if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize)
392  datapagemap_add(&entry->pagemap, blkno_inseg);
393  break;
394 
395  case FILE_ACTION_COPY:
396  case FILE_ACTION_REMOVE:
397  break;
398 
399  case FILE_ACTION_CREATE:
400  pg_fatal("unexpected page modification for directory or symbolic link \"%s\"\n", entry->path);
401  }
402  }
403  else
404  {
405  /*
406  * If we don't have any record of this file in the file map, it means
407  * that it's a relation that doesn't exist in the source system, and
408  * it was subsequently removed in the target system, too. We can
409  * safely ignore it.
410  */
411  }
412 }
413 
414 /*
415  * Convert the linked list of entries in map->first/last to the array,
416  * map->array.
417  */
418 static void
420 {
421  int narray;
422  file_entry_t *entry,
423  *next;
424 
425  map->array = (file_entry_t **)
426  pg_realloc(map->array,
427  (map->nlist + map->narray) * sizeof(file_entry_t *));
428 
429  narray = map->narray;
430  for (entry = map->first; entry != NULL; entry = next)
431  {
432  map->array[narray++] = entry;
433  next = entry->next;
434  entry->next = NULL;
435  }
436  Assert(narray == map->nlist + map->narray);
437  map->narray = narray;
438  map->nlist = 0;
439  map->first = map->last = NULL;
440 }
441 
442 void
444 {
445  filemap_t *map = filemap;
446 
448  qsort(map->array, map->narray, sizeof(file_entry_t *),
450 }
451 
452 static const char *
454 {
455  switch (action)
456  {
457  case FILE_ACTION_NONE:
458  return "NONE";
459  case FILE_ACTION_COPY:
460  return "COPY";
462  return "TRUNCATE";
464  return "COPY_TAIL";
465  case FILE_ACTION_CREATE:
466  return "CREATE";
467  case FILE_ACTION_REMOVE:
468  return "REMOVE";
469 
470  default:
471  return "unknown";
472  }
473 }
474 
475 /*
476  * Calculate the totals needed for progress reports.
477  */
478 void
480 {
481  file_entry_t *entry;
482  int i;
483  filemap_t *map = filemap;
484 
485  map->total_size = 0;
486  map->fetch_size = 0;
487 
488  for (i = 0; i < map->narray; i++)
489  {
490  entry = map->array[i];
491 
492  if (entry->type != FILE_TYPE_REGULAR)
493  continue;
494 
495  map->total_size += entry->newsize;
496 
497  if (entry->action == FILE_ACTION_COPY)
498  {
499  map->fetch_size += entry->newsize;
500  continue;
501  }
502 
503  if (entry->action == FILE_ACTION_COPY_TAIL)
504  map->fetch_size += (entry->newsize - entry->oldsize);
505 
506  if (entry->pagemap.bitmapsize > 0)
507  {
509  BlockNumber blk;
510 
511  iter = datapagemap_iterate(&entry->pagemap);
512  while (datapagemap_next(iter, &blk))
513  map->fetch_size += BLCKSZ;
514 
515  pg_free(iter);
516  }
517  }
518 }
519 
520 void
522 {
523  filemap_t *map = filemap;
524  file_entry_t *entry;
525  int i;
526 
527  for (i = 0; i < map->narray; i++)
528  {
529  entry = map->array[i];
530  if (entry->action != FILE_ACTION_NONE ||
531  entry->pagemap.bitmapsize > 0)
532  {
534  /*------
535  translator: first %s is a file path, second is a keyword such as COPY */
536  "%s (%s)\n", entry->path,
537  action_to_str(entry->action));
538 
539  if (entry->pagemap.bitmapsize > 0)
540  datapagemap_print(&entry->pagemap);
541  }
542  }
543  fflush(stdout);
544 }
545 
546 /*
547  * Does it look like a relation data file?
548  *
549  * For our purposes, only files belonging to the main fork are considered
550  * relation files. Other forks are always copied in toto, because we cannot
551  * reliably track changes to them, because WAL only contains block references
552  * for the main fork.
553  */
554 static bool
555 isRelDataFile(const char *path)
556 {
557  char buf[20 + 1];
558  RelFileNode rnode;
559  unsigned int segNo;
560  int nmatch;
561  bool matched;
562 
563  /*----
564  * Relation data files can be in one of the following directories:
565  *
566  * global/
567  * shared relations
568  *
569  * base/<db oid>/
570  * regular relations, default tablespace
571  *
572  * pg_tblspc/<tblspc oid>/PG_9.4_201403261/
573  * within a non-default tablespace (the name of the directory
574  * depends on version)
575  *
576  * And the relation data files themselves have a filename like:
577  *
578  * <oid>.<segment number>
579  *
580  *----
581  */
582  rnode.spcNode = InvalidOid;
583  rnode.dbNode = InvalidOid;
584  rnode.relNode = InvalidOid;
585  segNo = 0;
586  matched = false;
587 
588  nmatch = sscanf(path, "global/%u.%u", &rnode.relNode, &segNo);
589  if (nmatch == 1 || nmatch == 2)
590  {
592  rnode.dbNode = 0;
593  matched = true;
594  }
595  else
596  {
597  nmatch = sscanf(path, "base/%u/%u.%u",
598  &rnode.dbNode, &rnode.relNode, &segNo);
599  if (nmatch == 2 || nmatch == 3)
600  {
602  matched = true;
603  }
604  else
605  {
606  nmatch = sscanf(path, "pg_tblspc/%u/PG_%20s/%u/%u.%u",
607  &rnode.spcNode, buf, &rnode.dbNode, &rnode.relNode,
608  &segNo);
609  if (nmatch == 4 || nmatch == 5)
610  matched = true;
611  }
612  }
613 
614  /*
615  * The sscanf tests above can match files that have extra characters at
616  * the end, and the last check can also match a path belonging to a
617  * different version (different TABLESPACE_VERSION_DIRECTORY). To make
618  * eliminate such cases, cross-check that GetRelationPath creates the
619  * exact same filename, when passed the RelFileNode information we
620  * extracted from the filename.
621  */
622  if (matched)
623  {
624  char *check_path = datasegpath(rnode, MAIN_FORKNUM, segNo);
625 
626  if (strcmp(check_path, path) != 0)
627  matched = false;
628 
629  pfree(check_path);
630  }
631 
632  return matched;
633 }
634 
635 /*
636  * A helper function to create the path of a relation file and segment.
637  *
638  * The returned path is palloc'd
639  */
640 static char *
642 {
643  char *path;
644  char *segpath;
645 
646  path = relpathperm(rnode, forknum);
647  if (segno > 0)
648  {
649  segpath = psprintf("%s.%u", path, segno);
650  pfree(path);
651  return segpath;
652  }
653  else
654  return path;
655 }
656 
657 static int
658 path_cmp(const void *a, const void *b)
659 {
660  file_entry_t *fa = *((file_entry_t **) a);
661  file_entry_t *fb = *((file_entry_t **) b);
662 
663  return strcmp(fa->path, fb->path);
664 }
665 
666 /*
667  * In the final stage, the filemap is sorted so that removals come last.
668  * From disk space usage point of view, it would be better to do removals
669  * first, but for now, safety first. If a whole directory is deleted, all
670  * files and subdirectories inside it need to removed first. On creation,
671  * parent directory needs to be created before files and directories inside
672  * it. To achieve that, the file_action_t enum is ordered so that we can
673  * just sort on that first. Furthermore, sort REMOVE entries in reverse
674  * path order, so that "foo/bar" subdirectory is removed before "foo".
675  */
676 static int
677 final_filemap_cmp(const void *a, const void *b)
678 {
679  file_entry_t *fa = *((file_entry_t **) a);
680  file_entry_t *fb = *((file_entry_t **) b);
681 
682  if (fa->action > fb->action)
683  return 1;
684  if (fa->action < fb->action)
685  return -1;
686 
687  if (fa->action == FILE_ACTION_REMOVE)
688  return -strcmp(fa->path, fb->path);
689  else
690  return strcmp(fa->path, fb->path);
691 }
void datapagemap_add(datapagemap_t *map, BlockNumber blkno)
Definition: datapagemap.c:32
void calculate_totals(void)
Definition: filemap.c:479
static int final_filemap_cmp(const void *a, const void *b)
Definition: filemap.c:677
char * datadir_target
Definition: pg_rewind.c:49
#define relpathperm(rnode, forknum)
Definition: relpath.h:67
static int fa(void)
Definition: preproc-init.c:85
static int32 next
Definition: blutils.c:210
file_entry_t * first
Definition: filemap.h:68
bool pg_str_endswith(const char *str, const char *end)
Definition: string.c:31
file_entry_t ** array
Definition: filemap.h:79
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
void process_target_file(const char *path, file_type_t type, size_t oldsize, const char *link_target)
Definition: filemap.c:251
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define GLOBALTABLESPACE_OID
Definition: pg_tablespace.h:64
size_t newsize
Definition: filemap.h:51
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
#define PG_TEMP_FILE_PREFIX
Definition: fd.h:128
char * bitmap
Definition: datapagemap.h:18
uint32 BlockNumber
Definition: block.h:31
void pg_fatal(const char *fmt,...)
Definition: logging.c:83
void filemap_create(void)
Definition: filemap.c:38
static int fb(int x)
Definition: preproc-init.c:92
datapagemap_t pagemap
Definition: filemap.h:54
int narray
Definition: filemap.h:80
void filemap_finalize(void)
Definition: filemap.c:443
uint64 fetch_size
Definition: filemap.h:87
void process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
Definition: filemap.c:343
file_type_t type
Definition: filemap.h:45
static char * datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
Definition: filemap.c:641
bool isrelfile
Definition: filemap.h:52
void pfree(void *pointer)
Definition: mcxt.c:950
int nlist
Definition: filemap.h:70
bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno)
Definition: datapagemap.c:87
#define MAXPGPATH
file_action_t action
Definition: filemap.h:47
char * link_target
Definition: filemap.h:57
struct file_entry_t * next
Definition: filemap.h:59
static char * buf
Definition: pg_test_fsync.c:66
#define DEFAULTTABLESPACE_OID
Definition: pg_tablespace.h:63
filemap_t * filemap
Definition: filemap.c:25
static const char * action_to_str(file_action_t action)
Definition: filemap.c:453
size_t oldsize
Definition: filemap.h:50
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
uint64 total_size
Definition: filemap.h:86
void pg_log(eLogType type, const char *fmt,...)
Definition: logging.c:69
void * pg_realloc(void *ptr, size_t size)
Definition: fe_memutils.c:65
void print_filemap(void)
Definition: filemap.c:521
ForkNumber
Definition: relpath.h:24
void datapagemap_print(datapagemap_t *map)
Definition: datapagemap.c:117
int bitmapsize
Definition: datapagemap.h:19
#define InvalidOid
Definition: postgres_ext.h:36
#define PG_TEMP_FILES_DIR
Definition: fd.h:127
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:676
file_action_t
Definition: filemap.h:24
static bool isRelDataFile(const char *path)
Definition: filemap.c:555
datapagemap_iterator_t * datapagemap_iterate(datapagemap_t *map)
Definition: datapagemap.c:75
char * path
Definition: filemap.h:44
void pg_free(void *ptr)
Definition: fe_memutils.c:105
void process_source_file(const char *path, file_type_t type, size_t newsize, const char *link_target)
Definition: filemap.c:60
static void filemap_list_to_array(filemap_t *map)
Definition: filemap.c:419
file_entry_t * last
Definition: filemap.h:69
e
Definition: preproc-init.c:82
int i
const char * strerror(int errnum)
Definition: strerror.c:19
Definition: filemap.h:42
static int path_cmp(const void *a, const void *b)
Definition: filemap.c:658
#define qsort(a, b, c, d)
Definition: port.h:443
file_type_t
Definition: filemap.h:35
#define lstat(path, sb)
Definition: win32.h:262