PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
filemap.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * filemap.c
4  * A data structure for keeping track of files that have changed.
5  *
6  * Copyright (c) 2013-2017, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 
11 #include "postgres_fe.h"
12 
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <unistd.h>
16 
17 #include "datapagemap.h"
18 #include "filemap.h"
19 #include "logging.h"
20 #include "pg_rewind.h"
21 
22 #include "common/string.h"
23 #include "catalog/pg_tablespace.h"
24 #include "storage/fd.h"
25 
27 
28 static bool isRelDataFile(const char *path);
29 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
30  BlockNumber segno);
31 static int path_cmp(const void *a, const void *b);
32 static int final_filemap_cmp(const void *a, const void *b);
33 static void filemap_list_to_array(filemap_t *map);
34 
35 /*
36  * Create a new file map (stored in the global pointer "filemap").
37  */
38 void
40 {
41  filemap_t *map;
42 
43  map = pg_malloc(sizeof(filemap_t));
44  map->first = map->last = NULL;
45  map->nlist = 0;
46  map->array = NULL;
47  map->narray = 0;
48 
49  Assert(filemap == NULL);
50  filemap = map;
51 }
52 
53 /*
54  * Callback for processing source file list.
55  *
56  * This is called once for every file in the source server. We decide what
57  * action needs to be taken for the file, depending on whether the file
58  * exists in the target and whether the size matches.
59  */
60 void
61 process_source_file(const char *path, file_type_t type, size_t newsize,
62  const char *link_target)
63 {
64  bool exists;
65  char localpath[MAXPGPATH];
66  struct stat statbuf;
67  filemap_t *map = filemap;
69  size_t oldsize = 0;
70  file_entry_t *entry;
71 
72  Assert(map->array == NULL);
73 
74  /*
75  * Completely ignore some special files in source and destination.
76  */
77  if (strcmp(path, "postmaster.pid") == 0 ||
78  strcmp(path, "postmaster.opts") == 0)
79  return;
80 
81  /*
82  * Pretend that pg_wal is a directory, even if it's really a symlink. We
83  * don't want to mess with the symlink itself, nor complain if it's a
84  * symlink in source but not in target or vice versa.
85  */
86  if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
87  type = FILE_TYPE_DIRECTORY;
88 
89  /*
90  * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
91  * This has the effect that all temporary files in the destination will be
92  * removed.
93  */
94  if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
95  return;
96  if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
97  return;
98 
99  /*
100  * sanity check: a filename that looks like a data file better be a
101  * regular file
102  */
103  if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
104  pg_fatal("data file \"%s\" in source is not a regular file\n", path);
105 
106  snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
107 
108  /* Does the corresponding file exist in the target data dir? */
109  if (lstat(localpath, &statbuf) < 0)
110  {
111  if (errno != ENOENT)
112  pg_fatal("could not stat file \"%s\": %s\n",
113  localpath, strerror(errno));
114 
115  exists = false;
116  }
117  else
118  exists = true;
119 
120  switch (type)
121  {
122  case FILE_TYPE_DIRECTORY:
123  if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
124  {
125  /* it's a directory in source, but not in target. Strange.. */
126  pg_fatal("\"%s\" is not a directory\n", localpath);
127  }
128 
129  if (!exists)
130  action = FILE_ACTION_CREATE;
131  else
132  action = FILE_ACTION_NONE;
133  oldsize = 0;
134  break;
135 
136  case FILE_TYPE_SYMLINK:
137  if (exists &&
138 #ifndef WIN32
139  !S_ISLNK(statbuf.st_mode)
140 #else
141  !pgwin32_is_junction(localpath)
142 #endif
143  )
144  {
145  /*
146  * It's a symbolic link in source, but not in target.
147  * Strange..
148  */
149  pg_fatal("\"%s\" is not a symbolic link\n", localpath);
150  }
151 
152  if (!exists)
153  action = FILE_ACTION_CREATE;
154  else
155  action = FILE_ACTION_NONE;
156  oldsize = 0;
157  break;
158 
159  case FILE_TYPE_REGULAR:
160  if (exists && !S_ISREG(statbuf.st_mode))
161  pg_fatal("\"%s\" is not a regular file\n", localpath);
162 
163  if (!exists || !isRelDataFile(path))
164  {
165  /*
166  * File exists in source, but not in target. Or it's a
167  * non-data file that we have no special processing for. Copy
168  * it in toto.
169  *
170  * An exception: PG_VERSIONs should be identical, but avoid
171  * overwriting it for paranoia.
172  */
173  if (pg_str_endswith(path, "PG_VERSION"))
174  {
175  action = FILE_ACTION_NONE;
176  oldsize = statbuf.st_size;
177  }
178  else
179  {
180  action = FILE_ACTION_COPY;
181  oldsize = 0;
182  }
183  }
184  else
185  {
186  /*
187  * It's a data file that exists in both.
188  *
189  * If it's larger in target, we can truncate it. There will
190  * also be a WAL record of the truncation in the source
191  * system, so WAL replay would eventually truncate the target
192  * too, but we might as well do it now.
193  *
194  * If it's smaller in the target, it means that it has been
195  * truncated in the target, or enlarged in the source, or
196  * both. If it was truncated in the target, we need to copy
197  * the missing tail from the source system. If it was enlarged
198  * in the source system, there will be WAL records in the
199  * source system for the new blocks, so we wouldn't need to
200  * copy them here. But we don't know which scenario we're
201  * dealing with, and there's no harm in copying the missing
202  * blocks now, so do it now.
203  *
204  * If it's the same size, do nothing here. Any blocks modified
205  * in the target will be copied based on parsing the target
206  * system's WAL, and any blocks modified in the source will be
207  * updated after rewinding, when the source system's WAL is
208  * replayed.
209  */
210  oldsize = statbuf.st_size;
211  if (oldsize < newsize)
212  action = FILE_ACTION_COPY_TAIL;
213  else if (oldsize > newsize)
214  action = FILE_ACTION_TRUNCATE;
215  else
216  action = FILE_ACTION_NONE;
217  }
218  break;
219  }
220 
221  /* Create a new entry for this file */
222  entry = pg_malloc(sizeof(file_entry_t));
223  entry->path = pg_strdup(path);
224  entry->type = type;
225  entry->action = action;
226  entry->oldsize = oldsize;
227  entry->newsize = newsize;
228  entry->link_target = link_target ? pg_strdup(link_target) : NULL;
229  entry->next = NULL;
230  entry->pagemap.bitmap = NULL;
231  entry->pagemap.bitmapsize = 0;
232  entry->isrelfile = isRelDataFile(path);
233 
234  if (map->last)
235  {
236  map->last->next = entry;
237  map->last = entry;
238  }
239  else
240  map->first = map->last = entry;
241  map->nlist++;
242 }
243 
244 /*
245  * Callback for processing target file list.
246  *
247  * All source files must be already processed before calling this. This only
248  * marks target data directory's files that didn't exist in the source for
249  * deletion.
250  */
251 void
252 process_target_file(const char *path, file_type_t type, size_t oldsize,
253  const char *link_target)
254 {
255  bool exists;
256  char localpath[MAXPGPATH];
257  struct stat statbuf;
258  file_entry_t key;
259  file_entry_t *key_ptr;
260  filemap_t *map = filemap;
261  file_entry_t *entry;
262 
263  snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
264  if (lstat(localpath, &statbuf) < 0)
265  {
266  if (errno != ENOENT)
267  pg_fatal("could not stat file \"%s\": %s\n",
268  localpath, strerror(errno));
269 
270  exists = false;
271  }
272 
273  if (map->array == NULL)
274  {
275  /* on first call, initialize lookup array */
276  if (map->nlist == 0)
277  {
278  /* should not happen */
279  pg_fatal("source file list is empty\n");
280  }
281 
283 
284  Assert(map->array != NULL);
285 
286  qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
287  }
288 
289  /*
290  * Completely ignore some special files
291  */
292  if (strcmp(path, "postmaster.pid") == 0 ||
293  strcmp(path, "postmaster.opts") == 0)
294  return;
295 
296  /*
297  * Like in process_source_file, pretend that xlog is always a directory.
298  */
299  if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
300  type = FILE_TYPE_DIRECTORY;
301 
302  key.path = (char *) path;
303  key_ptr = &key;
304  exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
305  path_cmp) != NULL);
306 
307  /* Remove any file or folder that doesn't exist in the source system. */
308  if (!exists)
309  {
310  entry = pg_malloc(sizeof(file_entry_t));
311  entry->path = pg_strdup(path);
312  entry->type = type;
313  entry->action = FILE_ACTION_REMOVE;
314  entry->oldsize = oldsize;
315  entry->newsize = 0;
316  entry->link_target = link_target ? pg_strdup(link_target) : NULL;
317  entry->next = NULL;
318  entry->pagemap.bitmap = NULL;
319  entry->pagemap.bitmapsize = 0;
320  entry->isrelfile = isRelDataFile(path);
321 
322  if (map->last == NULL)
323  map->first = entry;
324  else
325  map->last->next = entry;
326  map->last = entry;
327  map->nlist++;
328  }
329  else
330  {
331  /*
332  * We already handled all files that exist in the source system in
333  * process_source_file().
334  */
335  }
336 }
337 
338 /*
339  * This callback gets called while we read the WAL in the target, for every
340  * block that have changed in the target system. It makes note of all the
341  * changed blocks in the pagemap of the file.
342  */
343 void
345 {
346  char *path;
347  file_entry_t key;
348  file_entry_t *key_ptr;
349  file_entry_t *entry;
350  BlockNumber blkno_inseg;
351  int segno;
352  filemap_t *map = filemap;
353  file_entry_t **e;
354 
355  Assert(map->array);
356 
357  segno = blkno / RELSEG_SIZE;
358  blkno_inseg = blkno % RELSEG_SIZE;
359 
360  path = datasegpath(rnode, forknum, segno);
361 
362  key.path = (char *) path;
363  key_ptr = &key;
364 
365  e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
366  path_cmp);
367  if (e)
368  entry = *e;
369  else
370  entry = NULL;
371  pfree(path);
372 
373  if (entry)
374  {
375  Assert(entry->isrelfile);
376 
377  switch (entry->action)
378  {
379  case FILE_ACTION_NONE:
381  /* skip if we're truncating away the modified block anyway */
382  if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
383  datapagemap_add(&entry->pagemap, blkno_inseg);
384  break;
385 
387 
388  /*
389  * skip the modified block if it is part of the "tail" that
390  * we're copying anyway.
391  */
392  if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize)
393  datapagemap_add(&entry->pagemap, blkno_inseg);
394  break;
395 
396  case FILE_ACTION_COPY:
397  case FILE_ACTION_REMOVE:
398  break;
399 
400  case FILE_ACTION_CREATE:
401  pg_fatal("unexpected page modification for directory or symbolic link \"%s\"\n", entry->path);
402  }
403  }
404  else
405  {
406  /*
407  * If we don't have any record of this file in the file map, it means
408  * that it's a relation that doesn't exist in the source system, and
409  * it was subsequently removed in the target system, too. We can
410  * safely ignore it.
411  */
412  }
413 }
414 
415 /*
416  * Convert the linked list of entries in map->first/last to the array,
417  * map->array.
418  */
419 static void
421 {
422  int narray;
423  file_entry_t *entry,
424  *next;
425 
426  map->array = (file_entry_t **)
427  pg_realloc(map->array,
428  (map->nlist + map->narray) * sizeof(file_entry_t *));
429 
430  narray = map->narray;
431  for (entry = map->first; entry != NULL; entry = next)
432  {
433  map->array[narray++] = entry;
434  next = entry->next;
435  entry->next = NULL;
436  }
437  Assert(narray == map->nlist + map->narray);
438  map->narray = narray;
439  map->nlist = 0;
440  map->first = map->last = NULL;
441 }
442 
443 void
445 {
446  filemap_t *map = filemap;
447 
449  qsort(map->array, map->narray, sizeof(file_entry_t *),
451 }
452 
453 static const char *
455 {
456  switch (action)
457  {
458  case FILE_ACTION_NONE:
459  return "NONE";
460  case FILE_ACTION_COPY:
461  return "COPY";
463  return "TRUNCATE";
465  return "COPY_TAIL";
466  case FILE_ACTION_CREATE:
467  return "CREATE";
468  case FILE_ACTION_REMOVE:
469  return "REMOVE";
470 
471  default:
472  return "unknown";
473  }
474 }
475 
476 /*
477  * Calculate the totals needed for progress reports.
478  */
479 void
481 {
482  file_entry_t *entry;
483  int i;
484  filemap_t *map = filemap;
485 
486  map->total_size = 0;
487  map->fetch_size = 0;
488 
489  for (i = 0; i < map->narray; i++)
490  {
491  entry = map->array[i];
492 
493  if (entry->type != FILE_TYPE_REGULAR)
494  continue;
495 
496  map->total_size += entry->newsize;
497 
498  if (entry->action == FILE_ACTION_COPY)
499  {
500  map->fetch_size += entry->newsize;
501  continue;
502  }
503 
504  if (entry->action == FILE_ACTION_COPY_TAIL)
505  map->fetch_size += (entry->newsize - entry->oldsize);
506 
507  if (entry->pagemap.bitmapsize > 0)
508  {
510  BlockNumber blk;
511 
512  iter = datapagemap_iterate(&entry->pagemap);
513  while (datapagemap_next(iter, &blk))
514  map->fetch_size += BLCKSZ;
515 
516  pg_free(iter);
517  }
518  }
519 }
520 
521 void
523 {
524  filemap_t *map = filemap;
525  file_entry_t *entry;
526  int i;
527 
528  for (i = 0; i < map->narray; i++)
529  {
530  entry = map->array[i];
531  if (entry->action != FILE_ACTION_NONE ||
532  entry->pagemap.bitmapsize > 0)
533  {
535  /*------
536  translator: first %s is a file path, second is a keyword such as COPY */
537  "%s (%s)\n", entry->path,
538  action_to_str(entry->action));
539 
540  if (entry->pagemap.bitmapsize > 0)
541  datapagemap_print(&entry->pagemap);
542  }
543  }
544  fflush(stdout);
545 }
546 
547 /*
548  * Does it look like a relation data file?
549  *
550  * For our purposes, only files belonging to the main fork are considered
551  * relation files. Other forks are always copied in toto, because we cannot
552  * reliably track changes to them, because WAL only contains block references
553  * for the main fork.
554  */
555 static bool
556 isRelDataFile(const char *path)
557 {
558  char buf[20 + 1];
559  RelFileNode rnode;
560  unsigned int segNo;
561  int nmatch;
562  bool matched;
563 
564  /*----
565  * Relation data files can be in one of the following directories:
566  *
567  * global/
568  * shared relations
569  *
570  * base/<db oid>/
571  * regular relations, default tablespace
572  *
573  * pg_tblspc/<tblspc oid>/PG_9.4_201403261/
574  * within a non-default tablespace (the name of the directory
575  * depends on version)
576  *
577  * And the relation data files themselves have a filename like:
578  *
579  * <oid>.<segment number>
580  *
581  *----
582  */
583  rnode.spcNode = InvalidOid;
584  rnode.dbNode = InvalidOid;
585  rnode.relNode = InvalidOid;
586  segNo = 0;
587  matched = false;
588 
589  nmatch = sscanf(path, "global/%u.%u", &rnode.relNode, &segNo);
590  if (nmatch == 1 || nmatch == 2)
591  {
593  rnode.dbNode = 0;
594  matched = true;
595  }
596  else
597  {
598  nmatch = sscanf(path, "base/%u/%u.%u",
599  &rnode.dbNode, &rnode.relNode, &segNo);
600  if (nmatch == 2 || nmatch == 3)
601  {
603  matched = true;
604  }
605  else
606  {
607  nmatch = sscanf(path, "pg_tblspc/%u/PG_%20s/%u/%u.%u",
608  &rnode.spcNode, buf, &rnode.dbNode, &rnode.relNode,
609  &segNo);
610  if (nmatch == 4 || nmatch == 5)
611  matched = true;
612  }
613  }
614 
615  /*
616  * The sscanf tests above can match files that have extra characters at
617  * the end, and the last check can also match a path belonging to a
618  * different version (different TABLESPACE_VERSION_DIRECTORY). To make
619  * eliminate such cases, cross-check that GetRelationPath creates the
620  * exact same filename, when passed the RelFileNode information we
621  * extracted from the filename.
622  */
623  if (matched)
624  {
625  char *check_path = datasegpath(rnode, MAIN_FORKNUM, segNo);
626 
627  if (strcmp(check_path, path) != 0)
628  matched = false;
629 
630  pfree(check_path);
631  }
632 
633  return matched;
634 }
635 
636 /*
637  * A helper function to create the path of a relation file and segment.
638  *
639  * The returned path is palloc'd
640  */
641 static char *
643 {
644  char *path;
645  char *segpath;
646 
647  path = relpathperm(rnode, forknum);
648  if (segno > 0)
649  {
650  segpath = psprintf("%s.%u", path, segno);
651  pfree(path);
652  return segpath;
653  }
654  else
655  return path;
656 }
657 
658 static int
659 path_cmp(const void *a, const void *b)
660 {
661  file_entry_t *fa = *((file_entry_t **) a);
662  file_entry_t *fb = *((file_entry_t **) b);
663 
664  return strcmp(fa->path, fb->path);
665 }
666 
667 /*
668  * In the final stage, the filemap is sorted so that removals come last.
669  * From disk space usage point of view, it would be better to do removals
670  * first, but for now, safety first. If a whole directory is deleted, all
671  * files and subdirectories inside it need to removed first. On creation,
672  * parent directory needs to be created before files and directories inside
673  * it. To achieve that, the file_action_t enum is ordered so that we can
674  * just sort on that first. Furthermore, sort REMOVE entries in reverse
675  * path order, so that "foo/bar" subdirectory is removed before "foo".
676  */
677 static int
678 final_filemap_cmp(const void *a, const void *b)
679 {
680  file_entry_t *fa = *((file_entry_t **) a);
681  file_entry_t *fb = *((file_entry_t **) b);
682 
683  if (fa->action > fb->action)
684  return 1;
685  if (fa->action < fb->action)
686  return -1;
687 
688  if (fa->action == FILE_ACTION_REMOVE)
689  return -strcmp(fa->path, fb->path);
690  else
691  return strcmp(fa->path, fb->path);
692 }
void datapagemap_add(datapagemap_t *map, BlockNumber blkno)
Definition: datapagemap.c:32
void calculate_totals(void)
Definition: filemap.c:480
static int final_filemap_cmp(const void *a, const void *b)
Definition: filemap.c:678
char * datadir_target
Definition: pg_rewind.c:49
#define relpathperm(rnode, forknum)
Definition: relpath.h:67
static int fa(void)
Definition: preproc-init.c:85
static int32 next
Definition: blutils.c:210
file_entry_t * first
Definition: filemap.h:68
bool pg_str_endswith(const char *str, const char *end)
Definition: string.c:31
file_entry_t ** array
Definition: filemap.h:79
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
void process_target_file(const char *path, file_type_t type, size_t oldsize, const char *link_target)
Definition: filemap.c:252
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define GLOBALTABLESPACE_OID
Definition: pg_tablespace.h:64
size_t newsize
Definition: filemap.h:51
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
#define PG_TEMP_FILE_PREFIX
Definition: fd.h:127
char * bitmap
Definition: datapagemap.h:18
uint32 BlockNumber
Definition: block.h:31
void pg_fatal(const char *fmt,...)
Definition: logging.c:83
void filemap_create(void)
Definition: filemap.c:39
static int fb(int x)
Definition: preproc-init.c:92
datapagemap_t pagemap
Definition: filemap.h:54
int narray
Definition: filemap.h:80
void filemap_finalize(void)
Definition: filemap.c:444
uint64 fetch_size
Definition: filemap.h:87
void process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
Definition: filemap.c:344
file_type_t type
Definition: filemap.h:45
static char * datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
Definition: filemap.c:642
bool isrelfile
Definition: filemap.h:52
void pfree(void *pointer)
Definition: mcxt.c:992
int nlist
Definition: filemap.h:70
bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno)
Definition: datapagemap.c:87
#define MAXPGPATH
file_action_t action
Definition: filemap.h:47
char * link_target
Definition: filemap.h:57
struct file_entry_t * next
Definition: filemap.h:59
static char * buf
Definition: pg_test_fsync.c:65
#define DEFAULTTABLESPACE_OID
Definition: pg_tablespace.h:63
filemap_t * filemap
Definition: filemap.c:26
static const char * action_to_str(file_action_t action)
Definition: filemap.c:454
size_t oldsize
Definition: filemap.h:50
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
uint64 total_size
Definition: filemap.h:86
void pg_log(eLogType type, const char *fmt,...)
Definition: logging.c:69
void * pg_realloc(void *ptr, size_t size)
Definition: fe_memutils.c:65
void print_filemap(void)
Definition: filemap.c:522
ForkNumber
Definition: relpath.h:24
void datapagemap_print(datapagemap_t *map)
Definition: datapagemap.c:117
int bitmapsize
Definition: datapagemap.h:19
#define InvalidOid
Definition: postgres_ext.h:36
#define PG_TEMP_FILES_DIR
Definition: fd.h:126
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:670
file_action_t
Definition: filemap.h:24
static bool isRelDataFile(const char *path)
Definition: filemap.c:556
datapagemap_iterator_t * datapagemap_iterate(datapagemap_t *map)
Definition: datapagemap.c:75
char * path
Definition: filemap.h:44
void pg_free(void *ptr)
Definition: fe_memutils.c:105
void process_source_file(const char *path, file_type_t type, size_t newsize, const char *link_target)
Definition: filemap.c:61
static void filemap_list_to_array(filemap_t *map)
Definition: filemap.c:420
file_entry_t * last
Definition: filemap.h:69
e
Definition: preproc-init.c:82
int i
const char * strerror(int errnum)
Definition: strerror.c:19
Definition: filemap.h:42
static int path_cmp(const void *a, const void *b)
Definition: filemap.c:659
#define qsort(a, b, c, d)
Definition: port.h:440
file_type_t
Definition: filemap.h:35
#define lstat(path, sb)
Definition: win32.h:272