PostgreSQL Source Code  git master
file.c
Go to the documentation of this file.
1 /*
2  * file.c
3  *
4  * file system operations
5  *
6  * Copyright (c) 2010-2019, PostgreSQL Global Development Group
7  * src/bin/pg_upgrade/file.c
8  */
9 
10 #include "postgres_fe.h"
11 
12 #include <sys/stat.h>
13 #include <fcntl.h>
14 #ifdef HAVE_COPYFILE_H
15 #include <copyfile.h>
16 #endif
17 #ifdef __linux__
18 #include <sys/ioctl.h>
19 #include <linux/fs.h>
20 #endif
21 
22 #include "access/visibilitymap.h"
23 #include "common/file_perm.h"
24 #include "pg_upgrade.h"
25 #include "storage/bufpage.h"
26 #include "storage/checksum.h"
27 #include "storage/checksum_impl.h"
28 
29 #ifdef WIN32
30 static int win32_pghardlink(const char *src, const char *dst);
31 #endif
32 
33 
/*
 * cloneFile()
 *
 * Clones/reflinks a relation file from src to dst.
 *
 * schemaName/relName are relation's SQL name (used for error messages only).
 *
 * The implementation is chosen at compile time: copyfile() with
 * COPYFILE_CLONE_FORCE where that is available, otherwise the Linux FICLONE
 * ioctl.  If neither is available the function body is empty.  Every failure
 * is reported through pg_fatal().
 */
void
cloneFile(const char *src, const char *dst,
		  const char *schemaName, const char *relName)
{
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
	if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
				 schemaName, relName, src, dst, strerror(errno));
#elif defined(__linux__) && defined(FICLONE)
	int			src_fd;
	int			dest_fd;

	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n",
				 schemaName, relName, src, strerror(errno));

	/* O_EXCL: the destination must not exist yet */
	if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
						pg_file_create_mode)) < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n",
				 schemaName, relName, dst, strerror(errno));

	if (ioctl(dest_fd, FICLONE, src_fd) < 0)
	{
		/*
		 * Remember why the clone failed before cleaning up: unlink() may
		 * itself fail and overwrite errno, which would make the message
		 * below report the wrong error.
		 */
		int			save_errno = errno;

		/* Remove the empty destination file we just created */
		unlink(dst);

		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
				 schemaName, relName, src, dst, strerror(save_errno));
	}

	close(src_fd);
	close(dest_fd);
#endif
}
73 
74 
75 /*
76  * copyFile()
77  *
78  * Copies a relation file from src to dst.
79  * schemaName/relName are relation's SQL name (used for error messages only).
80  */
81 void
82 copyFile(const char *src, const char *dst,
83  const char *schemaName, const char *relName)
84 {
85 #ifndef WIN32
86  int src_fd;
87  int dest_fd;
88  char *buffer;
89 
90  if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
91  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
92  schemaName, relName, src, strerror(errno));
93 
94  if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
95  pg_file_create_mode)) < 0)
96  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
97  schemaName, relName, dst, strerror(errno));
98 
99  /* copy in fairly large chunks for best efficiency */
100 #define COPY_BUF_SIZE (50 * BLCKSZ)
101 
102  buffer = (char *) pg_malloc(COPY_BUF_SIZE);
103 
104  /* perform data copying i.e read src source, write to destination */
105  while (true)
106  {
107  ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
108 
109  if (nbytes < 0)
110  pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
111  schemaName, relName, src, strerror(errno));
112 
113  if (nbytes == 0)
114  break;
115 
116  errno = 0;
117  if (write(dest_fd, buffer, nbytes) != nbytes)
118  {
119  /* if write didn't set errno, assume problem is no disk space */
120  if (errno == 0)
121  errno = ENOSPC;
122  pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
123  schemaName, relName, dst, strerror(errno));
124  }
125  }
126 
127  pg_free(buffer);
128  close(src_fd);
129  close(dest_fd);
130 
131 #else /* WIN32 */
132 
133  if (CopyFile(src, dst, true) == 0)
134  {
135  _dosmaperr(GetLastError());
136  pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
137  schemaName, relName, src, dst, strerror(errno));
138  }
139 
140 #endif /* WIN32 */
141 }
142 
143 
/*
 * linkFile()
 *
 * Creates dst as a hard link to the relation file src, using pg_link_file().
 *
 * schemaName/relName are relation's SQL name (used for error messages only).
 *
 * A negative result from pg_link_file() is reported through pg_fatal().
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	int			rc = pg_link_file(src, dst);

	/* Nothing more to do when the link was created */
	if (rc >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
			 schemaName, relName, src, dst, strerror(errno));
}
158 
159 
160 /*
161  * rewriteVisibilityMap()
162  *
163  * Transform a visibility map file, copying from src to dst.
164  * schemaName/relName are relation's SQL name (used for error messages only).
165  *
166  * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
167  * visibility map included one bit per heap page; it now includes two.
168  * When upgrading a cluster from before that time to a current PostgreSQL
169  * version, we could refuse to copy visibility maps from the old cluster
170  * to the new cluster; the next VACUUM would recreate them, but at the
171  * price of scanning the entire table. So, instead, we rewrite the old
172  * visibility maps in the new format. That way, the all-visible bits
173  * remain set for the pages for which they were set previously. The
174  * all-frozen bits are never set by this conversion; we leave that to VACUUM.
175  */
176 void
177 rewriteVisibilityMap(const char *fromfile, const char *tofile,
178  const char *schemaName, const char *relName)
179 {
180  int src_fd;
181  int dst_fd;
182  PGAlignedBlock buffer;
183  PGAlignedBlock new_vmbuf;
184  ssize_t totalBytesRead = 0;
185  ssize_t src_filesize;
186  int rewriteVmBytesPerPage;
187  BlockNumber new_blkno = 0;
188  struct stat statbuf;
189 
190  /* Compute number of old-format bytes per new page */
191  rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
192 
193  if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
194  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
195  schemaName, relName, fromfile, strerror(errno));
196 
197  if (fstat(src_fd, &statbuf) != 0)
198  pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n",
199  schemaName, relName, fromfile, strerror(errno));
200 
201  if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
202  pg_file_create_mode)) < 0)
203  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
204  schemaName, relName, tofile, strerror(errno));
205 
206  /* Save old file size */
207  src_filesize = statbuf.st_size;
208 
209  /*
210  * Turn each visibility map page into 2 pages one by one. Each new page
211  * has the same page header as the old one. If the last section of the
212  * last page is empty, we skip it, mostly to avoid turning one-page
213  * visibility maps for small relations into two pages needlessly.
214  */
215  while (totalBytesRead < src_filesize)
216  {
217  ssize_t bytesRead;
218  char *old_cur;
219  char *old_break;
220  char *old_blkend;
221  PageHeaderData pageheader;
222  bool old_lastblk;
223 
224  if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
225  {
226  if (bytesRead < 0)
227  pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
228  schemaName, relName, fromfile, strerror(errno));
229  else
230  pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n",
231  schemaName, relName, fromfile);
232  }
233 
234  totalBytesRead += BLCKSZ;
235  old_lastblk = (totalBytesRead == src_filesize);
236 
237  /* Save the page header data */
238  memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
239 
240  /*
241  * These old_* variables point to old visibility map page. old_cur
242  * points to current position on old page. old_blkend points to end of
243  * old block. old_break is the end+1 position on the old page for the
244  * data that will be transferred to the current new page.
245  */
246  old_cur = buffer.data + SizeOfPageHeaderData;
247  old_blkend = buffer.data + bytesRead;
248  old_break = old_cur + rewriteVmBytesPerPage;
249 
250  while (old_break <= old_blkend)
251  {
252  char *new_cur;
253  bool empty = true;
254  bool old_lastpart;
255 
256  /* First, copy old page header to new page */
257  memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
258 
259  /* Rewriting the last part of the last old page? */
260  old_lastpart = old_lastblk && (old_break == old_blkend);
261 
262  new_cur = new_vmbuf.data + SizeOfPageHeaderData;
263 
264  /* Process old page bytes one by one, and turn it into new page. */
265  while (old_cur < old_break)
266  {
267  uint8 byte = *(uint8 *) old_cur;
268  uint16 new_vmbits = 0;
269  int i;
270 
271  /* Generate new format bits while keeping old information */
272  for (i = 0; i < BITS_PER_BYTE; i++)
273  {
274  if (byte & (1 << i))
275  {
276  empty = false;
277  new_vmbits |=
279  }
280  }
281 
282  /* Copy new visibility map bytes to new-format page */
283  new_cur[0] = (char) (new_vmbits & 0xFF);
284  new_cur[1] = (char) (new_vmbits >> 8);
285 
286  old_cur++;
287  new_cur += BITS_PER_HEAPBLOCK;
288  }
289 
290  /* If the last part of the last page is empty, skip writing it */
291  if (old_lastpart && empty)
292  break;
293 
294  /* Set new checksum for visibility map page, if enabled */
296  ((PageHeader) new_vmbuf.data)->pd_checksum =
297  pg_checksum_page(new_vmbuf.data, new_blkno);
298 
299  errno = 0;
300  if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
301  {
302  /* if write didn't set errno, assume problem is no disk space */
303  if (errno == 0)
304  errno = ENOSPC;
305  pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
306  schemaName, relName, tofile, strerror(errno));
307  }
308 
309  /* Advance for next new page */
310  old_break += rewriteVmBytesPerPage;
311  new_blkno++;
312  }
313  }
314 
315  /* Clean up */
316  close(dst_fd);
317  close(src_fd);
318 }
319 
320 void
322 {
323  char existing_file[MAXPGPATH];
324  char new_link_file[MAXPGPATH];
325 
326  snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
327  snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
328  unlink(new_link_file); /* might fail */
329 
330 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
331  if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
332  pg_fatal("could not clone file between old and new data directories: %s\n",
333  strerror(errno));
334 #elif defined(__linux__) && defined(FICLONE)
335  {
336  int src_fd;
337  int dest_fd;
338 
339  if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
340  pg_fatal("could not open file \"%s\": %s\n",
341  existing_file, strerror(errno));
342 
343  if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
344  pg_file_create_mode)) < 0)
345  pg_fatal("could not create file \"%s\": %s\n",
346  new_link_file, strerror(errno));
347 
348  if (ioctl(dest_fd, FICLONE, src_fd) < 0)
349  pg_fatal("could not clone file between old and new data directories: %s\n",
350  strerror(errno));
351 
352  close(src_fd);
353  close(dest_fd);
354  }
355 #else
356  pg_fatal("file cloning not supported on this platform\n");
357 #endif
358 
359  unlink(new_link_file);
360 }
361 
362 void
364 {
365  char existing_file[MAXPGPATH];
366  char new_link_file[MAXPGPATH];
367 
368  snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
369  snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
370  unlink(new_link_file); /* might fail */
371 
372  if (pg_link_file(existing_file, new_link_file) < 0)
373  pg_fatal("could not create hard link between old and new data directories: %s\n"
374  "In link mode the old and new data directories must be on the same file system.\n",
375  strerror(errno));
376 
377  unlink(new_link_file);
378 }
379 
380 #ifdef WIN32
381 /* implementation of pg_link_file() on Windows */
382 static int
383 win32_pghardlink(const char *src, const char *dst)
384 {
385  /*
386  * CreateHardLinkA returns zero for failure
387  * http://msdn.microsoft.com/en-us/library/aa363860(VS.85).aspx
388  */
389  if (CreateHardLinkA(dst, src, NULL) == 0)
390  {
391  _dosmaperr(GetLastError());
392  return -1;
393  }
394  else
395  return 0;
396 }
397 #endif
void cloneFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:42
void check_file_clone(void)
Definition: file.c:321
int pg_file_create_mode
Definition: file_perm.c:19
void check_hard_link(void)
Definition: file.c:363
#define pg_link_file
Definition: pg_upgrade.h:68
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
ControlData controldata
Definition: pg_upgrade.h:259
#define BITS_PER_BYTE
#define write(a, b, c)
Definition: win32.h:14
void rewriteVisibilityMap(const char *fromfile, const char *tofile, const char *schemaName, const char *relName)
Definition: file.c:177
unsigned char uint8
Definition: c.h:357
void _dosmaperr(unsigned long)
Definition: win32error.c:171
#define pg_fatal(...)
Definition: pg_rewind.h:41
uint32 BlockNumber
Definition: block.h:31
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define PG_BINARY
Definition: c.h:1222
void copyFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:82
ClusterInfo new_cluster
Definition: pg_upgrade.c:59
char data[BLCKSZ]
Definition: c.h:1091
#define COPY_BUF_SIZE
unsigned short uint16
Definition: c.h:358
#define MAXPGPATH
bool data_checksum_version
Definition: pg_upgrade.h:223
void linkFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:151
ClusterInfo old_cluster
Definition: pg_upgrade.c:59
#define BITS_PER_HEAPBLOCK
Definition: visibilitymap.h:23
#define byte(x, n)
Definition: rijndael.c:68
#define stat(a, b)
Definition: win32_port.h:255
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define strerror
Definition: port.h:205
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define VISIBILITYMAP_ALL_VISIBLE
Definition: visibilitymap.h:26
char * pgdata
Definition: pg_upgrade.h:261
int i
#define close(a)
Definition: win32.h:12
#define snprintf
Definition: port.h:192
uint16 pg_checksum_page(char *page, BlockNumber blkno)
#define read(a, b, c)
Definition: win32.h:13