PostgreSQL Source Code  git master
file.c
Go to the documentation of this file.
1 /*
2  * file.c
3  *
4  * file system operations
5  *
6  * Copyright (c) 2010-2019, PostgreSQL Global Development Group
7  * src/bin/pg_upgrade/file.c
8  */
9 
10 #include "postgres_fe.h"
11 
12 #include "access/visibilitymap.h"
13 #include "common/file_perm.h"
14 #include "pg_upgrade.h"
15 #include "storage/bufpage.h"
16 #include "storage/checksum.h"
17 #include "storage/checksum_impl.h"
18 
19 #include <sys/stat.h>
20 #include <fcntl.h>
21 #ifdef HAVE_COPYFILE_H
22 #include <copyfile.h>
23 #endif
24 #ifdef __linux__
25 #include <sys/ioctl.h>
26 #include <linux/fs.h>
27 #endif
28 
29 
30 #ifdef WIN32
31 static int win32_pghardlink(const char *src, const char *dst);
32 #endif
33 
34 
35 /*
36  * cloneFile()
37  *
38  * Clones/reflinks a relation file from src to dst.
39  *
40  * schemaName/relName are relation's SQL name (used for error messages only).
41  */
42 void
43 cloneFile(const char *src, const char *dst,
44  const char *schemaName, const char *relName)
45 {
46 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
47  if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
48  pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
49  schemaName, relName, src, dst, strerror(errno));
50 #elif defined(__linux__) && defined(FICLONE)
51  int src_fd;
52  int dest_fd;
53 
54  if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
55  pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n",
56  schemaName, relName, src, strerror(errno));
57 
58  if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
59  pg_file_create_mode)) < 0)
60  pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n",
61  schemaName, relName, dst, strerror(errno));
62 
63  if (ioctl(dest_fd, FICLONE, src_fd) < 0)
64  {
65  unlink(dst);
66  pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
67  schemaName, relName, src, dst, strerror(errno));
68  }
69 
70  close(src_fd);
71  close(dest_fd);
72 #endif
73 }
74 
75 
76 /*
77  * copyFile()
78  *
79  * Copies a relation file from src to dst.
80  * schemaName/relName are relation's SQL name (used for error messages only).
81  */
82 void
83 copyFile(const char *src, const char *dst,
84  const char *schemaName, const char *relName)
85 {
86 #ifndef WIN32
87  int src_fd;
88  int dest_fd;
89  char *buffer;
90 
91  if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
92  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
93  schemaName, relName, src, strerror(errno));
94 
95  if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
96  pg_file_create_mode)) < 0)
97  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
98  schemaName, relName, dst, strerror(errno));
99 
100  /* copy in fairly large chunks for best efficiency */
101 #define COPY_BUF_SIZE (50 * BLCKSZ)
102 
103  buffer = (char *) pg_malloc(COPY_BUF_SIZE);
104 
105  /* perform data copying i.e read src source, write to destination */
106  while (true)
107  {
108  ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
109 
110  if (nbytes < 0)
111  pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
112  schemaName, relName, src, strerror(errno));
113 
114  if (nbytes == 0)
115  break;
116 
117  errno = 0;
118  if (write(dest_fd, buffer, nbytes) != nbytes)
119  {
120  /* if write didn't set errno, assume problem is no disk space */
121  if (errno == 0)
122  errno = ENOSPC;
123  pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
124  schemaName, relName, dst, strerror(errno));
125  }
126  }
127 
128  pg_free(buffer);
129  close(src_fd);
130  close(dest_fd);
131 
132 #else /* WIN32 */
133 
134  if (CopyFile(src, dst, true) == 0)
135  {
136  _dosmaperr(GetLastError());
137  pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
138  schemaName, relName, src, dst, strerror(errno));
139  }
140 
141 #endif /* WIN32 */
142 }
143 
144 
/*
 * linkFile()
 *
 * Creates dst as a hard link to the relation file src, using pg_link_file().
 * schemaName/relName are relation's SQL name (used for error messages only).
 *
 * A negative result from pg_link_file() is reported through pg_fatal().
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	int			rc = pg_link_file(src, dst);

	if (rc >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
			 schemaName, relName, src, dst, strerror(errno));
}
159 
160 
161 /*
162  * rewriteVisibilityMap()
163  *
164  * Transform a visibility map file, copying from src to dst.
165  * schemaName/relName are relation's SQL name (used for error messages only).
166  *
167  * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
168  * visibility map included one bit per heap page; it now includes two.
169  * When upgrading a cluster from before that time to a current PostgreSQL
170  * version, we could refuse to copy visibility maps from the old cluster
171  * to the new cluster; the next VACUUM would recreate them, but at the
172  * price of scanning the entire table. So, instead, we rewrite the old
173  * visibility maps in the new format. That way, the all-visible bits
174  * remain set for the pages for which they were set previously. The
175  * all-frozen bits are never set by this conversion; we leave that to VACUUM.
176  */
177 void
178 rewriteVisibilityMap(const char *fromfile, const char *tofile,
179  const char *schemaName, const char *relName)
180 {
181  int src_fd;
182  int dst_fd;
183  PGAlignedBlock buffer;
184  PGAlignedBlock new_vmbuf;
185  ssize_t totalBytesRead = 0;
186  ssize_t src_filesize;
187  int rewriteVmBytesPerPage;
188  BlockNumber new_blkno = 0;
189  struct stat statbuf;
190 
191  /* Compute number of old-format bytes per new page */
192  rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
193 
194  if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
195  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
196  schemaName, relName, fromfile, strerror(errno));
197 
198  if (fstat(src_fd, &statbuf) != 0)
199  pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n",
200  schemaName, relName, fromfile, strerror(errno));
201 
202  if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
203  pg_file_create_mode)) < 0)
204  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
205  schemaName, relName, tofile, strerror(errno));
206 
207  /* Save old file size */
208  src_filesize = statbuf.st_size;
209 
210  /*
211  * Turn each visibility map page into 2 pages one by one. Each new page
212  * has the same page header as the old one. If the last section of the
213  * last page is empty, we skip it, mostly to avoid turning one-page
214  * visibility maps for small relations into two pages needlessly.
215  */
216  while (totalBytesRead < src_filesize)
217  {
218  ssize_t bytesRead;
219  char *old_cur;
220  char *old_break;
221  char *old_blkend;
222  PageHeaderData pageheader;
223  bool old_lastblk;
224 
225  if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
226  {
227  if (bytesRead < 0)
228  pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
229  schemaName, relName, fromfile, strerror(errno));
230  else
231  pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n",
232  schemaName, relName, fromfile);
233  }
234 
235  totalBytesRead += BLCKSZ;
236  old_lastblk = (totalBytesRead == src_filesize);
237 
238  /* Save the page header data */
239  memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
240 
241  /*
242  * These old_* variables point to old visibility map page. old_cur
243  * points to current position on old page. old_blkend points to end of
244  * old block. old_break is the end+1 position on the old page for the
245  * data that will be transferred to the current new page.
246  */
247  old_cur = buffer.data + SizeOfPageHeaderData;
248  old_blkend = buffer.data + bytesRead;
249  old_break = old_cur + rewriteVmBytesPerPage;
250 
251  while (old_break <= old_blkend)
252  {
253  char *new_cur;
254  bool empty = true;
255  bool old_lastpart;
256 
257  /* First, copy old page header to new page */
258  memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
259 
260  /* Rewriting the last part of the last old page? */
261  old_lastpart = old_lastblk && (old_break == old_blkend);
262 
263  new_cur = new_vmbuf.data + SizeOfPageHeaderData;
264 
265  /* Process old page bytes one by one, and turn it into new page. */
266  while (old_cur < old_break)
267  {
268  uint8 byte = *(uint8 *) old_cur;
269  uint16 new_vmbits = 0;
270  int i;
271 
272  /* Generate new format bits while keeping old information */
273  for (i = 0; i < BITS_PER_BYTE; i++)
274  {
275  if (byte & (1 << i))
276  {
277  empty = false;
278  new_vmbits |=
280  }
281  }
282 
283  /* Copy new visibility map bytes to new-format page */
284  new_cur[0] = (char) (new_vmbits & 0xFF);
285  new_cur[1] = (char) (new_vmbits >> 8);
286 
287  old_cur++;
288  new_cur += BITS_PER_HEAPBLOCK;
289  }
290 
291  /* If the last part of the last page is empty, skip writing it */
292  if (old_lastpart && empty)
293  break;
294 
295  /* Set new checksum for visibility map page, if enabled */
297  ((PageHeader) new_vmbuf.data)->pd_checksum =
298  pg_checksum_page(new_vmbuf.data, new_blkno);
299 
300  errno = 0;
301  if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
302  {
303  /* if write didn't set errno, assume problem is no disk space */
304  if (errno == 0)
305  errno = ENOSPC;
306  pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
307  schemaName, relName, tofile, strerror(errno));
308  }
309 
310  /* Advance for next new page */
311  old_break += rewriteVmBytesPerPage;
312  new_blkno++;
313  }
314  }
315 
316  /* Clean up */
317  close(dst_fd);
318  close(src_fd);
319 }
320 
321 void
323 {
324  char existing_file[MAXPGPATH];
325  char new_link_file[MAXPGPATH];
326 
327  snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
328  snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
329  unlink(new_link_file); /* might fail */
330 
331 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
332  if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
333  pg_fatal("could not clone file between old and new data directories: %s\n",
334  strerror(errno));
335 #elif defined(__linux__) && defined(FICLONE)
336  {
337  int src_fd;
338  int dest_fd;
339 
340  if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
341  pg_fatal("could not open file \"%s\": %s\n",
342  existing_file, strerror(errno));
343 
344  if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
345  pg_file_create_mode)) < 0)
346  pg_fatal("could not create file \"%s\": %s\n",
347  new_link_file, strerror(errno));
348 
349  if (ioctl(dest_fd, FICLONE, src_fd) < 0)
350  pg_fatal("could not clone file between old and new data directories: %s\n",
351  strerror(errno));
352 
353  close(src_fd);
354  close(dest_fd);
355  }
356 #else
357  pg_fatal("file cloning not supported on this platform\n");
358 #endif
359 
360  unlink(new_link_file);
361 }
362 
363 void
365 {
366  char existing_file[MAXPGPATH];
367  char new_link_file[MAXPGPATH];
368 
369  snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
370  snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
371  unlink(new_link_file); /* might fail */
372 
373  if (pg_link_file(existing_file, new_link_file) < 0)
374  pg_fatal("could not create hard link between old and new data directories: %s\n"
375  "In link mode the old and new data directories must be on the same file system.\n",
376  strerror(errno));
377 
378  unlink(new_link_file);
379 }
380 
#ifdef WIN32
/*
 * win32_pghardlink()
 *
 * Implementation of pg_link_file() on Windows: creates dst as a hard link to
 * the existing file src.
 *
 * Returns 0 on success.  On failure returns -1 after passing the Windows
 * error code to _dosmaperr().
 */
static int
win32_pghardlink(const char *src, const char *dst)
{
	/*
	 * CreateHardLinkA returns zero for failure
	 * http://msdn.microsoft.com/en-us/library/aa363860(VS.85).aspx
	 *
	 * Note the argument order: new link name first, existing file second.
	 */
	if (CreateHardLinkA(dst, src, NULL) != 0)
		return 0;

	_dosmaperr(GetLastError());
	return -1;
}
#endif
void cloneFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:43
void check_file_clone(void)
Definition: file.c:322
int pg_file_create_mode
Definition: file_perm.c:19
void check_hard_link(void)
Definition: file.c:364
#define pg_link_file
Definition: pg_upgrade.h:68
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
ControlData controldata
Definition: pg_upgrade.h:259
#define BITS_PER_BYTE
#define write(a, b, c)
Definition: win32.h:14
void rewriteVisibilityMap(const char *fromfile, const char *tofile, const char *schemaName, const char *relName)
Definition: file.c:178
unsigned char uint8
Definition: c.h:356
void _dosmaperr(unsigned long)
Definition: win32error.c:171
#define pg_fatal(...)
Definition: pg_rewind.h:43
uint32 BlockNumber
Definition: block.h:31
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define PG_BINARY
Definition: c.h:1191
void copyFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:83
ClusterInfo new_cluster
Definition: pg_upgrade.c:59
char data[BLCKSZ]
Definition: c.h:1060
#define COPY_BUF_SIZE
unsigned short uint16
Definition: c.h:357
#define MAXPGPATH
bool data_checksum_version
Definition: pg_upgrade.h:223
void linkFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:152
ClusterInfo old_cluster
Definition: pg_upgrade.c:59
#define BITS_PER_HEAPBLOCK
Definition: visibilitymap.h:23
#define byte(x, n)
Definition: rijndael.c:68
#define stat(a, b)
Definition: win32_port.h:255
PageHeaderData * PageHeader
Definition: bufpage.h:166
#define strerror
Definition: port.h:205
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define VISIBILITYMAP_ALL_VISIBLE
Definition: visibilitymap.h:26
char * pgdata
Definition: pg_upgrade.h:261
int i
#define close(a)
Definition: win32.h:12
#define snprintf
Definition: port.h:192
uint16 pg_checksum_page(char *page, BlockNumber blkno)
#define read(a, b, c)
Definition: win32.h:13