PostgreSQL Source Code  git master
file.c
Go to the documentation of this file.
1 /*
2  * file.c
3  *
4  * file system operations
5  *
6  * Copyright (c) 2010-2024, PostgreSQL Global Development Group
7  * src/bin/pg_upgrade/file.c
8  */
9 
10 #include "postgres_fe.h"
11 
12 #include <sys/stat.h>
13 #include <limits.h>
14 #include <fcntl.h>
15 #ifdef HAVE_COPYFILE_H
16 #include <copyfile.h>
17 #endif
18 #ifdef __linux__
19 #include <sys/ioctl.h>
20 #include <linux/fs.h>
21 #endif
22 
24 #include "common/file_perm.h"
25 #include "pg_upgrade.h"
26 #include "storage/bufpage.h"
27 #include "storage/checksum.h"
28 #include "storage/checksum_impl.h"
29 
30 
31 /*
32  * cloneFile()
33  *
34  * Clones/reflinks a relation file from src to dst.
35  *
36  * schemaName/relName are relation's SQL name (used for error messages only).
37  */
38 void
39 cloneFile(const char *src, const char *dst,
40  const char *schemaName, const char *relName)
41 {
42 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
43  if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
44  pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
45  schemaName, relName, src, dst);
46 #elif defined(__linux__) && defined(FICLONE)
47  int src_fd;
48  int dest_fd;
49 
50  if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
51  pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %m",
52  schemaName, relName, src);
53 
54  if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
55  pg_file_create_mode)) < 0)
56  pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %m",
57  schemaName, relName, dst);
58 
59  if (ioctl(dest_fd, FICLONE, src_fd) < 0)
60  {
61  int save_errno = errno;
62 
63  unlink(dst);
64 
65  pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
66  schemaName, relName, src, dst, strerror(save_errno));
67  }
68 
69  close(src_fd);
70  close(dest_fd);
71 #endif
72 }
73 
74 
75 /*
76  * copyFile()
77  *
78  * Copies a relation file from src to dst.
79  * schemaName/relName are relation's SQL name (used for error messages only).
80  */
81 void
82 copyFile(const char *src, const char *dst,
83  const char *schemaName, const char *relName)
84 {
85 #ifndef WIN32
86  int src_fd;
87  int dest_fd;
88  char *buffer;
89 
90  if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
91  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
92  schemaName, relName, src);
93 
94  if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
95  pg_file_create_mode)) < 0)
96  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
97  schemaName, relName, dst);
98 
99  /* copy in fairly large chunks for best efficiency */
100 #define COPY_BUF_SIZE (50 * BLCKSZ)
101 
102  buffer = (char *) pg_malloc(COPY_BUF_SIZE);
103 
104  /* perform data copying i.e read src source, write to destination */
105  while (true)
106  {
107  ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
108 
109  if (nbytes < 0)
110  pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %m",
111  schemaName, relName, src);
112 
113  if (nbytes == 0)
114  break;
115 
116  errno = 0;
117  if (write(dest_fd, buffer, nbytes) != nbytes)
118  {
119  /* if write didn't set errno, assume problem is no disk space */
120  if (errno == 0)
121  errno = ENOSPC;
122  pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %m",
123  schemaName, relName, dst);
124  }
125  }
126 
127  pg_free(buffer);
128  close(src_fd);
129  close(dest_fd);
130 
131 #else /* WIN32 */
132 
133  if (CopyFile(src, dst, true) == 0)
134  {
135  _dosmaperr(GetLastError());
136  pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
137  schemaName, relName, src, dst);
138  }
139 
140 #endif /* WIN32 */
141 }
142 
143 
144 /*
145  * copyFileByRange()
146  *
147  * Copies a relation file from src to dst.
148  * schemaName/relName are relation's SQL name (used for error messages only).
149  */
150 void
151 copyFileByRange(const char *src, const char *dst,
152  const char *schemaName, const char *relName)
153 {
154 #ifdef HAVE_COPY_FILE_RANGE
155  int src_fd;
156  int dest_fd;
157  ssize_t nbytes;
158 
159  if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
160  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
161  schemaName, relName, src);
162 
163  if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
164  pg_file_create_mode)) < 0)
165  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
166  schemaName, relName, dst);
167 
168  do
169  {
170  nbytes = copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0);
171  if (nbytes < 0)
172  pg_fatal("error while copying relation \"%s.%s\": could not copy file range from \"%s\" to \"%s\": %m",
173  schemaName, relName, src, dst);
174  }
175  while (nbytes > 0);
176 
177  close(src_fd);
178  close(dest_fd);
179 #endif
180 }
181 
182 
183 /*
184  * linkFile()
185  *
186  * Hard-links a relation file from src to dst.
187  * schemaName/relName are relation's SQL name (used for error messages only).
188  */
189 void
190 linkFile(const char *src, const char *dst,
191  const char *schemaName, const char *relName)
192 {
193  if (link(src, dst) < 0)
194  pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
195  schemaName, relName, src, dst);
196 }
197 
198 
199 /*
200  * rewriteVisibilityMap()
201  *
202  * Transform a visibility map file, copying from src to dst.
203  * schemaName/relName are relation's SQL name (used for error messages only).
204  *
205  * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
206  * visibility map included one bit per heap page; it now includes two.
207  * When upgrading a cluster from before that time to a current PostgreSQL
208  * version, we could refuse to copy visibility maps from the old cluster
209  * to the new cluster; the next VACUUM would recreate them, but at the
210  * price of scanning the entire table. So, instead, we rewrite the old
211  * visibility maps in the new format. That way, the all-visible bits
212  * remain set for the pages for which they were set previously. The
213  * all-frozen bits are never set by this conversion; we leave that to VACUUM.
214  */
215 void
216 rewriteVisibilityMap(const char *fromfile, const char *tofile,
217  const char *schemaName, const char *relName)
218 {
219  int src_fd;
220  int dst_fd;
221  PGIOAlignedBlock buffer;
222  PGIOAlignedBlock new_vmbuf;
223  ssize_t totalBytesRead = 0;
224  ssize_t src_filesize;
225  int rewriteVmBytesPerPage;
226  BlockNumber new_blkno = 0;
227  struct stat statbuf;
228 
229  /* Compute number of old-format bytes per new page */
230  rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
231 
232  if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
233  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
234  schemaName, relName, fromfile);
235 
236  if (fstat(src_fd, &statbuf) != 0)
237  pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %m",
238  schemaName, relName, fromfile);
239 
240  if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
241  pg_file_create_mode)) < 0)
242  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
243  schemaName, relName, tofile);
244 
245  /* Save old file size */
246  src_filesize = statbuf.st_size;
247 
248  /*
249  * Turn each visibility map page into 2 pages one by one. Each new page
250  * has the same page header as the old one. If the last section of the
251  * last page is empty, we skip it, mostly to avoid turning one-page
252  * visibility maps for small relations into two pages needlessly.
253  */
254  while (totalBytesRead < src_filesize)
255  {
256  ssize_t bytesRead;
257  char *old_cur;
258  char *old_break;
259  char *old_blkend;
260  PageHeaderData pageheader;
261  bool old_lastblk;
262 
263  if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
264  {
265  if (bytesRead < 0)
266  pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %m",
267  schemaName, relName, fromfile);
268  else
269  pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"",
270  schemaName, relName, fromfile);
271  }
272 
273  totalBytesRead += BLCKSZ;
274  old_lastblk = (totalBytesRead == src_filesize);
275 
276  /* Save the page header data */
277  memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
278 
279  /*
280  * These old_* variables point to old visibility map page. old_cur
281  * points to current position on old page. old_blkend points to end of
282  * old block. old_break is the end+1 position on the old page for the
283  * data that will be transferred to the current new page.
284  */
285  old_cur = buffer.data + SizeOfPageHeaderData;
286  old_blkend = buffer.data + bytesRead;
287  old_break = old_cur + rewriteVmBytesPerPage;
288 
289  while (old_break <= old_blkend)
290  {
291  char *new_cur;
292  bool empty = true;
293  bool old_lastpart;
294 
295  /* First, copy old page header to new page */
296  memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
297 
298  /* Rewriting the last part of the last old page? */
299  old_lastpart = old_lastblk && (old_break == old_blkend);
300 
301  new_cur = new_vmbuf.data + SizeOfPageHeaderData;
302 
303  /* Process old page bytes one by one, and turn it into new page. */
304  while (old_cur < old_break)
305  {
306  uint8 byte = *(uint8 *) old_cur;
307  uint16 new_vmbits = 0;
308  int i;
309 
310  /* Generate new format bits while keeping old information */
311  for (i = 0; i < BITS_PER_BYTE; i++)
312  {
313  if (byte & (1 << i))
314  {
315  empty = false;
316  new_vmbits |=
318  }
319  }
320 
321  /* Copy new visibility map bytes to new-format page */
322  new_cur[0] = (char) (new_vmbits & 0xFF);
323  new_cur[1] = (char) (new_vmbits >> 8);
324 
325  old_cur++;
326  new_cur += BITS_PER_HEAPBLOCK;
327  }
328 
329  /* If the last part of the last page is empty, skip writing it */
330  if (old_lastpart && empty)
331  break;
332 
333  /* Set new checksum for visibility map page, if enabled */
335  ((PageHeader) new_vmbuf.data)->pd_checksum =
336  pg_checksum_page(new_vmbuf.data, new_blkno);
337 
338  errno = 0;
339  if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
340  {
341  /* if write didn't set errno, assume problem is no disk space */
342  if (errno == 0)
343  errno = ENOSPC;
344  pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %m",
345  schemaName, relName, tofile);
346  }
347 
348  /* Advance for next new page */
349  old_break += rewriteVmBytesPerPage;
350  new_blkno++;
351  }
352  }
353 
354  /* Clean up */
355  close(dst_fd);
356  close(src_fd);
357 }
358 
359 void
361 {
362  char existing_file[MAXPGPATH];
363  char new_link_file[MAXPGPATH];
364 
365  snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
366  snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
367  unlink(new_link_file); /* might fail */
368 
369 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
370  if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
371  pg_fatal("could not clone file between old and new data directories: %m");
372 #elif defined(__linux__) && defined(FICLONE)
373  {
374  int src_fd;
375  int dest_fd;
376 
377  if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
378  pg_fatal("could not open file \"%s\": %m",
379  existing_file);
380 
381  if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
382  pg_file_create_mode)) < 0)
383  pg_fatal("could not create file \"%s\": %m",
384  new_link_file);
385 
386  if (ioctl(dest_fd, FICLONE, src_fd) < 0)
387  pg_fatal("could not clone file between old and new data directories: %m");
388 
389  close(src_fd);
390  close(dest_fd);
391  }
392 #else
393  pg_fatal("file cloning not supported on this platform");
394 #endif
395 
396  unlink(new_link_file);
397 }
398 
399 void
401 {
402  char existing_file[MAXPGPATH];
403  char new_link_file[MAXPGPATH];
404 
405  snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
406  snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.copy_file_range_test", new_cluster.pgdata);
407  unlink(new_link_file); /* might fail */
408 
409 #if defined(HAVE_COPY_FILE_RANGE)
410  {
411  int src_fd;
412  int dest_fd;
413 
414  if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
415  pg_fatal("could not open file \"%s\": %m",
416  existing_file);
417 
418  if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
419  pg_file_create_mode)) < 0)
420  pg_fatal("could not create file \"%s\": %m",
421  new_link_file);
422 
423  if (copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0) < 0)
424  pg_fatal("could not copy file range between old and new data directories: %m");
425 
426  close(src_fd);
427  close(dest_fd);
428  }
429 #else
430  pg_fatal("copy_file_range not supported on this platform");
431 #endif
432 
433  unlink(new_link_file);
434 }
435 
436 void
438 {
439  char existing_file[MAXPGPATH];
440  char new_link_file[MAXPGPATH];
441 
442  snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
443  snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
444  unlink(new_link_file); /* might fail */
445 
446  if (link(existing_file, new_link_file) < 0)
447  pg_fatal("could not create hard link between old and new data directories: %m\n"
448  "In link mode the old and new data directories must be on the same file system.");
449 
450  unlink(new_link_file);
451 }
uint32 BlockNumber
Definition: block.h:31
PageHeaderData * PageHeader
Definition: bufpage.h:173
#define SizeOfPageHeaderData
Definition: bufpage.h:216
unsigned short uint16
Definition: c.h:491
#define PG_BINARY
Definition: c.h:1252
unsigned char uint8
Definition: c.h:490
uint16 pg_checksum_page(char *page, BlockNumber blkno)
void pg_free(void *ptr)
Definition: fe_memutils.c:105
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
void linkFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:190
void check_file_clone(void)
Definition: file.c:360
void rewriteVisibilityMap(const char *fromfile, const char *tofile, const char *schemaName, const char *relName)
Definition: file.c:216
void cloneFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:39
void copyFileByRange(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:151
void check_hard_link(void)
Definition: file.c:437
void copyFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:82
void check_copy_file_range(void)
Definition: file.c:400
#define COPY_BUF_SIZE
int pg_file_create_mode
Definition: file_perm.c:19
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
int i
Definition: isn.c:72
#define pg_fatal(...)
#define BITS_PER_BYTE
#define MAXPGPATH
ClusterInfo new_cluster
Definition: pg_upgrade.c:68
ClusterInfo old_cluster
Definition: pg_upgrade.c:67
#define strerror
Definition: port.h:251
#define snprintf
Definition: port.h:238
char * pgdata
Definition: pg_upgrade.h:285
ControlData controldata
Definition: pg_upgrade.h:282
uint32 data_checksum_version
Definition: pg_upgrade.h:247
__int64 st_size
Definition: win32_port.h:273
char data[BLCKSZ]
Definition: c.h:1116
#define BITS_PER_HEAPBLOCK
#define VISIBILITYMAP_ALL_VISIBLE
void _dosmaperr(unsigned long)
Definition: win32error.c:177
#define fstat
Definition: win32_port.h:283