PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
file.c
Go to the documentation of this file.
1 /*
2  * file.c
3  *
4  * file system operations
5  *
6  * Copyright (c) 2010-2017, PostgreSQL Global Development Group
7  * src/bin/pg_upgrade/file.c
8  */
9 
10 #include "postgres_fe.h"
11 
12 #include "access/visibilitymap.h"
13 #include "pg_upgrade.h"
14 #include "storage/bufpage.h"
15 #include "storage/checksum.h"
16 #include "storage/checksum_impl.h"
17 
18 #include <sys/stat.h>
19 #include <fcntl.h>
20 
21 
22 #ifdef WIN32
23 static int win32_pghardlink(const char *src, const char *dst);
24 #endif
25 
26 
27 /*
28  * copyFile()
29  *
30  * Copies a relation file from src to dst.
31  * schemaName/relName are relation's SQL name (used for error messages only).
32  */
33 void
34 copyFile(const char *src, const char *dst,
35  const char *schemaName, const char *relName)
36 {
37 #ifndef WIN32
38  int src_fd;
39  int dest_fd;
40  char *buffer;
41 
42  if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
43  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
44  schemaName, relName, src, strerror(errno));
45 
46  if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
47  S_IRUSR | S_IWUSR)) < 0)
48  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
49  schemaName, relName, dst, strerror(errno));
50 
51  /* copy in fairly large chunks for best efficiency */
52 #define COPY_BUF_SIZE (50 * BLCKSZ)
53 
54  buffer = (char *) pg_malloc(COPY_BUF_SIZE);
55 
56  /* perform data copying i.e read src source, write to destination */
57  while (true)
58  {
59  ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
60 
61  if (nbytes < 0)
62  pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
63  schemaName, relName, src, strerror(errno));
64 
65  if (nbytes == 0)
66  break;
67 
68  errno = 0;
69  if (write(dest_fd, buffer, nbytes) != nbytes)
70  {
71  /* if write didn't set errno, assume problem is no disk space */
72  if (errno == 0)
73  errno = ENOSPC;
74  pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
75  schemaName, relName, dst, strerror(errno));
76  }
77  }
78 
79  pg_free(buffer);
80  close(src_fd);
81  close(dest_fd);
82 
83 #else /* WIN32 */
84 
85  if (CopyFile(src, dst, true) == 0)
86  {
87  _dosmaperr(GetLastError());
88  pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
89  schemaName, relName, src, dst, strerror(errno));
90  }
91 
92 #endif /* WIN32 */
93 }
94 
95 
/*
 * linkFile()
 *
 * Create "dst" as a hard link to relation file "src" via pg_link_file().
 * schemaName and relName are the relation's SQL name; they are used only
 * when building the error message.
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	/* nothing more to do when the link was created */
	if (pg_link_file(src, dst) >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
			 schemaName, relName, src, dst, strerror(errno));
}
110 
111 
112 /*
113  * rewriteVisibilityMap()
114  *
115  * Transform a visibility map file, copying from src to dst.
116  * schemaName/relName are relation's SQL name (used for error messages only).
117  *
118  * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
119  * visibility map included one bit per heap page; it now includes two.
120  * When upgrading a cluster from before that time to a current PostgreSQL
121  * version, we could refuse to copy visibility maps from the old cluster
122  * to the new cluster; the next VACUUM would recreate them, but at the
123  * price of scanning the entire table. So, instead, we rewrite the old
124  * visibility maps in the new format. That way, the all-visible bits
125  * remain set for the pages for which they were set previously. The
126  * all-frozen bits are never set by this conversion; we leave that to VACUUM.
127  */
128 void
129 rewriteVisibilityMap(const char *fromfile, const char *tofile,
130  const char *schemaName, const char *relName)
131 {
132  int src_fd;
133  int dst_fd;
134  char *buffer;
135  char *new_vmbuf;
136  ssize_t totalBytesRead = 0;
137  ssize_t src_filesize;
138  int rewriteVmBytesPerPage;
139  BlockNumber new_blkno = 0;
140  struct stat statbuf;
141 
142  /* Compute number of old-format bytes per new page */
143  rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
144 
145  if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
146  pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
147  schemaName, relName, fromfile, strerror(errno));
148 
149  if (fstat(src_fd, &statbuf) != 0)
150  pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n",
151  schemaName, relName, fromfile, strerror(errno));
152 
153  if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
154  S_IRUSR | S_IWUSR)) < 0)
155  pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
156  schemaName, relName, tofile, strerror(errno));
157 
158  /* Save old file size */
159  src_filesize = statbuf.st_size;
160 
161  /*
162  * Malloc the work buffers, rather than making them local arrays, to
163  * ensure adequate alignment.
164  */
165  buffer = (char *) pg_malloc(BLCKSZ);
166  new_vmbuf = (char *) pg_malloc(BLCKSZ);
167 
168  /*
169  * Turn each visibility map page into 2 pages one by one. Each new page
170  * has the same page header as the old one. If the last section of the
171  * last page is empty, we skip it, mostly to avoid turning one-page
172  * visibility maps for small relations into two pages needlessly.
173  */
174  while (totalBytesRead < src_filesize)
175  {
176  ssize_t bytesRead;
177  char *old_cur;
178  char *old_break;
179  char *old_blkend;
180  PageHeaderData pageheader;
181  bool old_lastblk;
182 
183  if ((bytesRead = read(src_fd, buffer, BLCKSZ)) != BLCKSZ)
184  {
185  if (bytesRead < 0)
186  pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
187  schemaName, relName, fromfile, strerror(errno));
188  else
189  pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n",
190  schemaName, relName, fromfile);
191  }
192 
193  totalBytesRead += BLCKSZ;
194  old_lastblk = (totalBytesRead == src_filesize);
195 
196  /* Save the page header data */
197  memcpy(&pageheader, buffer, SizeOfPageHeaderData);
198 
199  /*
200  * These old_* variables point to old visibility map page. old_cur
201  * points to current position on old page. old_blkend points to end of
202  * old block. old_break is the end+1 position on the old page for the
203  * data that will be transferred to the current new page.
204  */
205  old_cur = buffer + SizeOfPageHeaderData;
206  old_blkend = buffer + bytesRead;
207  old_break = old_cur + rewriteVmBytesPerPage;
208 
209  while (old_break <= old_blkend)
210  {
211  char *new_cur;
212  bool empty = true;
213  bool old_lastpart;
214 
215  /* First, copy old page header to new page */
216  memcpy(new_vmbuf, &pageheader, SizeOfPageHeaderData);
217 
218  /* Rewriting the last part of the last old page? */
219  old_lastpart = old_lastblk && (old_break == old_blkend);
220 
221  new_cur = new_vmbuf + SizeOfPageHeaderData;
222 
223  /* Process old page bytes one by one, and turn it into new page. */
224  while (old_cur < old_break)
225  {
226  uint8 byte = *(uint8 *) old_cur;
227  uint16 new_vmbits = 0;
228  int i;
229 
230  /* Generate new format bits while keeping old information */
231  for (i = 0; i < BITS_PER_BYTE; i++)
232  {
233  if (byte & (1 << i))
234  {
235  empty = false;
236  new_vmbits |=
238  }
239  }
240 
241  /* Copy new visibility map bytes to new-format page */
242  new_cur[0] = (char) (new_vmbits & 0xFF);
243  new_cur[1] = (char) (new_vmbits >> 8);
244 
245  old_cur++;
246  new_cur += BITS_PER_HEAPBLOCK;
247  }
248 
249  /* If the last part of the last page is empty, skip writing it */
250  if (old_lastpart && empty)
251  break;
252 
253  /* Set new checksum for visibility map page, if enabled */
255  ((PageHeader) new_vmbuf)->pd_checksum =
256  pg_checksum_page(new_vmbuf, new_blkno);
257 
258  errno = 0;
259  if (write(dst_fd, new_vmbuf, BLCKSZ) != BLCKSZ)
260  {
261  /* if write didn't set errno, assume problem is no disk space */
262  if (errno == 0)
263  errno = ENOSPC;
264  pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
265  schemaName, relName, tofile, strerror(errno));
266  }
267 
268  /* Advance for next new page */
269  old_break += rewriteVmBytesPerPage;
270  new_blkno++;
271  }
272  }
273 
274  /* Clean up */
275  pg_free(buffer);
276  pg_free(new_vmbuf);
277  close(dst_fd);
278  close(src_fd);
279 }
280 
281 void
283 {
284  char existing_file[MAXPGPATH];
285  char new_link_file[MAXPGPATH];
286 
287  snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
288  snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
289  unlink(new_link_file); /* might fail */
290 
291  if (pg_link_file(existing_file, new_link_file) < 0)
292  pg_fatal("could not create hard link between old and new data directories: %s\n"
293  "In link mode the old and new data directories must be on the same file system volume.\n",
294  strerror(errno));
295 
296  unlink(new_link_file);
297 }
298 
299 #ifdef WIN32
300 /* implementation of pg_link_file() on Windows */
301 static int
302 win32_pghardlink(const char *src, const char *dst)
303 {
304  /*
305  * CreateHardLinkA returns zero for failure
306  * http://msdn.microsoft.com/en-us/library/aa363860(VS.85).aspx
307  */
308  if (CreateHardLinkA(dst, src, NULL) == 0)
309  {
310  _dosmaperr(GetLastError());
311  return -1;
312  }
313  else
314  return 0;
315 }
316 #endif
317 
318 
/*
 * fopen_priv()
 *
 * fopen() a file with no group/other permissions: the process umask is
 * temporarily set to mask out all group and other bits while the file is
 * opened, then put back to its previous value.  Returns whatever fopen()
 * returned.
 */
FILE *
fopen_priv(const char *path, const char *mode)
{
	FILE	   *stream;
	mode_t		saved_mask;

	saved_mask = umask(S_IRWXG | S_IRWXO);
	stream = fopen(path, mode);

	/* we assume this can't change errno */
	umask(saved_mask);

	return stream;
}
void check_hard_link(void)
Definition: file.c:282
#define pg_link_file
Definition: pg_upgrade.h:74
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
ControlData controldata
Definition: pg_upgrade.h:260
#define BITS_PER_BYTE
#define write(a, b, c)
Definition: win32.h:14
void rewriteVisibilityMap(const char *fromfile, const char *tofile, const char *schemaName, const char *relName)
Definition: file.c:129
unsigned char uint8
Definition: c.h:266
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
uint32 BlockNumber
Definition: block.h:31
void pg_fatal(const char *fmt,...)
Definition: logging.c:83
#define SizeOfPageHeaderData
Definition: bufpage.h:212
FILE * fopen_priv(const char *path, const char *mode)
Definition: file.c:321
#define PG_BINARY
Definition: c.h:1038
void copyFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:34
ClusterInfo new_cluster
Definition: pg_upgrade.c:56
#define COPY_BUF_SIZE
unsigned short uint16
Definition: c.h:267
#define MAXPGPATH
bool data_checksum_version
Definition: pg_upgrade.h:225
void linkFile(const char *src, const char *dst, const char *schemaName, const char *relName)
Definition: file.c:103
ClusterInfo old_cluster
Definition: pg_upgrade.c:56
int unlink(const char *filename)
#define BITS_PER_HEAPBLOCK
Definition: visibilitymap.h:23
#define byte(x, n)
Definition: rijndael.c:68
#define S_IRWXO
Definition: win32.h:455
PageHeaderData * PageHeader
Definition: bufpage.h:162
#define NULL
Definition: c.h:229
void _dosmaperr(unsigned long)
Definition: win32error.c:171
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define VISIBILITYMAP_ALL_VISIBLE
Definition: visibilitymap.h:26
#define S_IRWXG
Definition: win32.h:451
char * pgdata
Definition: pg_upgrade.h:262
int i
const char * strerror(int errnum)
Definition: strerror.c:19
#define close(a)
Definition: win32.h:12
uint16 pg_checksum_page(char *page, BlockNumber blkno)
#define read(a, b, c)
Definition: win32.h:13