PostgreSQL Source Code  git master
reinit.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * reinit.c
4  * Reinitialization of unlogged relations
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/storage/file/reinit.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include <unistd.h>
18 
19 #include "common/relpath.h"
20 #include "storage/copydir.h"
21 #include "storage/fd.h"
22 #include "storage/reinit.h"
23 #include "utils/hsearch.h"
24 #include "utils/memutils.h"
25 
/* Forward declarations of the file-local directory walkers defined below. */
static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
												  int op);
static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
											   int op);
30 
31 typedef struct
32 {
33  char oid[OIDCHARS + 1];
35 
36 /*
37  * Reset unlogged relations from before the last restart.
38  *
39  * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any
40  * relation with an "init" fork, except for the "init" fork itself.
41  *
42  * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main
43  * fork.
44  */
45 void
47 {
48  char temp_path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
49  DIR *spc_dir;
50  struct dirent *spc_de;
51  MemoryContext tmpctx,
52  oldctx;
53 
54  /* Log it. */
55  elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
56  (op & UNLOGGED_RELATION_CLEANUP) != 0,
57  (op & UNLOGGED_RELATION_INIT) != 0);
58 
59  /*
60  * Just to be sure we don't leak any memory, let's create a temporary
61  * memory context for this operation.
62  */
64  "ResetUnloggedRelations",
66  oldctx = MemoryContextSwitchTo(tmpctx);
67 
68  /*
69  * First process unlogged files in pg_default ($PGDATA/base)
70  */
72 
73  /*
74  * Cycle through directories for all non-default tablespaces.
75  */
76  spc_dir = AllocateDir("pg_tblspc");
77 
78  while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
79  {
80  if (strcmp(spc_de->d_name, ".") == 0 ||
81  strcmp(spc_de->d_name, "..") == 0)
82  continue;
83 
84  snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
87  }
88 
89  FreeDir(spc_dir);
90 
91  /*
92  * Restore memory context.
93  */
94  MemoryContextSwitchTo(oldctx);
95  MemoryContextDelete(tmpctx);
96 }
97 
98 /*
99  * Process one tablespace directory for ResetUnloggedRelations
100  */
101 static void
102 ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
103 {
104  DIR *ts_dir;
105  struct dirent *de;
106  char dbspace_path[MAXPGPATH * 2];
107 
108  ts_dir = AllocateDir(tsdirname);
109 
110  /*
111  * If we get ENOENT on a tablespace directory, log it and return. This
112  * can happen if a previous DROP TABLESPACE crashed between removing the
113  * tablespace directory and removing the symlink in pg_tblspc. We don't
114  * really want to prevent database startup in that scenario, so let it
115  * pass instead. Any other type of error will be reported by ReadDir
116  * (causing a startup failure).
117  */
118  if (ts_dir == NULL && errno == ENOENT)
119  {
120  ereport(LOG,
122  errmsg("could not open directory \"%s\": %m",
123  tsdirname)));
124  return;
125  }
126 
127  while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
128  {
129  /*
130  * We're only interested in the per-database directories, which have
131  * numeric names. Note that this code will also (properly) ignore "."
132  * and "..".
133  */
134  if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
135  continue;
136 
137  snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
138  tsdirname, de->d_name);
139  ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
140  }
141 
142  FreeDir(ts_dir);
143 }
144 
145 /*
146  * Process one per-dbspace directory for ResetUnloggedRelations
147  */
148 static void
149 ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
150 {
151  DIR *dbspace_dir;
152  struct dirent *de;
153  char rm_path[MAXPGPATH * 2];
154 
155  /* Caller must specify at least one operation. */
157 
158  /*
159  * Cleanup is a two-pass operation. First, we go through and identify all
160  * the files with init forks. Then, we go through again and nuke
161  * everything with the same OID except the init fork.
162  */
163  if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
164  {
165  HTAB *hash;
166  HASHCTL ctl;
167 
168  /*
169  * It's possible that someone could create a ton of unlogged relations
170  * in the same database & tablespace, so we'd better use a hash table
171  * rather than an array or linked list to keep track of which files
172  * need to be reset. Otherwise, this cleanup operation would be
173  * O(n^2).
174  */
175  memset(&ctl, 0, sizeof(ctl));
176  ctl.keysize = sizeof(unlogged_relation_entry);
177  ctl.entrysize = sizeof(unlogged_relation_entry);
178  hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM);
179 
180  /* Scan the directory. */
181  dbspace_dir = AllocateDir(dbspacedirname);
182  while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
183  {
184  ForkNumber forkNum;
185  int oidchars;
187 
188  /* Skip anything that doesn't look like a relation data file. */
189  if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
190  &forkNum))
191  continue;
192 
193  /* Also skip it unless this is the init fork. */
194  if (forkNum != INIT_FORKNUM)
195  continue;
196 
197  /*
198  * Put the OID portion of the name into the hash table, if it
199  * isn't already.
200  */
201  memset(ent.oid, 0, sizeof(ent.oid));
202  memcpy(ent.oid, de->d_name, oidchars);
203  hash_search(hash, &ent, HASH_ENTER, NULL);
204  }
205 
206  /* Done with the first pass. */
207  FreeDir(dbspace_dir);
208 
209  /*
210  * If we didn't find any init forks, there's no point in continuing;
211  * we can bail out now.
212  */
213  if (hash_get_num_entries(hash) == 0)
214  {
215  hash_destroy(hash);
216  return;
217  }
218 
219  /*
220  * Now, make a second pass and remove anything that matches.
221  */
222  dbspace_dir = AllocateDir(dbspacedirname);
223  while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
224  {
225  ForkNumber forkNum;
226  int oidchars;
227  bool found;
229 
230  /* Skip anything that doesn't look like a relation data file. */
231  if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
232  &forkNum))
233  continue;
234 
235  /* We never remove the init fork. */
236  if (forkNum == INIT_FORKNUM)
237  continue;
238 
239  /*
240  * See whether the OID portion of the name shows up in the hash
241  * table.
242  */
243  memset(ent.oid, 0, sizeof(ent.oid));
244  memcpy(ent.oid, de->d_name, oidchars);
245  hash_search(hash, &ent, HASH_FIND, &found);
246 
247  /* If so, nuke it! */
248  if (found)
249  {
250  snprintf(rm_path, sizeof(rm_path), "%s/%s",
251  dbspacedirname, de->d_name);
252  if (unlink(rm_path) < 0)
253  ereport(ERROR,
255  errmsg("could not remove file \"%s\": %m",
256  rm_path)));
257  else
258  elog(DEBUG2, "unlinked file \"%s\"", rm_path);
259  }
260  }
261 
262  /* Cleanup is complete. */
263  FreeDir(dbspace_dir);
264  hash_destroy(hash);
265  }
266 
267  /*
268  * Initialization happens after cleanup is complete: we copy each init
269  * fork file to the corresponding main fork file. Note that if we are
270  * asked to do both cleanup and init, we may never get here: if the
271  * cleanup code determines that there are no init forks in this dbspace,
272  * it will return before we get to this point.
273  */
274  if ((op & UNLOGGED_RELATION_INIT) != 0)
275  {
276  /* Scan the directory. */
277  dbspace_dir = AllocateDir(dbspacedirname);
278  while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
279  {
280  ForkNumber forkNum;
281  int oidchars;
282  char oidbuf[OIDCHARS + 1];
283  char srcpath[MAXPGPATH * 2];
284  char dstpath[MAXPGPATH];
285 
286  /* Skip anything that doesn't look like a relation data file. */
287  if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
288  &forkNum))
289  continue;
290 
291  /* Also skip it unless this is the init fork. */
292  if (forkNum != INIT_FORKNUM)
293  continue;
294 
295  /* Construct source pathname. */
296  snprintf(srcpath, sizeof(srcpath), "%s/%s",
297  dbspacedirname, de->d_name);
298 
299  /* Construct destination pathname. */
300  memcpy(oidbuf, de->d_name, oidchars);
301  oidbuf[oidchars] = '\0';
302  snprintf(dstpath, sizeof(dstpath), "%s/%s%s",
303  dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
304  strlen(forkNames[INIT_FORKNUM]));
305 
306  /* OK, we're ready to perform the actual copy. */
307  elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
308  copy_file(srcpath, dstpath);
309  }
310 
311  FreeDir(dbspace_dir);
312 
313  /*
314  * copy_file() above has already called pg_flush_data() on the files
315  * it created. Now we need to fsync those files, because a checkpoint
316  * won't do it for us while we're in recovery. We do this in a
317  * separate pass to allow the kernel to perform all the flushes
318  * (especially the metadata ones) at once.
319  */
320  dbspace_dir = AllocateDir(dbspacedirname);
321  while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
322  {
323  ForkNumber forkNum;
324  int oidchars;
325  char oidbuf[OIDCHARS + 1];
326  char mainpath[MAXPGPATH];
327 
328  /* Skip anything that doesn't look like a relation data file. */
329  if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
330  &forkNum))
331  continue;
332 
333  /* Also skip it unless this is the init fork. */
334  if (forkNum != INIT_FORKNUM)
335  continue;
336 
337  /* Construct main fork pathname. */
338  memcpy(oidbuf, de->d_name, oidchars);
339  oidbuf[oidchars] = '\0';
340  snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
341  dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
342  strlen(forkNames[INIT_FORKNUM]));
343 
344  fsync_fname(mainpath, false);
345  }
346 
347  FreeDir(dbspace_dir);
348 
349  /*
350  * Lastly, fsync the database directory itself, ensuring the
351  * filesystem remembers the file creations and deletions we've done.
352  * We don't bother with this during a call that does only
353  * UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we
354  * get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step
355  * too at the next startup attempt.
356  */
357  fsync_fname(dbspacedirname, true);
358  }
359 }
360 
361 /*
362  * Basic parsing of putative relation filenames.
363  *
364  * This function returns true if the file appears to be in the correct format
365  * for a non-temporary relation and false otherwise.
366  *
367  * NB: If this function returns true, the caller is entitled to assume that
368  * *oidchars has been set to the a value no more than OIDCHARS, and thus
369  * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID
370  * portion of the filename. This is critical to protect against a possible
371  * buffer overrun.
372  */
373 bool
374 parse_filename_for_nontemp_relation(const char *name, int *oidchars,
375  ForkNumber *fork)
376 {
377  int pos;
378 
379  /* Look for a non-empty string of digits (that isn't too long). */
380  for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
381  ;
382  if (pos == 0 || pos > OIDCHARS)
383  return false;
384  *oidchars = pos;
385 
386  /* Check for a fork name. */
387  if (name[pos] != '_')
388  *fork = MAIN_FORKNUM;
389  else
390  {
391  int forkchar;
392 
393  forkchar = forkname_chars(&name[pos + 1], fork);
394  if (forkchar <= 0)
395  return false;
396  pos += forkchar + 1;
397  }
398 
399  /* Check for a segment number. */
400  if (name[pos] == '.')
401  {
402  int segchar;
403 
404  for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
405  ;
406  if (segchar <= 1)
407  return false;
408  pos += segchar;
409  }
410 
411  /* Now we should be at the end. */
412  if (name[pos] != '\0')
413  return false;
414  return true;
415 }
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:814
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
#define DEBUG1
Definition: elog.h:25
#define HASH_ELEM
Definition: hsearch.h:87
int forkname_chars(const char *str, ForkNumber *fork)
Definition: relpath.c:78
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:575
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Size entrysize
Definition: hsearch.h:73
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1335
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:906
#define LOG
Definition: elog.h:26
void ResetUnloggedRelations(int op)
Definition: reinit.c:46
Definition: dirent.h:9
static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
Definition: reinit.c:149
Definition: reinit.c:31
Definition: dynahash.c:208
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
#define MAXPGPATH
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
#define DEBUG2
Definition: elog.h:24
#define TABLESPACE_VERSION_DIRECTORY
Definition: relpath.h:26
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:26
int errcode_for_file_access(void)
Definition: elog.c:598
void copy_file(char *fromfile, char *tofile)
Definition: copydir.c:127
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2600
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
#define ereport(elevel, rest)
Definition: elog.h:122
static char dstpath[MAXPGPATH]
Definition: file_ops.c:31
ForkNumber
Definition: relpath.h:40
#define AllocSetContextCreate(parent, name, allocparams)
Definition: memutils.h:170
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:316
Size keysize
Definition: hsearch.h:72
#define Assert(condition)
Definition: c.h:699
char oid[OIDCHARS+1]
Definition: reinit.c:33
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2666
bool parse_filename_for_nontemp_relation(const char *name, int *oidchars, ForkNumber *fork)
Definition: reinit.c:374
const char * name
Definition: encode.c:521
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:25
int errmsg(const char *fmt,...)
Definition: elog.c:797
static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
Definition: reinit.c:102
char d_name[MAX_PATH]
Definition: dirent.h:14
#define elog
Definition: elog.h:219
const char *const forkNames[]
Definition: relpath.c:33
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:541
#define OIDCHARS
Definition: relpath.h:30
int FreeDir(DIR *dir)
Definition: fd.c:2718