PostgreSQL Source Code  git master
basic_archive.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * basic_archive.c
4  *
5  * This file demonstrates a basic archive library implementation that is
6  * roughly equivalent to the following shell command:
7  *
8  * test ! -f /path/to/dest && cp /path/to/src /path/to/dest
9  *
10  * One notable difference between this module and the shell command above
11  * is that this module first copies the file to a temporary destination,
12  * syncs it to disk, and then durably moves it to the final destination.
13  *
14  * Another notable difference is that if /path/to/dest already exists
15  * but has contents identical to /path/to/src, archiving will succeed,
16  * whereas the command shown above would fail. This prevents problems if
17  * a file is successfully archived and then the system crashes before
18  * a durable record of the success has been made.
19  *
20  * Copyright (c) 2022, PostgreSQL Global Development Group
21  *
22  * IDENTIFICATION
23  * contrib/basic_archive/basic_archive.c
24  *
25  *-------------------------------------------------------------------------
26  */
27 #include "postgres.h"
28 
29 #include <sys/stat.h>
30 #include <sys/time.h>
31 #include <unistd.h>
32 
33 #include "common/int.h"
34 #include "miscadmin.h"
35 #include "postmaster/pgarch.h"
36 #include "storage/copydir.h"
37 #include "storage/fd.h"
38 #include "utils/guc.h"
39 #include "utils/memutils.h"
40 
42 
43 static char *archive_directory = NULL;
45 
46 static bool basic_archive_configured(void);
47 static bool basic_archive_file(const char *file, const char *path);
48 static void basic_archive_file_internal(const char *file, const char *path);
49 static bool check_archive_directory(char **newval, void **extra, GucSource source);
50 static bool compare_files(const char *file1, const char *file2);
51 
52 /*
53  * _PG_init
54  *
55  * Defines the module's GUC.
56  */
57 void
58 _PG_init(void)
59 {
60  DefineCustomStringVariable("basic_archive.archive_directory",
61  gettext_noop("Archive file destination directory."),
62  NULL,
64  "",
65  PGC_SIGHUP,
66  0,
67  check_archive_directory, NULL, NULL);
68 
69  MarkGUCPrefixReserved("basic_archive");
70 
72  "basic_archive",
74 }
75 
76 /*
77  * _PG_archive_module_init
78  *
79  * Returns the module's archiving callbacks.
80  */
81 void
83 {
85 
88 }
89 
90 /*
91  * check_archive_directory
92  *
93  * Checks that the provided archive directory exists.
94  */
95 static bool
97 {
98  struct stat st;
99 
100  /*
101  * The default value is an empty string, so we have to accept that value.
102  * Our check_configured callback also checks for this and prevents
103  * archiving from proceeding if it is still empty.
104  */
105  if (*newval == NULL || *newval[0] == '\0')
106  return true;
107 
108  /*
109  * Make sure the file paths won't be too long. The docs indicate that the
110  * file names to be archived can be up to 64 characters long.
111  */
112  if (strlen(*newval) + 64 + 2 >= MAXPGPATH)
113  {
114  GUC_check_errdetail("Archive directory too long.");
115  return false;
116  }
117 
118  /*
119  * Do a basic sanity check that the specified archive directory exists. It
120  * could be removed at some point in the future, so we still need to be
121  * prepared for it not to exist in the actual archiving logic.
122  */
123  if (stat(*newval, &st) != 0 || !S_ISDIR(st.st_mode))
124  {
125  GUC_check_errdetail("Specified archive directory does not exist.");
126  return false;
127  }
128 
129  return true;
130 }
131 
132 /*
133  * basic_archive_configured
134  *
135  * Checks that archive_directory is not blank.
136  */
137 static bool
139 {
140  return archive_directory != NULL && archive_directory[0] != '\0';
141 }
142 
143 /*
144  * basic_archive_file
145  *
146  * Archives one file.
147  */
148 static bool
149 basic_archive_file(const char *file, const char *path)
150 {
151  sigjmp_buf local_sigjmp_buf;
152  MemoryContext oldcontext;
153 
154  /*
155  * We run basic_archive_file_internal() in our own memory context so that
156  * we can easily reset it during error recovery (thus avoiding memory
157  * leaks).
158  */
160 
161  /*
162  * Since the archiver operates at the bottom of the exception stack,
163  * ERRORs turn into FATALs and cause the archiver process to restart.
164  * However, using ereport(ERROR, ...) when there are problems is easy to
165  * code and maintain. Therefore, we create our own exception handler to
166  * catch ERRORs and return false instead of restarting the archiver
167  * whenever there is a failure.
168  */
169  if (sigsetjmp(local_sigjmp_buf, 1) != 0)
170  {
171  /* Since not using PG_TRY, must reset error stack by hand */
172  error_context_stack = NULL;
173 
174  /* Prevent interrupts while cleaning up */
175  HOLD_INTERRUPTS();
176 
177  /* Report the error and clear ErrorContext for next time */
178  EmitErrorReport();
179  FlushErrorState();
180 
181  /* Close any files left open by copy_file() or compare_files() */
183 
184  /* Reset our memory context and switch back to the original one */
185  MemoryContextSwitchTo(oldcontext);
187 
188  /* Remove our exception handler */
189  PG_exception_stack = NULL;
190 
191  /* Now we can allow interrupts again */
193 
194  /* Report failure so that the archiver retries this file */
195  return false;
196  }
197 
198  /* Enable our exception handler */
199  PG_exception_stack = &local_sigjmp_buf;
200 
201  /* Archive the file! */
202  basic_archive_file_internal(file, path);
203 
204  /* Remove our exception handler */
205  PG_exception_stack = NULL;
206 
207  /* Reset our memory context and switch back to the original one */
208  MemoryContextSwitchTo(oldcontext);
210 
211  return true;
212 }
213 
214 static void
215 basic_archive_file_internal(const char *file, const char *path)
216 {
217  char destination[MAXPGPATH];
218  char temp[MAXPGPATH + 256];
219  struct stat st;
220  struct timeval tv;
221  uint64 epoch; /* milliseconds */
222 
223  ereport(DEBUG3,
224  (errmsg("archiving \"%s\" via basic_archive", file)));
225 
226  snprintf(destination, MAXPGPATH, "%s/%s", archive_directory, file);
227 
228  /*
229  * First, check if the file has already been archived. If it already
230  * exists and has the same contents as the file we're trying to archive,
231  * we can return success (after ensuring the file is persisted to disk).
232  * This scenario is possible if the server crashed after archiving the
233  * file but before renaming its .ready file to .done.
234  *
235  * If the archive file already exists but has different contents,
236  * something might be wrong, so we just fail.
237  */
238  if (stat(destination, &st) == 0)
239  {
240  if (compare_files(path, destination))
241  {
242  ereport(DEBUG3,
243  (errmsg("archive file \"%s\" already exists with identical contents",
244  destination)));
245 
246  fsync_fname(destination, false);
248 
249  return;
250  }
251 
252  ereport(ERROR,
253  (errmsg("archive file \"%s\" already exists", destination)));
254  }
255  else if (errno != ENOENT)
256  ereport(ERROR,
258  errmsg("could not stat file \"%s\": %m", destination)));
259 
260  /*
261  * Pick a sufficiently unique name for the temporary file so that a
262  * collision is unlikely. This helps avoid problems in case a temporary
263  * file was left around after a crash or another server happens to be
264  * archiving to the same directory.
265  */
266  gettimeofday(&tv, NULL);
267  if (pg_mul_u64_overflow((uint64) 1000, (uint64) tv.tv_sec, &epoch) ||
268  pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch))
269  elog(ERROR, "could not generate temporary file name for archiving");
270 
271  snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT,
272  archive_directory, "archtemp", file, MyProcPid, epoch);
273 
274  /*
275  * Copy the file to its temporary destination. Note that this will fail
276  * if temp already exists.
277  */
278  copy_file(unconstify(char *, path), temp);
279 
280  /*
281  * Sync the temporary file to disk and move it to its final destination.
282  * Note that this will overwrite any existing file, but this is only
283  * possible if someone else created the file since the stat() above.
284  */
285  (void) durable_rename(temp, destination, ERROR);
286 
287  ereport(DEBUG1,
288  (errmsg("archived \"%s\" via basic_archive", file)));
289 }
290 
291 /*
292  * compare_files
293  *
294  * Returns whether the contents of the files are the same.
295  */
296 static bool
297 compare_files(const char *file1, const char *file2)
298 {
299 #define CMP_BUF_SIZE (4096)
300  char buf1[CMP_BUF_SIZE];
301  char buf2[CMP_BUF_SIZE];
302  int fd1;
303  int fd2;
304  bool ret = true;
305 
306  fd1 = OpenTransientFile(file1, O_RDONLY | PG_BINARY);
307  if (fd1 < 0)
308  ereport(ERROR,
310  errmsg("could not open file \"%s\": %m", file1)));
311 
312  fd2 = OpenTransientFile(file2, O_RDONLY | PG_BINARY);
313  if (fd2 < 0)
314  ereport(ERROR,
316  errmsg("could not open file \"%s\": %m", file2)));
317 
318  for (;;)
319  {
320  int nbytes = 0;
321  int buf1_len = 0;
322  int buf2_len = 0;
323 
324  while (buf1_len < CMP_BUF_SIZE)
325  {
326  nbytes = read(fd1, buf1 + buf1_len, CMP_BUF_SIZE - buf1_len);
327  if (nbytes < 0)
328  ereport(ERROR,
330  errmsg("could not read file \"%s\": %m", file1)));
331  else if (nbytes == 0)
332  break;
333 
334  buf1_len += nbytes;
335  }
336 
337  while (buf2_len < CMP_BUF_SIZE)
338  {
339  nbytes = read(fd2, buf2 + buf2_len, CMP_BUF_SIZE - buf2_len);
340  if (nbytes < 0)
341  ereport(ERROR,
343  errmsg("could not read file \"%s\": %m", file2)));
344  else if (nbytes == 0)
345  break;
346 
347  buf2_len += nbytes;
348  }
349 
350  if (buf1_len != buf2_len || memcmp(buf1, buf2, buf1_len) != 0)
351  {
352  ret = false;
353  break;
354  }
355  else if (buf1_len == 0)
356  break;
357  }
358 
359  if (CloseTransientFile(fd1) != 0)
360  ereport(ERROR,
362  errmsg("could not close file \"%s\": %m", file1)));
363 
364  if (CloseTransientFile(fd2) != 0)
365  ereport(ERROR,
367  errmsg("could not close file \"%s\": %m", file2)));
368 
369  return ret;
370 }
static MemoryContext basic_archive_context
Definition: basic_archive.c:44
void _PG_archive_module_init(ArchiveModuleCallbacks *cb)
Definition: basic_archive.c:82
static char * archive_directory
Definition: basic_archive.c:43
void _PG_init(void)
Definition: basic_archive.c:58
PG_MODULE_MAGIC
Definition: basic_archive.c:41
static bool basic_archive_file(const char *file, const char *path)
#define CMP_BUF_SIZE
static bool basic_archive_configured(void)
static void basic_archive_file_internal(const char *file, const char *path)
static bool compare_files(const char *file1, const char *file2)
static bool check_archive_directory(char **newval, void **extra, GucSource source)
Definition: basic_archive.c:96
#define unconstify(underlying_type, expr)
Definition: c.h:1181
#define gettext_noop(x)
Definition: c.h:1135
#define AssertVariableIsOfType(varname, typename)
Definition: c.h:914
#define InvalidSubTransactionId
Definition: c.h:594
#define PG_BINARY
Definition: c.h:1209
#define UINT64_FORMAT
Definition: c.h:485
void copy_file(char *fromfile, char *tofile)
Definition: copydir.c:117
void EmitErrorReport(void)
Definition: elog.c:1546
int errcode_for_file_access(void)
Definition: elog.c:758
ErrorContextCallback * error_context_stack
Definition: elog.c:94
void FlushErrorState(void)
Definition: elog.c:1702
int errmsg(const char *fmt,...)
Definition: elog.c:946
sigjmp_buf * PG_exception_stack
Definition: elog.c:96
#define DEBUG3
Definition: elog.h:24
#define DEBUG1
Definition: elog.h:26
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:688
int CloseTransientFile(int fd)
Definition: fd.c:2609
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:662
void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid, SubTransactionId parentSubid)
Definition: fd.c:2932
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2433
int MyProcPid
Definition: globals.c:44
void DefineCustomStringVariable(const char *name, const char *short_desc, const char *long_desc, char **valueAddr, const char *bootValue, GucContext context, int flags, GucStringCheckHook check_hook, GucStringAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5026
#define newval
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:5087
#define GUC_check_errdetail
Definition: guc.h:434
GucSource
Definition: guc.h:108
@ PGC_SIGHUP
Definition: guc.h:71
static bool pg_add_u64_overflow(uint64 a, uint64 b, uint64 *result)
Definition: int.h:376
static bool pg_mul_u64_overflow(uint64 a, uint64 b, uint64 *result)
Definition: int.h:410
#define read(a, b, c)
Definition: win32.h:13
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:303
MemoryContext TopMemoryContext
Definition: mcxt.c:130
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:134
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:132
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:135
#define MAXPGPATH
static rewind_source * source
Definition: pg_rewind.c:81
void(* ArchiveModuleInit)(ArchiveModuleCallbacks *cb)
Definition: pgarch.h:64
#define snprintf
Definition: port.h:238
ArchiveFileCB archive_file_cb
Definition: pgarch.h:56
ArchiveCheckConfiguredCB check_configured_cb
Definition: pgarch.h:55
unsigned short st_mode
Definition: win32_port.h:270
#define stat
Definition: win32_port.h:286
#define S_ISDIR(m)
Definition: win32_port.h:327
static const unsigned __int64 epoch
int gettimeofday(struct timeval *tp, void *tzp)