PostgreSQL Source Code  git master
basic_archive.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * basic_archive.c
 *
 * This file demonstrates a basic archive library implementation that is
 * roughly equivalent to the following shell command:
 *
 *		test ! -f /path/to/dest && cp /path/to/src /path/to/dest
 *
 * One notable difference between this module and the shell command above
 * is that this module first copies the file to a temporary destination,
 * syncs it to disk, and then durably moves it to the final destination.
 *
 * Another notable difference is that if /path/to/dest already exists
 * but has contents identical to /path/to/src, archiving will succeed,
 * whereas the command shown above would fail.  This prevents problems if
 * a file is successfully archived and then the system crashes before
 * a durable record of the success has been made.
 *
 * Copyright (c) 2022-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/basic_archive/basic_archive.c
 *
 *-------------------------------------------------------------------------
 */
27 #include "postgres.h"
28 
29 #include <sys/stat.h>
30 #include <sys/time.h>
31 #include <unistd.h>
32 
33 #include "archive/archive_module.h"
34 #include "common/int.h"
35 #include "miscadmin.h"
36 #include "storage/copydir.h"
37 #include "storage/fd.h"
38 #include "utils/guc.h"
39 #include "utils/memutils.h"
40 
42 
43 static char *archive_directory = NULL;
44 
46 static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path);
47 static bool check_archive_directory(char **newval, void **extra, GucSource source);
48 static bool compare_files(const char *file1, const char *file2);
49 
51  .startup_cb = NULL,
52  .check_configured_cb = basic_archive_configured,
53  .archive_file_cb = basic_archive_file,
54  .shutdown_cb = NULL
55 };
56 
57 /*
58  * _PG_init
59  *
60  * Defines the module's GUC.
61  */
62 void
63 _PG_init(void)
64 {
65  DefineCustomStringVariable("basic_archive.archive_directory",
66  gettext_noop("Archive file destination directory."),
67  NULL,
69  "",
70  PGC_SIGHUP,
71  0,
72  check_archive_directory, NULL, NULL);
73 
74  MarkGUCPrefixReserved("basic_archive");
75 }
76 
77 /*
78  * _PG_archive_module_init
79  *
80  * Returns the module's archiving callbacks.
81  */
84 {
86 }
87 
88 /*
89  * check_archive_directory
90  *
91  * Checks that the provided archive directory exists.
92  */
93 static bool
95 {
96  struct stat st;
97 
98  /*
99  * The default value is an empty string, so we have to accept that value.
100  * Our check_configured callback also checks for this and prevents
101  * archiving from proceeding if it is still empty.
102  */
103  if (*newval == NULL || *newval[0] == '\0')
104  return true;
105 
106  /*
107  * Make sure the file paths won't be too long. The docs indicate that the
108  * file names to be archived can be up to 64 characters long.
109  */
110  if (strlen(*newval) + 64 + 2 >= MAXPGPATH)
111  {
112  GUC_check_errdetail("Archive directory too long.");
113  return false;
114  }
115 
116  /*
117  * Do a basic sanity check that the specified archive directory exists. It
118  * could be removed at some point in the future, so we still need to be
119  * prepared for it not to exist in the actual archiving logic.
120  */
121  if (stat(*newval, &st) != 0 || !S_ISDIR(st.st_mode))
122  {
123  GUC_check_errdetail("Specified archive directory does not exist.");
124  return false;
125  }
126 
127  return true;
128 }
129 
130 /*
131  * basic_archive_configured
132  *
133  * Checks that archive_directory is not blank.
134  */
135 static bool
137 {
138  if (archive_directory != NULL && archive_directory[0] != '\0')
139  return true;
140 
141  arch_module_check_errdetail("%s is not set.",
142  "basic_archive.archive_directory");
143  return false;
144 }
145 
146 /*
147  * basic_archive_file
148  *
149  * Archives one file.
150  */
151 static bool
152 basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
153 {
154  char destination[MAXPGPATH];
155  char temp[MAXPGPATH + 256];
156  struct stat st;
157  struct timeval tv;
158  uint64 epoch; /* milliseconds */
159 
160  ereport(DEBUG3,
161  (errmsg("archiving \"%s\" via basic_archive", file)));
162 
163  snprintf(destination, MAXPGPATH, "%s/%s", archive_directory, file);
164 
165  /*
166  * First, check if the file has already been archived. If it already
167  * exists and has the same contents as the file we're trying to archive,
168  * we can return success (after ensuring the file is persisted to disk).
169  * This scenario is possible if the server crashed after archiving the
170  * file but before renaming its .ready file to .done.
171  *
172  * If the archive file already exists but has different contents,
173  * something might be wrong, so we just fail.
174  */
175  if (stat(destination, &st) == 0)
176  {
177  if (compare_files(path, destination))
178  {
179  ereport(DEBUG3,
180  (errmsg("archive file \"%s\" already exists with identical contents",
181  destination)));
182 
183  fsync_fname(destination, false);
185 
186  return true;
187  }
188 
189  ereport(ERROR,
190  (errmsg("archive file \"%s\" already exists", destination)));
191  }
192  else if (errno != ENOENT)
193  ereport(ERROR,
195  errmsg("could not stat file \"%s\": %m", destination)));
196 
197  /*
198  * Pick a sufficiently unique name for the temporary file so that a
199  * collision is unlikely. This helps avoid problems in case a temporary
200  * file was left around after a crash or another server happens to be
201  * archiving to the same directory.
202  */
203  gettimeofday(&tv, NULL);
204  if (pg_mul_u64_overflow((uint64) 1000, (uint64) tv.tv_sec, &epoch) ||
205  pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch))
206  elog(ERROR, "could not generate temporary file name for archiving");
207 
208  snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT,
209  archive_directory, "archtemp", file, MyProcPid, epoch);
210 
211  /*
212  * Copy the file to its temporary destination. Note that this will fail
213  * if temp already exists.
214  */
215  copy_file(path, temp);
216 
217  /*
218  * Sync the temporary file to disk and move it to its final destination.
219  * Note that this will overwrite any existing file, but this is only
220  * possible if someone else created the file since the stat() above.
221  */
222  (void) durable_rename(temp, destination, ERROR);
223 
224  ereport(DEBUG1,
225  (errmsg("archived \"%s\" via basic_archive", file)));
226 
227  return true;
228 }
229 
230 /*
231  * compare_files
232  *
233  * Returns whether the contents of the files are the same.
234  */
235 static bool
236 compare_files(const char *file1, const char *file2)
237 {
238 #define CMP_BUF_SIZE (4096)
239  char buf1[CMP_BUF_SIZE];
240  char buf2[CMP_BUF_SIZE];
241  int fd1;
242  int fd2;
243  bool ret = true;
244 
245  fd1 = OpenTransientFile(file1, O_RDONLY | PG_BINARY);
246  if (fd1 < 0)
247  ereport(ERROR,
249  errmsg("could not open file \"%s\": %m", file1)));
250 
251  fd2 = OpenTransientFile(file2, O_RDONLY | PG_BINARY);
252  if (fd2 < 0)
253  ereport(ERROR,
255  errmsg("could not open file \"%s\": %m", file2)));
256 
257  for (;;)
258  {
259  int nbytes = 0;
260  int buf1_len = 0;
261  int buf2_len = 0;
262 
263  while (buf1_len < CMP_BUF_SIZE)
264  {
265  nbytes = read(fd1, buf1 + buf1_len, CMP_BUF_SIZE - buf1_len);
266  if (nbytes < 0)
267  ereport(ERROR,
269  errmsg("could not read file \"%s\": %m", file1)));
270  else if (nbytes == 0)
271  break;
272 
273  buf1_len += nbytes;
274  }
275 
276  while (buf2_len < CMP_BUF_SIZE)
277  {
278  nbytes = read(fd2, buf2 + buf2_len, CMP_BUF_SIZE - buf2_len);
279  if (nbytes < 0)
280  ereport(ERROR,
282  errmsg("could not read file \"%s\": %m", file2)));
283  else if (nbytes == 0)
284  break;
285 
286  buf2_len += nbytes;
287  }
288 
289  if (buf1_len != buf2_len || memcmp(buf1, buf2, buf1_len) != 0)
290  {
291  ret = false;
292  break;
293  }
294  else if (buf1_len == 0)
295  break;
296  }
297 
298  if (CloseTransientFile(fd1) != 0)
299  ereport(ERROR,
301  errmsg("could not close file \"%s\": %m", file1)));
302 
303  if (CloseTransientFile(fd2) != 0)
304  ereport(ERROR,
306  errmsg("could not close file \"%s\": %m", file2)));
307 
308  return ret;
309 }
#define arch_module_check_errdetail
static char * archive_directory
Definition: basic_archive.c:43
void _PG_init(void)
Definition: basic_archive.c:63
PG_MODULE_MAGIC
Definition: basic_archive.c:41
static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
#define CMP_BUF_SIZE
const ArchiveModuleCallbacks * _PG_archive_module_init(void)
Definition: basic_archive.c:83
static const ArchiveModuleCallbacks basic_archive_callbacks
Definition: basic_archive.c:50
static bool compare_files(const char *file1, const char *file2)
static bool basic_archive_configured(ArchiveModuleState *state)
static bool check_archive_directory(char **newval, void **extra, GucSource source)
Definition: basic_archive.c:94
#define gettext_noop(x)
Definition: c.h:1196
#define PG_BINARY
Definition: c.h:1273
#define UINT64_FORMAT
Definition: c.h:549
void copy_file(const char *fromfile, const char *tofile)
Definition: copydir.c:117
int errcode_for_file_access(void)
Definition: elog.c:882
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define DEBUG3
Definition: elog.h:28
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
int CloseTransientFile(int fd)
Definition: fd.c:2809
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
int MyProcPid
Definition: globals.c:45
void DefineCustomStringVariable(const char *name, const char *short_desc, const char *long_desc, char **valueAddr, const char *bootValue, GucContext context, int flags, GucStringCheckHook check_hook, GucStringAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5171
#define newval
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:5232
#define GUC_check_errdetail
Definition: guc.h:448
GucSource
Definition: guc.h:108
@ PGC_SIGHUP
Definition: guc.h:71
static bool pg_add_u64_overflow(uint64 a, uint64 b, uint64 *result)
Definition: int.h:380
static bool pg_mul_u64_overflow(uint64 a, uint64 b, uint64 *result)
Definition: int.h:414
#define read(a, b, c)
Definition: win32.h:13
#define MAXPGPATH
static rewind_source * source
Definition: pg_rewind.c:89
#define snprintf
Definition: port.h:238
ArchiveStartupCB startup_cb
unsigned short st_mode
Definition: win32_port.h:268
Definition: regguts.h:323
#define stat
Definition: win32_port.h:284
#define S_ISDIR(m)
Definition: win32_port.h:325
static const unsigned __int64 epoch
int gettimeofday(struct timeval *tp, void *tzp)