PostgreSQL Source Code  git master
libpq_fetch.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * libpq_fetch.c
4  * Functions for fetching files from a remote server.
5  *
6  * Copyright (c) 2013-2020, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 #include "postgres_fe.h"
11 
12 #include <sys/stat.h>
13 #include <dirent.h>
14 #include <fcntl.h>
15 #include <unistd.h>
16 
17 #include "catalog/pg_type_d.h"
18 #include "common/connect.h"
19 #include "datapagemap.h"
20 #include "fetch.h"
21 #include "file_ops.h"
22 #include "filemap.h"
23 #include "pg_rewind.h"
24 #include "port/pg_bswap.h"
25 
/* Connection to the source server; set by libpqConnect() and used by every query issued in this file. */
26 PGconn *conn = NULL;
27 
28 /*
29  * Files are fetched max CHUNKSIZE bytes at a time.
30  *
31  * (This only applies to files that are copied in whole, or for truncated
32  * files where we copy the tail. Relation files, where we know the individual
33  * blocks that need to be fetched, are fetched in BLCKSZ chunks.)
34  */
/* fetch_file_range() stores each chunk length in an unsigned int, so this value must fit in one. */
35 #define CHUNKSIZE 1000000
36 
/* Forward declarations of file-local helpers that are used before their definitions. */
37 static void receiveFileChunks(const char *sql);
38 static void execute_pagemap(datapagemap_t *pagemap, const char *path);
39 static char *run_simple_query(const char *sql);
40 static void run_simple_command(const char *sql);
41 
42 void
43 libpqConnect(const char *connstr)
44 {
45  char *str;
46  PGresult *res;
47 
48  conn = PQconnectdb(connstr);
49  if (PQstatus(conn) == CONNECTION_BAD)
50  pg_fatal("could not connect to server: %s",
51  PQerrorMessage(conn));
52 
53  if (showprogress)
54  pg_log_info("connected to server");
55 
56  /* disable all types of timeouts */
57  run_simple_command("SET statement_timeout = 0");
58  run_simple_command("SET lock_timeout = 0");
59  run_simple_command("SET idle_in_transaction_session_timeout = 0");
60 
62  if (PQresultStatus(res) != PGRES_TUPLES_OK)
63  pg_fatal("could not clear search_path: %s",
65  PQclear(res);
66 
67  /*
68  * Check that the server is not in hot standby mode. There is no
69  * fundamental reason that couldn't be made to work, but it doesn't
70  * currently because we use a temporary table. Better to check for it
71  * explicitly than error out, for a better error message.
72  */
73  str = run_simple_query("SELECT pg_is_in_recovery()");
74  if (strcmp(str, "f") != 0)
75  pg_fatal("source server must not be in recovery mode");
76  pg_free(str);
77 
78  /*
79  * Also check that full_page_writes is enabled. We can get torn pages if
80  * a page is modified while we read it with pg_read_binary_file(), and we
81  * rely on full page images to fix them.
82  */
83  str = run_simple_query("SHOW full_page_writes");
84  if (strcmp(str, "on") != 0)
85  pg_fatal("full_page_writes must be enabled in the source server");
86  pg_free(str);
87 
88  /*
89  * Although we don't do any "real" updates, we do work with a temporary
90  * table. We don't care about synchronous commit for that. It doesn't
91  * otherwise matter much, but if the server is using synchronous
92  * replication, and replication isn't working for some reason, we don't
93  * want to get stuck, waiting for it to start working again.
94  */
95  run_simple_command("SET synchronous_commit = off");
96 }
97 
98 /*
99  * Runs a query that returns a single value.
100  * The result should be pg_free'd after use.
101  */
102 static char *
103 run_simple_query(const char *sql)
104 {
105  PGresult *res;
106  char *result;
107 
108  res = PQexec(conn, sql);
109 
110  if (PQresultStatus(res) != PGRES_TUPLES_OK)
111  pg_fatal("error running query (%s) on source server: %s",
112  sql, PQresultErrorMessage(res));
113 
114  /* sanity check the result set */
115  if (PQnfields(res) != 1 || PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
116  pg_fatal("unexpected result set from query");
117 
118  result = pg_strdup(PQgetvalue(res, 0, 0));
119 
120  PQclear(res);
121 
122  return result;
123 }
124 
125 /*
126  * Runs a command.
127  * In the event of a failure, exit immediately.
128  */
129 static void
130 run_simple_command(const char *sql)
131 {
132  PGresult *res;
133 
134  res = PQexec(conn, sql);
135 
136  if (PQresultStatus(res) != PGRES_COMMAND_OK)
137  pg_fatal("error running query (%s) in source server: %s",
138  sql, PQresultErrorMessage(res));
139 
140  PQclear(res);
141 }
142 
143 /*
144  * Calls pg_current_wal_insert_lsn() function
145  */
148 {
149  XLogRecPtr result;
150  uint32 hi;
151  uint32 lo;
152  char *val;
153 
154  val = run_simple_query("SELECT pg_current_wal_insert_lsn()");
155 
156  if (sscanf(val, "%X/%X", &hi, &lo) != 2)
157  pg_fatal("unrecognized result \"%s\" for current WAL insert location", val);
158 
159  result = ((uint64) hi) << 32 | lo;
160 
161  pg_free(val);
162 
163  return result;
164 }
165 
166 /*
167  * Get a list of all files in the data directory.
168  */
169 void
171 {
172  PGresult *res;
173  const char *sql;
174  int i;
175 
176  /*
177  * Create a recursive directory listing of the whole data directory.
178  *
179  * The WITH RECURSIVE part does most of the work. The second part gets the
180  * targets of the symlinks in pg_tblspc directory.
181  *
182  * XXX: There is no backend function to get a symbolic link's target in
183  * general, so if the admin has put any custom symbolic links in the data
184  * directory, they won't be copied correctly.
185  */
186  sql =
187  "WITH RECURSIVE files (path, filename, size, isdir) AS (\n"
188  " SELECT '' AS path, filename, size, isdir FROM\n"
189  " (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n"
190  " pg_stat_file(fn.filename, true) AS this\n"
191  " UNION ALL\n"
192  " SELECT parent.path || parent.filename || '/' AS path,\n"
193  " fn, this.size, this.isdir\n"
194  " FROM files AS parent,\n"
195  " pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n"
196  " pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n"
197  " WHERE parent.isdir = 't'\n"
198  ")\n"
199  "SELECT path || filename, size, isdir,\n"
200  " pg_tablespace_location(pg_tablespace.oid) AS link_target\n"
201  "FROM files\n"
202  "LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n"
203  " AND oid::text = files.filename\n";
204  res = PQexec(conn, sql);
205 
206  if (PQresultStatus(res) != PGRES_TUPLES_OK)
207  pg_fatal("could not fetch file list: %s",
208  PQresultErrorMessage(res));
209 
210  /* sanity check the result set */
211  if (PQnfields(res) != 4)
212  pg_fatal("unexpected result set while fetching file list");
213 
214  /* Read result to local variables */
215  for (i = 0; i < PQntuples(res); i++)
216  {
217  char *path;
218  int64 filesize;
219  bool isdir;
220  char *link_target;
222 
223  if (PQgetisnull(res, i, 1))
224  {
225  /*
226  * The file was removed from the server while the query was
227  * running. Ignore it.
228  */
229  continue;
230  }
231 
232  path = PQgetvalue(res, i, 0);
233  filesize = atol(PQgetvalue(res, i, 1));
234  isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0);
235  link_target = PQgetvalue(res, i, 3);
236 
237  if (link_target[0])
238  type = FILE_TYPE_SYMLINK;
239  else if (isdir)
240  type = FILE_TYPE_DIRECTORY;
241  else
242  type = FILE_TYPE_REGULAR;
243 
244  process_source_file(path, type, filesize, link_target);
245  }
246  PQclear(res);
247 }
248 
249 /*----
250  * Runs a query, which returns pieces of files from the remote source data
251  * directory, and overwrites the corresponding parts of target files with
252  * the received parts. The result set is expected to be of format:
253  *
254  * path text -- path in the data directory, e.g "base/1/123"
255  * begin int8 -- offset within the file
256  * chunk bytea -- file content
257  *----
258  */
259 static void
260 receiveFileChunks(const char *sql)
261 {
262  PGresult *res;
263 
264  if (PQsendQueryParams(conn, sql, 0, NULL, NULL, NULL, NULL, 1) != 1)
265  pg_fatal("could not send query: %s", PQerrorMessage(conn));
266 
267  pg_log_debug("getting file chunks");
268 
269  if (PQsetSingleRowMode(conn) != 1)
270  pg_fatal("could not set libpq connection to single row mode");
271 
272  while ((res = PQgetResult(conn)) != NULL)
273  {
274  char *filename;
275  int filenamelen;
276  int64 chunkoff;
277  int chunksize;
278  char *chunk;
279 
280  switch (PQresultStatus(res))
281  {
282  case PGRES_SINGLE_TUPLE:
283  break;
284 
285  case PGRES_TUPLES_OK:
286  PQclear(res);
287  continue; /* final zero-row result */
288 
289  default:
290  pg_fatal("unexpected result while fetching remote files: %s",
291  PQresultErrorMessage(res));
292  }
293 
294  /* sanity check the result set */
295  if (PQnfields(res) != 3 || PQntuples(res) != 1)
296  pg_fatal("unexpected result set size while fetching remote files");
297 
298  if (PQftype(res, 0) != TEXTOID ||
299  PQftype(res, 1) != INT8OID ||
300  PQftype(res, 2) != BYTEAOID)
301  {
302  pg_fatal("unexpected data types in result set while fetching remote files: %u %u %u",
303  PQftype(res, 0), PQftype(res, 1), PQftype(res, 2));
304  }
305 
306  if (PQfformat(res, 0) != 1 &&
307  PQfformat(res, 1) != 1 &&
308  PQfformat(res, 2) != 1)
309  {
310  pg_fatal("unexpected result format while fetching remote files");
311  }
312 
313  if (PQgetisnull(res, 0, 0) ||
314  PQgetisnull(res, 0, 1))
315  {
316  pg_fatal("unexpected null values in result while fetching remote files");
317  }
318 
319  if (PQgetlength(res, 0, 1) != sizeof(int64))
320  pg_fatal("unexpected result length while fetching remote files");
321 
322  /* Read result set to local variables */
323  memcpy(&chunkoff, PQgetvalue(res, 0, 1), sizeof(int64));
324  chunkoff = pg_ntoh64(chunkoff);
325  chunksize = PQgetlength(res, 0, 2);
326 
327  filenamelen = PQgetlength(res, 0, 0);
328  filename = pg_malloc(filenamelen + 1);
329  memcpy(filename, PQgetvalue(res, 0, 0), filenamelen);
330  filename[filenamelen] = '\0';
331 
332  chunk = PQgetvalue(res, 0, 2);
333 
334  /*
335  * If a file has been deleted on the source, remove it on the target
336  * as well. Note that multiple unlink() calls may happen on the same
337  * file if multiple data chunks are associated with it, hence ignore
338  * unconditionally anything missing. If this file is not a relation
339  * data file, then it has been already truncated when creating the
340  * file chunk list at the previous execution of the filemap.
341  */
342  if (PQgetisnull(res, 0, 2))
343  {
344  pg_log_debug("received null value for chunk for file \"%s\", file has been deleted",
345  filename);
346  remove_target_file(filename, true);
347  pg_free(filename);
348  PQclear(res);
349  continue;
350  }
351 
352  pg_log_debug("received chunk for file \"%s\", offset %lld, size %d",
353  filename, (long long int) chunkoff, chunksize);
354 
355  open_target_file(filename, false);
356 
357  write_target_range(chunk, chunkoff, chunksize);
358 
359  pg_free(filename);
360 
361  PQclear(res);
362  }
363 }
364 
365 /*
366  * Receive a single file as a malloc'd buffer.
367  */
368 char *
369 libpqGetFile(const char *filename, size_t *filesize)
370 {
371  PGresult *res;
372  char *result;
373  int len;
374  const char *paramValues[1];
375 
376  paramValues[0] = filename;
377  res = PQexecParams(conn, "SELECT pg_read_binary_file($1)",
378  1, NULL, paramValues, NULL, NULL, 1);
379 
380  if (PQresultStatus(res) != PGRES_TUPLES_OK)
381  pg_fatal("could not fetch remote file \"%s\": %s",
382  filename, PQresultErrorMessage(res));
383 
384  /* sanity check the result set */
385  if (PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
386  pg_fatal("unexpected result set while fetching remote file \"%s\"",
387  filename);
388 
389  /* Read result to local variables */
390  len = PQgetlength(res, 0, 0);
391  result = pg_malloc(len + 1);
392  memcpy(result, PQgetvalue(res, 0, 0), len);
393  result[len] = '\0';
394 
395  PQclear(res);
396 
397  pg_log_debug("fetched file \"%s\", length %d", filename, len);
398 
399  if (filesize)
400  *filesize = len;
401  return result;
402 }
403 
404 /*
405  * Write a file range to a temporary table in the server.
406  *
407  * The range is sent to the server as a COPY formatted line, to be inserted
408  * into the 'fetchchunks' temporary table. It is used in receiveFileChunks()
409  * function to actually fetch the data.
410  */
411 static void
412 fetch_file_range(const char *path, uint64 begin, uint64 end)
413 {
414  char linebuf[MAXPGPATH + 23];
415 
416  /* Split the range into CHUNKSIZE chunks */
417  while (end - begin > 0)
418  {
419  unsigned int len;
420 
421  /* Fine as long as CHUNKSIZE is not bigger than UINT32_MAX */
422  if (end - begin > CHUNKSIZE)
423  len = CHUNKSIZE;
424  else
425  len = (unsigned int) (end - begin);
426 
427  snprintf(linebuf, sizeof(linebuf), "%s\t" UINT64_FORMAT "\t%u\n", path, begin, len);
428 
429  if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1)
430  pg_fatal("could not send COPY data: %s",
431  PQerrorMessage(conn));
432 
433  begin += len;
434  }
435 }
436 
437 /*
438  * Fetch all changed blocks from remote source data directory.
439  */
440 void
442 {
443  file_entry_t *entry;
444  const char *sql;
445  PGresult *res;
446  int i;
447 
448  /*
449  * First create a temporary table, and load it with the blocks that we
450  * need to fetch.
451  */
452  sql = "CREATE TEMPORARY TABLE fetchchunks(path text, begin int8, len int4);";
453  run_simple_command(sql);
454 
455  sql = "COPY fetchchunks FROM STDIN";
456  res = PQexec(conn, sql);
457 
458  if (PQresultStatus(res) != PGRES_COPY_IN)
459  pg_fatal("could not send file list: %s",
460  PQresultErrorMessage(res));
461  PQclear(res);
462 
463  for (i = 0; i < map->narray; i++)
464  {
465  entry = map->array[i];
466 
467  /* If this is a relation file, copy the modified blocks */
468  execute_pagemap(&entry->pagemap, entry->path);
469 
470  switch (entry->action)
471  {
472  case FILE_ACTION_NONE:
473  /* nothing else to do */
474  break;
475 
476  case FILE_ACTION_COPY:
477  /* Truncate the old file out of the way, if any */
478  open_target_file(entry->path, true);
479  fetch_file_range(entry->path, 0, entry->newsize);
480  break;
481 
483  truncate_target_file(entry->path, entry->newsize);
484  break;
485 
487  fetch_file_range(entry->path, entry->oldsize, entry->newsize);
488  break;
489 
490  case FILE_ACTION_REMOVE:
491  remove_target(entry);
492  break;
493 
494  case FILE_ACTION_CREATE:
495  create_target(entry);
496  break;
497  }
498  }
499 
500  if (PQputCopyEnd(conn, NULL) != 1)
501  pg_fatal("could not send end-of-COPY: %s",
502  PQerrorMessage(conn));
503 
504  while ((res = PQgetResult(conn)) != NULL)
505  {
506  if (PQresultStatus(res) != PGRES_COMMAND_OK)
507  pg_fatal("unexpected result while sending file list: %s",
508  PQresultErrorMessage(res));
509  PQclear(res);
510  }
511 
512  /*
513  * We've now copied the list of file ranges that we need to fetch to the
514  * temporary table. Now, actually fetch all of those ranges.
515  */
516  sql =
517  "SELECT path, begin,\n"
518  " pg_read_binary_file(path, begin, len, true) AS chunk\n"
519  "FROM fetchchunks\n";
520 
521  receiveFileChunks(sql);
522 }
523 
524 static void
525 execute_pagemap(datapagemap_t *pagemap, const char *path)
526 {
528  BlockNumber blkno;
529  off_t offset;
530 
531  iter = datapagemap_iterate(pagemap);
532  while (datapagemap_next(iter, &blkno))
533  {
534  offset = blkno * BLCKSZ;
535 
536  fetch_file_range(path, offset, offset + BLCKSZ);
537  }
538  pg_free(iter);
539 }
int PQputCopyData(PGconn *conn, const char *buffer, int nbytes)
Definition: fe-exec.c:2317
int PQgetlength(const PGresult *res, int tup_num, int field_num)
Definition: fe-exec.c:3174
int PQnfields(const PGresult *res)
Definition: fe-exec.c:2777
char * PQerrorMessage(const PGconn *conn)
Definition: fe-connect.c:6669
void libpqProcessFileList(void)
Definition: libpq_fetch.c:170
void open_target_file(const char *path, bool trunc)
Definition: file_ops.c:42
void write_target_range(char *buf, off_t begin, size_t size)
Definition: file_ops.c:83
int PQsendQueryParams(PGconn *conn, const char *command, int nParams, const Oid *paramTypes, const char *const *paramValues, const int *paramLengths, const int *paramFormats, int resultFormat)
Definition: fe-exec.c:1285
char * PQgetvalue(const PGresult *res, int tup_num, int field_num)
Definition: fe-exec.c:3163
file_entry_t ** array
Definition: filemap.h:78
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
void remove_target_file(const char *path, bool missing_ok)
Definition: file_ops.c:172
size_t newsize
Definition: filemap.h:50
#define pg_fatal(...)
Definition: pg_rewind.h:41
static void execute_pagemap(datapagemap_t *pagemap, const char *path)
Definition: libpq_fetch.c:525
int PQputCopyEnd(PGconn *conn, const char *errormsg)
Definition: fe-exec.c:2384
uint32 BlockNumber
Definition: block.h:31
#define CHUNKSIZE
Definition: libpq_fetch.c:35
int PQntuples(const PGresult *res)
Definition: fe-exec.c:2769
datapagemap_t pagemap
Definition: filemap.h:53
ExecStatusType PQresultStatus(const PGresult *res)
Definition: fe-exec.c:2692
int narray
Definition: filemap.h:79
void truncate_target_file(const char *path, off_t newsize)
Definition: file_ops.c:191
static char * run_simple_query(const char *sql)
Definition: libpq_fetch.c:103
bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno)
Definition: datapagemap.c:87
#define pg_log_debug(...)
Definition: logging.h:92
#define MAXPGPATH
Oid PQftype(const PGresult *res, int field_num)
Definition: fe-exec.c:3007
file_action_t action
Definition: filemap.h:46
static void receiveFileChunks(const char *sql)
Definition: libpq_fetch.c:260
int PQsetSingleRowMode(PGconn *conn)
Definition: fe-exec.c:1677
size_t oldsize
Definition: filemap.h:49
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
unsigned int uint32
Definition: c.h:375
PGconn * conn
Definition: libpq_fetch.c:26
void libpq_executeFileMap(filemap_t *map)
Definition: libpq_fetch.c:441
static bool showprogress
static void run_simple_command(const char *sql)
Definition: libpq_fetch.c:130
void libpqConnect(const char *connstr)
Definition: libpq_fetch.c:43
void PQclear(PGresult *res)
Definition: fe-exec.c:694
void remove_target(file_entry_t *entry)
Definition: file_ops.c:125
uint64 XLogRecPtr
Definition: xlogdefs.h:21
datapagemap_iterator_t * datapagemap_iterate(datapagemap_t *map)
Definition: datapagemap.c:75
char * path
Definition: filemap.h:43
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define ALWAYS_SECURE_SEARCH_PATH_SQL
Definition: connect.h:25
void process_source_file(const char *path, file_type_t type, size_t newsize, const char *link_target)
Definition: filemap.c:157
static char * filename
Definition: pg_dumpall.c:91
char * PQresultErrorMessage(const PGresult *res)
Definition: fe-exec.c:2708
PGresult * PQexecParams(PGconn *conn, const char *command, int nParams, const Oid *paramTypes, const char *const *paramValues, const int *paramLengths, const int *paramFormats, int resultFormat)
Definition: fe-exec.c:1953
static void fetch_file_range(const char *path, uint64 begin, uint64 end)
Definition: libpq_fetch.c:412
int i
Definition: filemap.h:41
PGresult * PQexec(PGconn *conn, const char *query)
Definition: fe-exec.c:1939
void create_target(file_entry_t *entry)
Definition: file_ops.c:146
XLogRecPtr libpqGetCurrentXlogInsertLocation(void)
Definition: libpq_fetch.c:147
file_type_t
Definition: filemap.h:34
int PQgetisnull(const PGresult *res, int tup_num, int field_num)
Definition: fe-exec.c:3188
ConnStatusType PQstatus(const PGconn *conn)
Definition: fe-connect.c:6616
int PQfformat(const PGresult *res, int field_num)
Definition: fe-exec.c:2996
#define snprintf
Definition: port.h:215
char * libpqGetFile(const char *filename, size_t *filesize)
Definition: libpq_fetch.c:369
#define UINT64_FORMAT
Definition: c.h:418
#define pg_ntoh64(x)
Definition: pg_bswap.h:126
long val
Definition: informix.c:664
PGresult * PQgetResult(PGconn *conn)
Definition: fe-exec.c:1778
#define pg_log_info(...)
Definition: logging.h:88
PGconn * PQconnectdb(const char *conninfo)
Definition: fe-connect.c:703
static char * connstr
Definition: pg_dumpall.c:62