PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
copy.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * copy.c
4  * Implements the COPY utility command
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/commands/copy.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <ctype.h>
18 #include <unistd.h>
19 #include <sys/stat.h>
20 
21 #include "access/heapam.h"
22 #include "access/htup_details.h"
23 #include "access/sysattr.h"
24 #include "access/xact.h"
25 #include "access/xlog.h"
26 #include "catalog/pg_type.h"
27 #include "commands/copy.h"
28 #include "commands/defrem.h"
29 #include "commands/trigger.h"
30 #include "executor/executor.h"
31 #include "libpq/libpq.h"
32 #include "libpq/pqformat.h"
33 #include "mb/pg_wchar.h"
34 #include "miscadmin.h"
35 #include "optimizer/clauses.h"
36 #include "optimizer/planner.h"
37 #include "nodes/makefuncs.h"
38 #include "parser/parse_relation.h"
39 #include "port/pg_bswap.h"
40 #include "rewrite/rewriteHandler.h"
41 #include "storage/fd.h"
42 #include "tcop/tcopprot.h"
43 #include "utils/builtins.h"
44 #include "utils/lsyscache.h"
45 #include "utils/memutils.h"
46 #include "utils/portal.h"
47 #include "utils/rel.h"
48 #include "utils/rls.h"
49 #include "utils/snapmgr.h"
50 
51 
/*
 * Helpers for decoding octal escape sequences.
 *
 * ISOCTAL(c)  - nonzero when c is one of the characters '0'..'7'.
 *               Note that the argument is evaluated twice.
 * OCTVALUE(c) - numeric value of the octal digit character c.
 */
#define ISOCTAL(c) ((c) >= '0' && (c) <= '7')
#define OCTVALUE(c) ((c) - '0')
54 
55 /*
56  * Represents the different source/dest cases we need to worry about at
57  * the bottom level
58  */
typedef enum CopyDest
{
    /* to/from a file, or a program connected through a pipe */
    COPY_FILE = 0,
    /* to/from the frontend, speaking the 2.0 protocol */
    COPY_OLD_FE = 1,
    /* to/from the frontend, speaking the 3.0 protocol */
    COPY_NEW_FE = 2,
    /* to/from a caller-supplied callback function */
    COPY_CALLBACK = 3
} CopyDest;
66 
67 /*
68  * Represents the end-of-line terminator type of the input
69  */
typedef enum EolType
{
    EOL_UNKNOWN,                /* terminator not determined yet */
    EOL_NL,                     /* lines end with newline (\n) */
    EOL_CR,                     /* lines end with carriage return (\r) */
    EOL_CRNL                    /* lines end with \r\n */
} EolType;
77 
78 /*
79  * This struct contains all the state variables used throughout a COPY
80  * operation. For simplicity, we use the same struct for all variants of COPY,
81  * even though some fields are used in only some cases.
82  *
83  * Multi-byte encodings: all supported client-side encodings encode multi-byte
84  * characters by having the first byte's high bit set. Subsequent bytes of the
85  * character can have the high bit not set. When scanning data in such an
86  * encoding to look for a match to a single-byte (ie ASCII) character, we must
87  * use the full pg_encoding_mblen() machinery to skip over multibyte
88  * characters, else we might find a false match to a trailing byte. In
89  * supported server encodings, there is no possibility of a false match, and
90  * it's faster to make useless comparisons to trailing bytes than it is to
91  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
92  * when we have to do it the hard way.
93  */
/*
 * CopyStateData: one instance carries every piece of state for a single
 * COPY operation; the same layout serves both COPY TO and COPY FROM.
 *
 * NOTE(review): the embedded listing line numbers skip in several places
 * inside this struct (155->160, 164->166->168, 170->175, 183->185,
 * 187->189, 197->199), and the comments below describe attribute_buf and
 * line_buf members that are not declared in the visible text.  Some member
 * declarations appear to be missing from this copy -- confirm against the
 * upstream file before relying on this layout.
 */
94 typedef struct CopyStateData
95 {
96  /* low-level state data */
97  CopyDest copy_dest; /* type of copy source/destination */
98  FILE *copy_file; /* used if copy_dest == COPY_FILE */
99  StringInfo fe_msgbuf; /* used for all dests during COPY TO, only for
100  * dest == COPY_NEW_FE in COPY FROM */
101  bool fe_eof; /* true if detected end of copy data */
102  EolType eol_type; /* EOL type of input */
103  int file_encoding; /* file or remote side's character encoding */
104  bool need_transcoding; /* file encoding diff from server? */
105  bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
106 
107  /* parameters from the COPY command */
108  Relation rel; /* relation to copy to or from */
109  QueryDesc *queryDesc; /* executable query to copy from */
110  List *attnumlist; /* integer list of attnums to copy */
111  char *filename; /* filename, or NULL for STDIN/STDOUT */
112  bool is_program; /* is 'filename' a program to popen? */
113  copy_data_source_cb data_source_cb; /* function for reading data */
114  bool binary; /* binary format? */
115  bool oids; /* include OIDs? */
116  bool freeze; /* freeze rows on loading? */
117  bool csv_mode; /* Comma Separated Value format? */
118  bool header_line; /* CSV header line? */
119  char *null_print; /* NULL marker string (server encoding!) */
120  int null_print_len; /* length of same */
121  char *null_print_client; /* same converted to file encoding */
122  char *delim; /* column delimiter (must be 1 byte) */
123  char *quote; /* CSV quote char (must be 1 byte) */
124  char *escape; /* CSV escape char (must be 1 byte) */
125  List *force_quote; /* list of column names */
126  bool force_quote_all; /* FORCE_QUOTE *? */
127  bool *force_quote_flags; /* per-column CSV FQ flags */
128  List *force_notnull; /* list of column names */
129  bool *force_notnull_flags; /* per-column CSV FNN flags */
130  List *force_null; /* list of column names */
131  bool *force_null_flags; /* per-column CSV FN flags */
132  bool convert_selectively; /* do selective binary conversion? */
133  List *convert_select; /* list of column names (can be NIL) */
134  bool *convert_select_flags; /* per-column CSV/TEXT CS flags */
135 
136  /* these are just for error messages, see CopyFromErrorCallback */
137  const char *cur_relname; /* table name for error messages */
138  int cur_lineno; /* line number for error messages */
139  const char *cur_attname; /* current att for error messages */
140  const char *cur_attval; /* current att value for error messages */
141 
142  /*
143  * Working state for COPY TO/FROM
144  */
145  MemoryContext copycontext; /* per-copy execution context */
146 
147  /*
148  * Working state for COPY TO
149  */
150  FmgrInfo *out_functions; /* lookup info for output functions */
151  MemoryContext rowcontext; /* per-row evaluation context */
152 
153  /*
154  * Working state for COPY FROM
155  */
 /* NOTE(review): listing lines 156-159 are absent here -- confirm. */
160  FmgrInfo *in_functions; /* array of input functions for each attrs */
161  Oid *typioparams; /* array of element types for in_functions */
162  int *defmap; /* array of default att numbers */
163  ExprState **defexprs; /* array of default att expressions */
164  bool volatile_defexprs; /* is any of defexprs volatile? */
166 
 /*
  * NOTE(review): "the above array" referred to by num_dispatch is not
  * visible in this copy (listing line 167 is absent), and listing lines
  * 171-174 after 'partitions' are absent as well -- confirm.
  */
168  int num_dispatch; /* Number of entries in the above array */
169  int num_partitions; /* Number of members in the following arrays */
170  ResultRelInfo **partitions; /* Per partition result relation pointers */
175 
176  /*
177  * These variables are used to reduce overhead in textual COPY FROM.
178  *
179  * attribute_buf holds the separated, de-escaped text for each field of
180  * the current line. The CopyReadAttributes functions return arrays of
181  * pointers into this buffer. We avoid palloc/pfree overhead by re-using
182  * the buffer on each cycle.
183  */
 /* NOTE(review): no attribute_buf declaration is visible here -- confirm. */
185 
186  /* field raw data pointers found by COPY FROM */
187 
189  char **raw_fields;
190 
191  /*
192  * Similarly, line_buf holds the whole input line being processed. The
193  * input cycle is first to read the whole line into line_buf, convert it
194  * to server encoding there, and then extract the individual attribute
195  * fields into attribute_buf. line_buf is preserved unmodified so that we
196  * can display it in error messages if appropriate.
197  */
 /* NOTE(review): no line_buf declaration is visible here -- confirm. */
199  bool line_buf_converted; /* converted to server encoding? */
200  bool line_buf_valid; /* contains the row being processed? */
201 
202  /*
203  * Finally, raw_buf holds raw data read from the data source (file or
204  * client connection). CopyReadLine parses this data sufficiently to
205  * locate line boundaries, then transfers the data to line_buf and
206  * converts it. Note: we guarantee that there is a \0 at
207  * raw_buf[raw_buf_len].
208  */
209 #define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
210  char *raw_buf;
211  int raw_buf_index; /* next byte to process */
212  int raw_buf_len; /* total # of bytes stored */
213 } CopyStateData;
214 
215 /* DestReceiver for COPY (query) TO */
/*
 * Receiver state for COPY (query) TO.  The DestReceiver member is placed
 * first, followed by the CopyState of the running command and a count of
 * tuples handled so far.
 * NOTE(review): 'pub' being first presumably lets a DR_copy pointer be used
 * where a DestReceiver pointer is expected -- confirm against the callers.
 */
216 typedef struct
217 {
218  DestReceiver pub; /* publicly-known function pointers */
219  CopyState cstate; /* CopyStateData for the command */
220  uint64 processed; /* # of tuples processed */
221 } DR_copy;
222 
223 
224 /*
225  * These macros centralize code used to process line_buf and raw_buf buffers.
226  * They are macros because they often do continue/break control and to avoid
227  * function call overhead in tight COPY loops.
228  *
229  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
230  * prevent the continue/break processing from working. We end the "if (1)"
231  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
232  * any "else" in the calling code, and to avoid any compiler warnings about
233  * empty statements. See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
234  */
235 
236 /*
237  * This keeps the character read at the top of the loop in the buffer
238  * even if there is more than one read-ahead.
239  */
/*
 * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
 *
 * If the byte at raw_buf_ptr + extralen lies at or beyond copy_buf_len and
 * EOF has not been hit, rewind raw_buf_ptr to prev_raw_ptr, set need_data,
 * and "continue" the enclosing loop.
 *
 * Must be expanded inside a loop, in a scope that declares raw_buf_ptr,
 * prev_raw_ptr, copy_buf_len, hit_eof and need_data.
 */
240 #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
241 if (1) \
242 { \
243  if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
244  { \
245  raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
246  need_data = true; \
247  continue; \
248  } \
249 } else ((void) 0)
250 
251 /* This consumes the remainder of the buffer and breaks */
/*
 * IF_NEED_REFILL_AND_EOF_BREAK(extralen)
 *
 * If the byte at raw_buf_ptr + extralen lies at or beyond copy_buf_len and
 * EOF has already been hit, optionally advance raw_buf_ptr to copy_buf_len
 * (only when extralen is nonzero), set result to true, and "break" out of
 * the enclosing loop or switch.
 *
 * Must be expanded in a scope that declares raw_buf_ptr, copy_buf_len,
 * hit_eof and result.
 */
252 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
253 if (1) \
254 { \
255  if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
256  { \
257  if (extralen) \
258  raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
259  /* backslash just before EOF, treat as data char */ \
260  result = true; \
261  break; \
262  } \
263 } else ((void) 0)
264 
265 /*
266  * Transfer any approved data to line_buf; must do this to be sure
267  * there is some room in raw_buf.
268  */
/*
 * REFILL_LINEBUF
 *
 * Append the bytes raw_buf[raw_buf_index .. raw_buf_ptr) to cstate->line_buf
 * and advance cstate->raw_buf_index to raw_buf_ptr.  Does nothing when
 * raw_buf_ptr has not moved past raw_buf_index.
 *
 * Must be expanded in a scope that declares cstate and raw_buf_ptr.
 */
269 #define REFILL_LINEBUF \
270 if (1) \
271 { \
272  if (raw_buf_ptr > cstate->raw_buf_index) \
273  { \
274  appendBinaryStringInfo(&cstate->line_buf, \
275  cstate->raw_buf + cstate->raw_buf_index, \
276  raw_buf_ptr - cstate->raw_buf_index); \
277  cstate->raw_buf_index = raw_buf_ptr; \
278  } \
279 } else ((void) 0)
280 
281 /* Undo any read-ahead and jump out of the block. */
/*
 * NO_END_OF_COPY_GOTO
 *
 * Reset raw_buf_ptr to one byte past prev_raw_ptr and jump to the label
 * not_end_of_copy.
 *
 * Must be expanded in a function that defines the not_end_of_copy label and
 * declares raw_buf_ptr and prev_raw_ptr.
 */
282 #define NO_END_OF_COPY_GOTO \
283 if (1) \
284 { \
285  raw_buf_ptr = prev_raw_ptr + 1; \
286  goto not_end_of_copy; \
287 } else ((void) 0)
288 
/*
 * Eleven-byte signature: "PGCOPY", newline, byte 0377 (0xFF), carriage
 * return, newline, NUL.  The array is sized to exactly those 11 bytes, so
 * the string literal's implicit terminator is not stored.
 * NOTE(review): presumably the header signature of binary-format COPY data;
 * its use is not visible in this part of the file -- confirm.
 */
289 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
290 
291 
292 /* non-export function prototypes */
293 static CopyState BeginCopy(ParseState *pstate, bool is_from, Relation rel,
294  RawStmt *raw_query, Oid queryRelId, List *attnamelist,
295  List *options);
296 static void EndCopy(CopyState cstate);
297 static void ClosePipeToProgram(CopyState cstate);
298 static CopyState BeginCopyTo(ParseState *pstate, Relation rel, RawStmt *query,
299  Oid queryRelId, const char *filename, bool is_program,
300  List *attnamelist, List *options);
301 static void EndCopyTo(CopyState cstate);
302 static uint64 DoCopyTo(CopyState cstate);
303 static uint64 CopyTo(CopyState cstate);
304 static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
305  Datum *values, bool *nulls);
306 static void CopyFromInsertBatch(CopyState cstate, EState *estate,
307  CommandId mycid, int hi_options,
308  ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
309  BulkInsertState bistate,
310  int nBufferedTuples, HeapTuple *bufferedTuples,
311  int firstBufferedLineNo);
312 static bool CopyReadLine(CopyState cstate);
313 static bool CopyReadLineText(CopyState cstate);
314 static int CopyReadAttributesText(CopyState cstate);
315 static int CopyReadAttributesCSV(CopyState cstate);
317  int column_no, FmgrInfo *flinfo,
318  Oid typioparam, int32 typmod,
319  bool *isnull);
320 static void CopyAttributeOutText(CopyState cstate, char *string);
321 static void CopyAttributeOutCSV(CopyState cstate, char *string,
322  bool use_quote, bool single_attr);
323 static List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
324  List *attnamelist);
325 static char *limit_printout_length(const char *str);
326 
327 /* Low-level communications functions */
328 static void SendCopyBegin(CopyState cstate);
329 static void ReceiveCopyBegin(CopyState cstate);
330 static void SendCopyEnd(CopyState cstate);
331 static void CopySendData(CopyState cstate, const void *databuf, int datasize);
332 static void CopySendString(CopyState cstate, const char *str);
333 static void CopySendChar(CopyState cstate, char c);
334 static void CopySendEndOfRow(CopyState cstate);
335 static int CopyGetData(CopyState cstate, void *databuf,
336  int minread, int maxread);
337 static void CopySendInt32(CopyState cstate, int32 val);
338 static bool CopyGetInt32(CopyState cstate, int32 *val);
339 static void CopySendInt16(CopyState cstate, int16 val);
340 static bool CopyGetInt16(CopyState cstate, int16 *val);
341 
342 
343 /*
344  * Send copy start/stop messages for frontend copies. These have changed
345  * in past protocol redesigns.
346  */
347 static void
349 {
351  {
352  /* new way */
354  int natts = list_length(cstate->attnumlist);
355  int16 format = (cstate->binary ? 1 : 0);
356  int i;
357 
358  pq_beginmessage(&buf, 'H');
359  pq_sendbyte(&buf, format); /* overall format */
360  pq_sendint16(&buf, natts);
361  for (i = 0; i < natts; i++)
362  pq_sendint16(&buf, format); /* per-column formats */
363  pq_endmessage(&buf);
364  cstate->copy_dest = COPY_NEW_FE;
365  }
366  else
367  {
368  /* old way */
369  if (cstate->binary)
370  ereport(ERROR,
371  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
372  errmsg("COPY BINARY is not supported to stdout or from stdin")));
373  pq_putemptymessage('H');
374  /* grottiness needed for old COPY OUT protocol */
375  pq_startcopyout();
376  cstate->copy_dest = COPY_OLD_FE;
377  }
378 }
379 
380 static void
382 {
384  {
385  /* new way */
387  int natts = list_length(cstate->attnumlist);
388  int16 format = (cstate->binary ? 1 : 0);
389  int i;
390 
391  pq_beginmessage(&buf, 'G');
392  pq_sendbyte(&buf, format); /* overall format */
393  pq_sendint16(&buf, natts);
394  for (i = 0; i < natts; i++)
395  pq_sendint16(&buf, format); /* per-column formats */
396  pq_endmessage(&buf);
397  cstate->copy_dest = COPY_NEW_FE;
398  cstate->fe_msgbuf = makeStringInfo();
399  }
400  else
401  {
402  /* old way */
403  if (cstate->binary)
404  ereport(ERROR,
405  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
406  errmsg("COPY BINARY is not supported to stdout or from stdin")));
407  pq_putemptymessage('G');
408  /* any error in old protocol will make us lose sync */
409  pq_startmsgread();
410  cstate->copy_dest = COPY_OLD_FE;
411  }
412  /* We *must* flush here to ensure FE knows it can send. */
413  pq_flush();
414 }
415 
416 static void
418 {
419  if (cstate->copy_dest == COPY_NEW_FE)
420  {
421  /* Shouldn't have any unsent data */
422  Assert(cstate->fe_msgbuf->len == 0);
423  /* Send Copy Done message */
424  pq_putemptymessage('c');
425  }
426  else
427  {
428  CopySendData(cstate, "\\.", 2);
429  /* Need to flush out the trailer (this also appends a newline) */
430  CopySendEndOfRow(cstate);
431  pq_endcopyout(false);
432  }
433 }
434 
435 /*----------
436  * CopySendData sends output data to the destination (file or frontend)
437  * CopySendString does the same for null-terminated strings
438  * CopySendChar does the same for single characters
439  * CopySendEndOfRow does the appropriate thing at end of each data row
440  * (data is not actually flushed except by CopySendEndOfRow)
441  *
442  * NB: no data conversion is applied by these functions
443  *----------
444  */
445 static void
446 CopySendData(CopyState cstate, const void *databuf, int datasize)
447 {
448  appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
449 }
450 
451 static void
452 CopySendString(CopyState cstate, const char *str)
453 {
454  appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
455 }
456 
457 static void
458 CopySendChar(CopyState cstate, char c)
459 {
461 }
462 
463 static void
465 {
466  StringInfo fe_msgbuf = cstate->fe_msgbuf;
467 
468  switch (cstate->copy_dest)
469  {
470  case COPY_FILE:
471  if (!cstate->binary)
472  {
473  /* Default line termination depends on platform */
474 #ifndef WIN32
475  CopySendChar(cstate, '\n');
476 #else
477  CopySendString(cstate, "\r\n");
478 #endif
479  }
480 
481  if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
482  cstate->copy_file) != 1 ||
483  ferror(cstate->copy_file))
484  {
485  if (cstate->is_program)
486  {
487  if (errno == EPIPE)
488  {
489  /*
490  * The pipe will be closed automatically on error at
491  * the end of transaction, but we might get a better
492  * error message from the subprocess' exit code than
493  * just "Broken Pipe"
494  */
495  ClosePipeToProgram(cstate);
496 
497  /*
498  * If ClosePipeToProgram() didn't throw an error, the
499  * program terminated normally, but closed the pipe
500  * first. Restore errno, and throw an error.
501  */
502  errno = EPIPE;
503  }
504  ereport(ERROR,
506  errmsg("could not write to COPY program: %m")));
507  }
508  else
509  ereport(ERROR,
511  errmsg("could not write to COPY file: %m")));
512  }
513  break;
514  case COPY_OLD_FE:
515  /* The FE/BE protocol uses \n as newline for all platforms */
516  if (!cstate->binary)
517  CopySendChar(cstate, '\n');
518 
519  if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
520  {
521  /* no hope of recovering connection sync, so FATAL */
522  ereport(FATAL,
523  (errcode(ERRCODE_CONNECTION_FAILURE),
524  errmsg("connection lost during COPY to stdout")));
525  }
526  break;
527  case COPY_NEW_FE:
528  /* The FE/BE protocol uses \n as newline for all platforms */
529  if (!cstate->binary)
530  CopySendChar(cstate, '\n');
531 
532  /* Dump the accumulated row as one CopyData message */
533  (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
534  break;
535  case COPY_CALLBACK:
536  Assert(false); /* Not yet supported. */
537  break;
538  }
539 
540  resetStringInfo(fe_msgbuf);
541 }
542 
543 /*
544  * CopyGetData reads data from the source (file or frontend)
545  *
546  * We attempt to read at least minread, and at most maxread, bytes from
547  * the source. The actual number of bytes read is returned; if this is
548  * less than minread, EOF was detected.
549  *
550  * Note: when copying from the frontend, we expect a proper EOF mark per
551  * protocol; if the frontend simply drops the connection, we raise error.
552  * It seems unwise to allow the COPY IN to complete normally in that case.
553  *
554  * NB: no data conversion is applied here.
555  */
556 static int
557 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
558 {
559  int bytesread = 0;
560 
561  switch (cstate->copy_dest)
562  {
563  case COPY_FILE:
564  bytesread = fread(databuf, 1, maxread, cstate->copy_file);
565  if (ferror(cstate->copy_file))
566  ereport(ERROR,
568  errmsg("could not read from COPY file: %m")));
569  break;
570  case COPY_OLD_FE:
571 
572  /*
573  * We cannot read more than minread bytes (which in practice is 1)
574  * because old protocol doesn't have any clear way of separating
575  * the COPY stream from following data. This is slow, but not any
576  * slower than the code path was originally, and we don't care
577  * much anymore about the performance of old protocol.
578  */
579  if (pq_getbytes((char *) databuf, minread))
580  {
581  /* Only a \. terminator is legal EOF in old protocol */
582  ereport(ERROR,
583  (errcode(ERRCODE_CONNECTION_FAILURE),
584  errmsg("unexpected EOF on client connection with an open transaction")));
585  }
586  bytesread = minread;
587  break;
588  case COPY_NEW_FE:
589  while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
590  {
591  int avail;
592 
593  while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
594  {
595  /* Try to receive another message */
596  int mtype;
597 
598  readmessage:
600  pq_startmsgread();
601  mtype = pq_getbyte();
602  if (mtype == EOF)
603  ereport(ERROR,
604  (errcode(ERRCODE_CONNECTION_FAILURE),
605  errmsg("unexpected EOF on client connection with an open transaction")));
606  if (pq_getmessage(cstate->fe_msgbuf, 0))
607  ereport(ERROR,
608  (errcode(ERRCODE_CONNECTION_FAILURE),
609  errmsg("unexpected EOF on client connection with an open transaction")));
611  switch (mtype)
612  {
613  case 'd': /* CopyData */
614  break;
615  case 'c': /* CopyDone */
616  /* COPY IN correctly terminated by frontend */
617  cstate->fe_eof = true;
618  return bytesread;
619  case 'f': /* CopyFail */
620  ereport(ERROR,
621  (errcode(ERRCODE_QUERY_CANCELED),
622  errmsg("COPY from stdin failed: %s",
623  pq_getmsgstring(cstate->fe_msgbuf))));
624  break;
625  case 'H': /* Flush */
626  case 'S': /* Sync */
627 
628  /*
629  * Ignore Flush/Sync for the convenience of client
630  * libraries (such as libpq) that may send those
631  * without noticing that the command they just
632  * sent was COPY.
633  */
634  goto readmessage;
635  default:
636  ereport(ERROR,
637  (errcode(ERRCODE_PROTOCOL_VIOLATION),
638  errmsg("unexpected message type 0x%02X during COPY from stdin",
639  mtype)));
640  break;
641  }
642  }
643  avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
644  if (avail > maxread)
645  avail = maxread;
646  pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
647  databuf = (void *) ((char *) databuf + avail);
648  maxread -= avail;
649  bytesread += avail;
650  }
651  break;
652  case COPY_CALLBACK:
653  bytesread = cstate->data_source_cb(databuf, minread, maxread);
654  break;
655  }
656 
657  return bytesread;
658 }
659 
660 
661 /*
662  * These functions do apply some data conversion
663  */
664 
665 /*
666  * CopySendInt32 sends an int32 in network byte order
667  */
668 static void
670 {
671  uint32 buf;
672 
673  buf = pg_hton32((uint32) val);
674  CopySendData(cstate, &buf, sizeof(buf));
675 }
676 
677 /*
678  * CopyGetInt32 reads an int32 that appears in network byte order
679  *
680  * Returns true if OK, false if EOF
681  */
682 static bool
684 {
685  uint32 buf;
686 
687  if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
688  {
689  *val = 0; /* suppress compiler warning */
690  return false;
691  }
692  *val = (int32) pg_ntoh32(buf);
693  return true;
694 }
695 
696 /*
697  * CopySendInt16 sends an int16 in network byte order
698  */
699 static void
701 {
702  uint16 buf;
703 
704  buf = pg_hton16((uint16) val);
705  CopySendData(cstate, &buf, sizeof(buf));
706 }
707 
708 /*
709  * CopyGetInt16 reads an int16 that appears in network byte order
710  */
711 static bool
713 {
714  uint16 buf;
715 
716  if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
717  {
718  *val = 0; /* suppress compiler warning */
719  return false;
720  }
721  *val = (int16) pg_ntoh16(buf);
722  return true;
723 }
724 
725 
726 /*
727  * CopyLoadRawBuf loads some more data into raw_buf
728  *
729  * Returns TRUE if able to obtain at least one more byte, else FALSE.
730  *
731  * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
732  * down to the start of the buffer and then we load more data after that.
733  * This case is used only when a frontend multibyte character crosses a
734  * bufferload boundary.
735  */
736 static bool
738 {
739  int nbytes;
740  int inbytes;
741 
742  if (cstate->raw_buf_index < cstate->raw_buf_len)
743  {
744  /* Copy down the unprocessed data */
745  nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
746  memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
747  nbytes);
748  }
749  else
750  nbytes = 0; /* no data need be saved */
751 
752  inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
753  1, RAW_BUF_SIZE - nbytes);
754  nbytes += inbytes;
755  cstate->raw_buf[nbytes] = '\0';
756  cstate->raw_buf_index = 0;
757  cstate->raw_buf_len = nbytes;
758  return (inbytes > 0);
759 }
760 
761 
762 /*
763  * DoCopy executes the SQL COPY statement
764  *
765  * Either unload or reload contents of table <relation>, depending on <from>.
766  * (<from> = TRUE means we are inserting into the table.) In the "TO" case
767  * we also support copying the output of an arbitrary SELECT, INSERT, UPDATE
768  * or DELETE query.
769  *
770  * If <pipe> is false, transfer is between the table and the file named
771  * <filename>. Otherwise, transfer is between the table and our regular
772  * input/output stream. The latter could be either stdin/stdout or a
773  * socket, depending on whether we're running under Postmaster control.
774  *
775  * Do not allow a Postgres user without superuser privilege to read from
776  * or write to a file.
777  *
778  * Do not allow the copy if user doesn't have proper permission to access
779  * the table or the specifically requested columns.
780  */
781 void
782 DoCopy(ParseState *pstate, const CopyStmt *stmt,
783  int stmt_location, int stmt_len,
784  uint64 *processed)
785 {
786  CopyState cstate;
787  bool is_from = stmt->is_from;
788  bool pipe = (stmt->filename == NULL);
789  Relation rel;
790  Oid relid;
791  RawStmt *query = NULL;
792 
793  /* Disallow COPY to/from file or program except to superusers. */
794  if (!pipe && !superuser())
795  {
796  if (stmt->is_program)
797  ereport(ERROR,
798  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
799  errmsg("must be superuser to COPY to or from an external program"),
800  errhint("Anyone can COPY to stdout or from stdin. "
801  "psql's \\copy command also works for anyone.")));
802  else
803  ereport(ERROR,
804  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
805  errmsg("must be superuser to COPY to or from a file"),
806  errhint("Anyone can COPY to stdout or from stdin. "
807  "psql's \\copy command also works for anyone.")));
808  }
809 
810  if (stmt->relation)
811  {
812  TupleDesc tupDesc;
813  List *attnums;
814  ListCell *cur;
815  RangeTblEntry *rte;
816 
817  Assert(!stmt->query);
818 
819  /* Open and lock the relation, using the appropriate lock type. */
820  rel = heap_openrv(stmt->relation,
821  (is_from ? RowExclusiveLock : AccessShareLock));
822 
823  relid = RelationGetRelid(rel);
824 
825  rte = addRangeTableEntryForRelation(pstate, rel, NULL, false, false);
826  rte->requiredPerms = (is_from ? ACL_INSERT : ACL_SELECT);
827 
828  tupDesc = RelationGetDescr(rel);
829  attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
830  foreach(cur, attnums)
831  {
832  int attno = lfirst_int(cur) -
834 
835  if (is_from)
836  rte->insertedCols = bms_add_member(rte->insertedCols, attno);
837  else
838  rte->selectedCols = bms_add_member(rte->selectedCols, attno);
839  }
840  ExecCheckRTPerms(pstate->p_rtable, true);
841 
842  /*
843  * Permission check for row security policies.
844  *
845  * check_enable_rls will ereport(ERROR) if the user has requested
846  * something invalid and will otherwise indicate if we should enable
847  * RLS (returns RLS_ENABLED) or not for this COPY statement.
848  *
849  * If the relation has a row security policy and we are to apply it
850  * then perform a "query" copy and allow the normal query processing
851  * to handle the policies.
852  *
853  * If RLS is not enabled for this, then just fall through to the
854  * normal non-filtering relation handling.
855  */
856  if (check_enable_rls(rte->relid, InvalidOid, false) == RLS_ENABLED)
857  {
859  ColumnRef *cr;
860  ResTarget *target;
861  RangeVar *from;
862  List *targetList = NIL;
863 
864  if (is_from)
865  ereport(ERROR,
866  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
867  errmsg("COPY FROM not supported with row-level security"),
868  errhint("Use INSERT statements instead.")));
869 
870  /*
871  * Build target list
872  *
873  * If no columns are specified in the attribute list of the COPY
874  * command, then the target list is 'all' columns. Therefore, '*'
875  * should be used as the target list for the resulting SELECT
876  * statement.
877  *
878  * In the case that columns are specified in the attribute list,
879  * create a ColumnRef and ResTarget for each column and add them
880  * to the target list for the resulting SELECT statement.
881  */
882  if (!stmt->attlist)
883  {
884  cr = makeNode(ColumnRef);
886  cr->location = -1;
887 
888  target = makeNode(ResTarget);
889  target->name = NULL;
890  target->indirection = NIL;
891  target->val = (Node *) cr;
892  target->location = -1;
893 
894  targetList = list_make1(target);
895  }
896  else
897  {
898  ListCell *lc;
899 
900  foreach(lc, stmt->attlist)
901  {
902  /*
903  * Build the ColumnRef for each column. The ColumnRef
904  * 'fields' property is a String 'Value' node (see
905  * nodes/value.h) that corresponds to the column name
906  * respectively.
907  */
908  cr = makeNode(ColumnRef);
909  cr->fields = list_make1(lfirst(lc));
910  cr->location = -1;
911 
912  /* Build the ResTarget and add the ColumnRef to it. */
913  target = makeNode(ResTarget);
914  target->name = NULL;
915  target->indirection = NIL;
916  target->val = (Node *) cr;
917  target->location = -1;
918 
919  /* Add each column to the SELECT statement's target list */
920  targetList = lappend(targetList, target);
921  }
922  }
923 
924  /*
925  * Build RangeVar for from clause, fully qualified based on the
926  * relation which we have opened and locked.
927  */
930  -1);
931 
932  /* Build query */
933  select = makeNode(SelectStmt);
934  select->targetList = targetList;
935  select->fromClause = list_make1(from);
936 
937  query = makeNode(RawStmt);
938  query->stmt = (Node *) select;
939  query->stmt_location = stmt_location;
940  query->stmt_len = stmt_len;
941 
942  /*
943  * Close the relation for now, but keep the lock on it to prevent
944  * changes between now and when we start the query-based COPY.
945  *
946  * We'll reopen it later as part of the query-based COPY.
947  */
948  heap_close(rel, NoLock);
949  rel = NULL;
950  }
951  }
952  else
953  {
954  Assert(stmt->query);
955 
956  query = makeNode(RawStmt);
957  query->stmt = stmt->query;
958  query->stmt_location = stmt_location;
959  query->stmt_len = stmt_len;
960 
961  relid = InvalidOid;
962  rel = NULL;
963  }
964 
965  if (is_from)
966  {
967  Assert(rel);
968 
969  /* check read-only transaction and parallel mode */
970  if (XactReadOnly && !rel->rd_islocaltemp)
971  PreventCommandIfReadOnly("COPY FROM");
972  PreventCommandIfParallelMode("COPY FROM");
973 
974  cstate = BeginCopyFrom(pstate, rel, stmt->filename, stmt->is_program,
975  NULL, stmt->attlist, stmt->options);
976  *processed = CopyFrom(cstate); /* copy from file to database */
977  EndCopyFrom(cstate);
978  }
979  else
980  {
981  cstate = BeginCopyTo(pstate, rel, query, relid,
982  stmt->filename, stmt->is_program,
983  stmt->attlist, stmt->options);
984  *processed = DoCopyTo(cstate); /* copy from database to file */
985  EndCopyTo(cstate);
986  }
987 
988  /*
989  * Close the relation. If reading, we can release the AccessShareLock we
990  * got; if writing, we should hold the lock until end of transaction to
991  * ensure that updates will be committed before lock is released.
992  */
993  if (rel != NULL)
994  heap_close(rel, (is_from ? NoLock : AccessShareLock));
995 }
996 
997 /*
998  * Process the statement option list for COPY.
999  *
1000  * Scan the options list (a list of DefElem) and transpose the information
1001  * into cstate, applying appropriate error checking.
1002  *
1003  * cstate is assumed to be filled with zeroes initially.
1004  *
1005  * This is exported so that external users of the COPY API can sanity-check
1006  * a list of options. In that usage, cstate should be passed as NULL
1007  * (since external users don't know sizeof(CopyStateData)) and the collected
1008  * data is just leaked until CurrentMemoryContext is reset.
1009  *
1010  * Note that additional checking, such as whether column names listed in FORCE
1011  * QUOTE actually exist, has to be applied later. This just checks for
1012  * self-consistency of the options list.
1013  */
/*
 * Arguments, as used in the body below:
 *   pstate  - only handed to parser_errposition() so that errors can point
 *             at the offending option.
 *   cstate  - receives the parsed values; a zeroed scratch struct is
 *             allocated here when NULL is passed.
 *   is_from - true for COPY FROM; used to reject options that are valid in
 *             one direction only (FORCE_QUOTE versus FORCE_NOT_NULL and
 *             FORCE_NULL).
 *   options - list of DefElem nodes to scan.
 *
 * Nothing is returned; every problem is raised through ereport(ERROR, ...).
 *
 * The work happens in three phases: collect the options, fill in defaults
 * that depend on the format, then cross-check the combined settings.
 *
 * NOTE(review): listing lines 1015 and 1197 are absent from this extract.
 * Presumably 1015 holds the function name with the pstate parameter and
 * 1197 assigns cstate->file_encoding from the option value -- confirm
 * against the original file.
 */
1014 void
1016  CopyState cstate,
1017  bool is_from,
1018  List *options)
1019 {
1020  bool format_specified = false;
1021  ListCell *option;
1022 
1023  /* Support external use for option sanity checking */
1024  if (cstate == NULL)
1025  cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
1026 
 /* -1 marks ENCODING as not yet given; the encoding branch tests >= 0 */
1027  cstate->file_encoding = -1;
1028 
1029  /* Extract options from the statement node tree */
1030  foreach(option, options)
1031  {
1032  DefElem *defel = lfirst_node(DefElem, option);
1033 
1034  if (strcmp(defel->defname, "format") == 0)
1035  {
1036  char *fmt = defGetString(defel);
1037 
1038  if (format_specified)
1039  ereport(ERROR,
1040  (errcode(ERRCODE_SYNTAX_ERROR),
1041  errmsg("conflicting or redundant options"),
1042  parser_errposition(pstate, defel->location)));
1043  format_specified = true;
1044  if (strcmp(fmt, "text") == 0)
1045  /* default format */ ;
1046  else if (strcmp(fmt, "csv") == 0)
1047  cstate->csv_mode = true;
1048  else if (strcmp(fmt, "binary") == 0)
1049  cstate->binary = true;
1050  else
1051  ereport(ERROR,
1052  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1053  errmsg("COPY format \"%s\" not recognized", fmt),
1054  parser_errposition(pstate, defel->location)));
1055  }
1056  else if (strcmp(defel->defname, "oids") == 0)
1057  {
 /*
  * NOTE(review): for the boolean options (oids, freeze, header) the
  * duplicate test looks at the stored value, so a repeated option is
  * only caught when an earlier occurrence set the flag to true.
  */
1058  if (cstate->oids)
1059  ereport(ERROR,
1060  (errcode(ERRCODE_SYNTAX_ERROR),
1061  errmsg("conflicting or redundant options"),
1062  parser_errposition(pstate, defel->location)));
1063  cstate->oids = defGetBoolean(defel);
1064  }
1065  else if (strcmp(defel->defname, "freeze") == 0)
1066  {
1067  if (cstate->freeze)
1068  ereport(ERROR,
1069  (errcode(ERRCODE_SYNTAX_ERROR),
1070  errmsg("conflicting or redundant options"),
1071  parser_errposition(pstate, defel->location)));
1072  cstate->freeze = defGetBoolean(defel);
1073  }
1074  else if (strcmp(defel->defname, "delimiter") == 0)
1075  {
1076  if (cstate->delim)
1077  ereport(ERROR,
1078  (errcode(ERRCODE_SYNTAX_ERROR),
1079  errmsg("conflicting or redundant options"),
1080  parser_errposition(pstate, defel->location)));
1081  cstate->delim = defGetString(defel);
1082  }
1083  else if (strcmp(defel->defname, "null") == 0)
1084  {
1085  if (cstate->null_print)
1086  ereport(ERROR,
1087  (errcode(ERRCODE_SYNTAX_ERROR),
1088  errmsg("conflicting or redundant options"),
1089  parser_errposition(pstate, defel->location)));
1090  cstate->null_print = defGetString(defel);
1091  }
1092  else if (strcmp(defel->defname, "header") == 0)
1093  {
1094  if (cstate->header_line)
1095  ereport(ERROR,
1096  (errcode(ERRCODE_SYNTAX_ERROR),
1097  errmsg("conflicting or redundant options"),
1098  parser_errposition(pstate, defel->location)));
1099  cstate->header_line = defGetBoolean(defel);
1100  }
1101  else if (strcmp(defel->defname, "quote") == 0)
1102  {
1103  if (cstate->quote)
1104  ereport(ERROR,
1105  (errcode(ERRCODE_SYNTAX_ERROR),
1106  errmsg("conflicting or redundant options"),
1107  parser_errposition(pstate, defel->location)));
1108  cstate->quote = defGetString(defel);
1109  }
1110  else if (strcmp(defel->defname, "escape") == 0)
1111  {
1112  if (cstate->escape)
1113  ereport(ERROR,
1114  (errcode(ERRCODE_SYNTAX_ERROR),
1115  errmsg("conflicting or redundant options"),
1116  parser_errposition(pstate, defel->location)));
1117  cstate->escape = defGetString(defel);
1118  }
1119  else if (strcmp(defel->defname, "force_quote") == 0)
1120  {
 /* argument is either * (quote every column) or a column name list */
1121  if (cstate->force_quote || cstate->force_quote_all)
1122  ereport(ERROR,
1123  (errcode(ERRCODE_SYNTAX_ERROR),
1124  errmsg("conflicting or redundant options"),
1125  parser_errposition(pstate, defel->location)));
1126  if (defel->arg && IsA(defel->arg, A_Star))
1127  cstate->force_quote_all = true;
1128  else if (defel->arg && IsA(defel->arg, List))
1129  cstate->force_quote = castNode(List, defel->arg);
1130  else
1131  ereport(ERROR,
1132  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1133  errmsg("argument to option \"%s\" must be a list of column names",
1134  defel->defname),
1135  parser_errposition(pstate, defel->location)));
1136  }
1137  else if (strcmp(defel->defname, "force_not_null") == 0)
1138  {
1139  if (cstate->force_notnull)
1140  ereport(ERROR,
1141  (errcode(ERRCODE_SYNTAX_ERROR),
1142  errmsg("conflicting or redundant options"),
1143  parser_errposition(pstate, defel->location)));
1144  if (defel->arg && IsA(defel->arg, List))
1145  cstate->force_notnull = castNode(List, defel->arg);
1146  else
1147  ereport(ERROR,
1148  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1149  errmsg("argument to option \"%s\" must be a list of column names",
1150  defel->defname),
1151  parser_errposition(pstate, defel->location)));
1152  }
1153  else if (strcmp(defel->defname, "force_null") == 0)
1154  {
 /* report the option location, as every other duplicate check does */
1155  if (cstate->force_null)
1156  ereport(ERROR,
1157  (errcode(ERRCODE_SYNTAX_ERROR),
1158  errmsg("conflicting or redundant options"),
 parser_errposition(pstate, defel->location)));
1159  if (defel->arg && IsA(defel->arg, List))
1160  cstate->force_null = castNode(List, defel->arg);
1161  else
1162  ereport(ERROR,
1163  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1164  errmsg("argument to option \"%s\" must be a list of column names",
1165  defel->defname),
1166  parser_errposition(pstate, defel->location)));
1167  }
1168  else if (strcmp(defel->defname, "convert_selectively") == 0)
1169  {
1170  /*
1171  * Undocumented, not-accessible-from-SQL option: convert only the
1172  * named columns to binary form, storing the rest as NULLs. It's
1173  * allowed for the column list to be NIL.
1174  */
1175  if (cstate->convert_selectively)
1176  ereport(ERROR,
1177  (errcode(ERRCODE_SYNTAX_ERROR),
1178  errmsg("conflicting or redundant options"),
1179  parser_errposition(pstate, defel->location)));
1180  cstate->convert_selectively = true;
1181  if (defel->arg == NULL || IsA(defel->arg, List))
1182  cstate->convert_select = castNode(List, defel->arg);
1183  else
1184  ereport(ERROR,
1185  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1186  errmsg("argument to option \"%s\" must be a list of column names",
1187  defel->defname),
1188  parser_errposition(pstate, defel->location)));
1189  }
1190  else if (strcmp(defel->defname, "encoding") == 0)
1191  {
1192  if (cstate->file_encoding >= 0)
1193  ereport(ERROR,
1194  (errcode(ERRCODE_SYNTAX_ERROR),
1195  errmsg("conflicting or redundant options"),
1196  parser_errposition(pstate, defel->location)));
 /* NOTE(review): listing line 1197 is missing here; presumably it sets cstate->file_encoding from the option value -- confirm */
1198  if (cstate->file_encoding < 0)
1199  ereport(ERROR,
1200  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1201  errmsg("argument to option \"%s\" must be a valid encoding name",
1202  defel->defname),
1203  parser_errposition(pstate, defel->location)));
1204  }
1205  else
1206  ereport(ERROR,
1207  (errcode(ERRCODE_SYNTAX_ERROR),
1208  errmsg("option \"%s\" not recognized",
1209  defel->defname),
1210  parser_errposition(pstate, defel->location)));
1211  }
1212 
1213  /*
1214  * Check for incompatible options (must do these two before inserting
1215  * defaults)
1216  */
1217  if (cstate->binary && cstate->delim)
1218  ereport(ERROR,
1219  (errcode(ERRCODE_SYNTAX_ERROR),
1220  errmsg("cannot specify DELIMITER in BINARY mode")));
1221 
1222  if (cstate->binary && cstate->null_print)
1223  ereport(ERROR,
1224  (errcode(ERRCODE_SYNTAX_ERROR),
1225  errmsg("cannot specify NULL in BINARY mode")));
1226 
1227  /* Set defaults for omitted options */
1228  if (!cstate->delim)
1229  cstate->delim = cstate->csv_mode ? "," : "\t";
1230 
1231  if (!cstate->null_print)
1232  cstate->null_print = cstate->csv_mode ? "" : "\\N";
1233  cstate->null_print_len = strlen(cstate->null_print);
1234 
 /* quote and escape get defaults in CSV mode only; escape falls back to the quote string */
1235  if (cstate->csv_mode)
1236  {
1237  if (!cstate->quote)
1238  cstate->quote = "\"";
1239  if (!cstate->escape)
1240  cstate->escape = cstate->quote;
1241  }
1242 
1243  /* Only single-byte delimiter strings are supported. */
1244  if (strlen(cstate->delim) != 1)
1245  ereport(ERROR,
1246  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1247  errmsg("COPY delimiter must be a single one-byte character")));
1248 
1249  /* Disallow end-of-line characters */
1250  if (strchr(cstate->delim, '\r') != NULL ||
1251  strchr(cstate->delim, '\n') != NULL)
1252  ereport(ERROR,
1253  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1254  errmsg("COPY delimiter cannot be newline or carriage return")));
1255 
1256  if (strchr(cstate->null_print, '\r') != NULL ||
1257  strchr(cstate->null_print, '\n') != NULL)
1258  ereport(ERROR,
1259  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1260  errmsg("COPY null representation cannot use newline or carriage return")));
1261 
1262  /*
1263  * Disallow unsafe delimiter characters in non-CSV mode. We can't allow
1264  * backslash because it would be ambiguous. We can't allow the other
1265  * cases because data characters matching the delimiter must be
1266  * backslashed, and certain backslash combinations are interpreted
1267  * non-literally by COPY IN. Disallowing all lower case ASCII letters is
1268  * more than strictly necessary, but seems best for consistency and
1269  * future-proofing. Likewise we disallow all digits though only octal
1270  * digits are actually dangerous.
1271  */
1272  if (!cstate->csv_mode &&
1273  strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
1274  cstate->delim[0]) != NULL)
1275  ereport(ERROR,
1276  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1277  errmsg("COPY delimiter cannot be \"%s\"", cstate->delim)));
1278 
1279  /* Check header */
1280  if (!cstate->csv_mode && cstate->header_line)
1281  ereport(ERROR,
1282  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1283  errmsg("COPY HEADER available only in CSV mode")));
1284 
1285  /* Check quote */
1286  if (!cstate->csv_mode && cstate->quote != NULL)
1287  ereport(ERROR,
1288  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1289  errmsg("COPY quote available only in CSV mode")));
1290 
1291  if (cstate->csv_mode && strlen(cstate->quote) != 1)
1292  ereport(ERROR,
1293  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1294  errmsg("COPY quote must be a single one-byte character")));
1295 
1296  if (cstate->csv_mode && cstate->delim[0] == cstate->quote[0])
1297  ereport(ERROR,
1298  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1299  errmsg("COPY delimiter and quote must be different")));
1300 
1301  /* Check escape */
1302  if (!cstate->csv_mode && cstate->escape != NULL)
1303  ereport(ERROR,
1304  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1305  errmsg("COPY escape available only in CSV mode")));
1306 
1307  if (cstate->csv_mode && strlen(cstate->escape) != 1)
1308  ereport(ERROR,
1309  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1310  errmsg("COPY escape must be a single one-byte character")));
1311 
1312  /* Check force_quote */
1313  if (!cstate->csv_mode && (cstate->force_quote || cstate->force_quote_all))
1314  ereport(ERROR,
1315  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1316  errmsg("COPY force quote available only in CSV mode")));
1317  if ((cstate->force_quote || cstate->force_quote_all) && is_from)
1318  ereport(ERROR,
1319  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1320  errmsg("COPY force quote only available using COPY TO")));
1321 
1322  /* Check force_notnull */
1323  if (!cstate->csv_mode && cstate->force_notnull != NIL)
1324  ereport(ERROR,
1325  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1326  errmsg("COPY force not null available only in CSV mode")));
1327  if (cstate->force_notnull != NIL && !is_from)
1328  ereport(ERROR,
1329  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1330  errmsg("COPY force not null only available using COPY FROM")));
1331 
1332  /* Check force_null */
1333  if (!cstate->csv_mode && cstate->force_null != NIL)
1334  ereport(ERROR,
1335  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1336  errmsg("COPY force null available only in CSV mode")));
1337 
1338  if (cstate->force_null != NIL && !is_from)
1339  ereport(ERROR,
1340  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1341  errmsg("COPY force null only available using COPY FROM")));
1342 
1343  /* Don't allow the delimiter to appear in the null string. */
1344  if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
1345  ereport(ERROR,
1346  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1347  errmsg("COPY delimiter must not appear in the NULL specification")));
1348 
1349  /* Don't allow the CSV quote char to appear in the null string. */
1350  if (cstate->csv_mode &&
1351  strchr(cstate->null_print, cstate->quote[0]) != NULL)
1352  ereport(ERROR,
1353  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1354  errmsg("CSV quote character must not appear in the NULL specification")));
1355 }
1356 
1357 /*
1358  * Common setup routines used by BeginCopyFrom and BeginCopyTo.
1359  *
1360  * Iff <binary>, unload or reload in the binary format, as opposed to the
1361  * more wasteful but more robust and portable text format.
1362  *
1363  * Iff <oids>, unload or reload the format that includes OID information.
1364  * On input, we accept OIDs whether or not the table has an OID column,
1365  * but silently drop them if it does not. On output, we report an error
1366  * if the user asks for OIDs in a table that has none (not providing an
1367  * OID column might seem friendlier, but could seriously confuse programs).
1368  *
1369  * If in the text format, delimit columns with delimiter <delim> and print
1370  * NULL values as <null_print>.
1371  */
/*
 * Arguments, as used in the body below:
 *   pstate      - forwarded to ProcessCopyOptions; its p_sourcetext is used
 *                 for parse analysis and for the QueryDesc.
 *   is_from     - forwarded to ProcessCopyOptions; asserted false on the
 *                 query path.
 *   rel         - relation to copy to or from, or NULL when raw_query is
 *                 supplied.
 *   raw_query   - query to analyze, plan and start when rel is NULL
 *                 (asserted NULL otherwise).
 *   queryRelId  - when not InvalidOid, must be found in the relationOids
 *                 list of the finished plan.
 *   attnamelist - column names, resolved through CopyGetAttnums.
 *   options     - DefElem list handed to ProcessCopyOptions.
 *
 * Returns the new CopyState.  The struct is zero-allocated before the
 * switch to cstate->copycontext; everything after that is allocated with
 * copycontext current, and the previous context is restored on exit.
 *
 * NOTE(review): listing lines 1373, 1393, 1395, 1464, 1468, 1483, 1538,
 * 1539, 1542, 1547, 1548, 1668, 1677 and 1679 are absent from this extract
 * (function name line, memory context creation, two rule-check conditions,
 * part of the SELECT INTO test, snapshot handling, dest receiver creation,
 * two CreateQueryDesc arguments, and the encoding defaults).  The notes
 * placed inline below mark each gap; confirm against the original file.
 */
1372 static CopyState
1374  bool is_from,
1375  Relation rel,
1376  RawStmt *raw_query,
1377  Oid queryRelId,
1378  List *attnamelist,
1379  List *options)
1380 {
1381  CopyState cstate;
1382  TupleDesc tupDesc;
1383  int num_phys_attrs;
1384  MemoryContext oldcontext;
1385 
1386  /* Allocate workspace and zero all fields */
1387  cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
1388 
1389  /*
1390  * We allocate everything used by a cstate in a new memory context. This
1391  * avoids memory leaks during repeated use of COPY in a query.
1392  */
 /* NOTE(review): the statement that creates cstate->copycontext is only partly present (listing lines 1393 and 1395 missing) */
1394  "COPY",
1396 
1397  oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1398 
1399  /* Extract options from the statement node tree */
1400  ProcessCopyOptions(pstate, cstate, is_from, options);
1401 
1402  /* Process the source/target relation or query */
1403  if (rel)
1404  {
1405  Assert(!raw_query);
1406 
1407  cstate->rel = rel;
1408 
1409  tupDesc = RelationGetDescr(cstate->rel);
1410 
1411  /* Don't allow COPY w/ OIDs to or from a table without them */
1412  if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
1413  ereport(ERROR,
1414  (errcode(ERRCODE_UNDEFINED_COLUMN),
1415  errmsg("table \"%s\" does not have OIDs",
1416  RelationGetRelationName(cstate->rel))));
1417  }
1418  else
1419  {
1420  List *rewritten;
1421  Query *query;
1422  PlannedStmt *plan;
1423  DestReceiver *dest;
1424 
1425  Assert(!is_from);
1426  cstate->rel = NULL;
1427 
1428  /* Don't allow COPY w/ OIDs from a query */
1429  if (cstate->oids)
1430  ereport(ERROR,
1431  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1432  errmsg("COPY (query) WITH OIDS is not supported")));
1433 
1434  /*
1435  * Run parse analysis and rewrite. Note this also acquires sufficient
1436  * locks on the source table(s).
1437  *
1438  * Because the parser and planner tend to scribble on their input, we
1439  * make a preliminary copy of the source querytree. This prevents
1440  * problems in the case that the COPY is in a portal or plpgsql
1441  * function and is executed repeatedly. (See also the same hack in
1442  * DECLARE CURSOR and PREPARE.) XXX FIXME someday.
1443  */
1444  rewritten = pg_analyze_and_rewrite(copyObject(raw_query),
1445  pstate->p_sourcetext, NULL, 0,
1446  NULL);
1447 
1448  /* check that we got back something we can work with */
1449  if (rewritten == NIL)
1450  {
1451  ereport(ERROR,
1452  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1453  errmsg("DO INSTEAD NOTHING rules are not supported for COPY")));
1454  }
1455  else if (list_length(rewritten) > 1)
1456  {
1457  ListCell *lc;
1458 
1459  /* examine queries to determine which error message to issue */
1460  foreach(lc, rewritten)
1461  {
1462  Query *q = lfirst_node(Query, lc);
1463 
 /* NOTE(review): the conditions guarding the next two ereport calls (listing lines 1464 and 1468) are missing from this extract */
1465  ereport(ERROR,
1466  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1467  errmsg("conditional DO INSTEAD rules are not supported for COPY")));
1469  ereport(ERROR,
1470  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1471  errmsg("DO ALSO rules are not supported for the COPY")));
1472  }
1473 
 /* fallback message when the loop above raised nothing */
1474  ereport(ERROR,
1475  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1476  errmsg("multi-statement DO INSTEAD rules are not supported for COPY")));
1477  }
1478 
 /* exactly one rewritten query remains at this point */
1479  query = linitial_node(Query, rewritten);
1480 
1481  /* The grammar allows SELECT INTO, but we don't support that */
 /* NOTE(review): the second half of this condition (listing line 1483) is missing from this extract */
1482  if (query->utilityStmt != NULL &&
1484  ereport(ERROR,
1485  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1486  errmsg("COPY (SELECT INTO) is not supported")));
1487 
1488  Assert(query->utilityStmt == NULL);
1489 
1490  /*
1491  * Similarly the grammar doesn't enforce the presence of a RETURNING
1492  * clause, but this is required here.
1493  */
1494  if (query->commandType != CMD_SELECT &&
1495  query->returningList == NIL)
1496  {
1497  Assert(query->commandType == CMD_INSERT ||
1498  query->commandType == CMD_UPDATE ||
1499  query->commandType == CMD_DELETE);
1500 
1501  ereport(ERROR,
1502  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1503  errmsg("COPY query must have a RETURNING clause")));
1504  }
1505 
1506  /* plan the query */
1507  plan = pg_plan_query(query, CURSOR_OPT_PARALLEL_OK, NULL);
1508 
1509  /*
1510  * With row level security and a user using "COPY relation TO", we
1511  * have to convert the "COPY relation TO" to a query-based COPY (eg:
1512  * "COPY (SELECT * FROM relation) TO"), to allow the rewriter to add
1513  * in any RLS clauses.
1514  *
1515  * When this happens, we are passed in the relid of the originally
1516  * found relation (which we have locked). As the planner will look up
1517  * the relation again, we double-check here to make sure it found the
1518  * same one that we have locked.
1519  */
1520  if (queryRelId != InvalidOid)
1521  {
1522  /*
1523  * Note that with RLS involved there may be multiple relations,
1524  * and while the one we need is almost certainly first, we don't
1525  * make any guarantees of that in the planner, so check the whole
1526  * list and make sure we find the original relation.
1527  */
1528  if (!list_member_oid(plan->relationOids, queryRelId))
1529  ereport(ERROR,
1530  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1531  errmsg("relation referenced by COPY statement has changed")));
1532  }
1533 
1534  /*
1535  * Use a snapshot with an updated command ID to ensure this query sees
1536  * results of any previously executed queries.
1537  */
 /* NOTE(review): the snapshot statements themselves (listing lines 1538 and 1539) are missing from this extract */
1540 
1541  /* Create dest receiver for COPY OUT */
 /* NOTE(review): the assignment to dest (listing line 1542) is missing; dest is used on the next line */
1543  ((DR_copy *) dest)->cstate = cstate;
1544 
1545  /* Create a QueryDesc requesting no output */
 /* NOTE(review): two CreateQueryDesc arguments (listing lines 1547 and 1548) are missing from this extract */
1546  cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
1549  dest, NULL, NULL, 0);
1550 
1551  /*
1552  * Call ExecutorStart to prepare the plan for execution.
1553  *
1554  * ExecutorStart computes a result tupdesc for us
1555  */
1556  ExecutorStart(cstate->queryDesc, 0);
1557 
1558  tupDesc = cstate->queryDesc->tupDesc;
1559  }
1560 
1561  /* Generate or convert list of attributes to process */
1562  cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
1563 
 /* the per-column flag arrays below have one slot per tupDesc attribute and are indexed by attnum - 1 */
1564  num_phys_attrs = tupDesc->natts;
1565 
1566  /* Convert FORCE_QUOTE name list to per-column flags, check validity */
1567  cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1568  if (cstate->force_quote_all)
1569  {
1570  int i;
1571 
1572  for (i = 0; i < num_phys_attrs; i++)
1573  cstate->force_quote_flags[i] = true;
1574  }
1575  else if (cstate->force_quote)
1576  {
1577  List *attnums;
1578  ListCell *cur;
1579 
1580  attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_quote);
1581 
1582  foreach(cur, attnums)
1583  {
1584  int attnum = lfirst_int(cur);
1585  Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1586 
 /* a named column must also be one of the columns being copied */
1587  if (!list_member_int(cstate->attnumlist, attnum))
1588  ereport(ERROR,
1589  (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1590  errmsg("FORCE_QUOTE column \"%s\" not referenced by COPY",
1591  NameStr(attr->attname))));
1592  cstate->force_quote_flags[attnum - 1] = true;
1593  }
1594  }
1595 
1596  /* Convert FORCE_NOT_NULL name list to per-column flags, check validity */
1597  cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1598  if (cstate->force_notnull)
1599  {
1600  List *attnums;
1601  ListCell *cur;
1602 
1603  attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_notnull);
1604 
1605  foreach(cur, attnums)
1606  {
1607  int attnum = lfirst_int(cur);
1608  Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1609 
1610  if (!list_member_int(cstate->attnumlist, attnum))
1611  ereport(ERROR,
1612  (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1613  errmsg("FORCE_NOT_NULL column \"%s\" not referenced by COPY",
1614  NameStr(attr->attname))));
1615  cstate->force_notnull_flags[attnum - 1] = true;
1616  }
1617  }
1618 
1619  /* Convert FORCE_NULL name list to per-column flags, check validity */
1620  cstate->force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1621  if (cstate->force_null)
1622  {
1623  List *attnums;
1624  ListCell *cur;
1625 
1626  attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_null);
1627 
1628  foreach(cur, attnums)
1629  {
1630  int attnum = lfirst_int(cur);
1631  Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1632 
1633  if (!list_member_int(cstate->attnumlist, attnum))
1634  ereport(ERROR,
1635  (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1636  errmsg("FORCE_NULL column \"%s\" not referenced by COPY",
1637  NameStr(attr->attname))));
1638  cstate->force_null_flags[attnum - 1] = true;
1639  }
1640  }
1641 
1642  /* Convert convert_selectively name list to per-column flags */
 /* unlike the three arrays above, this one is allocated only when the option was given */
1643  if (cstate->convert_selectively)
1644  {
1645  List *attnums;
1646  ListCell *cur;
1647 
1648  cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1649 
1650  attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->convert_select);
1651 
1652  foreach(cur, attnums)
1653  {
1654  int attnum = lfirst_int(cur);
1655  Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1656 
1657  if (!list_member_int(cstate->attnumlist, attnum))
1658  ereport(ERROR,
1659  (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1660  errmsg_internal("selected column \"%s\" not referenced by COPY",
1661  NameStr(attr->attname))));
1662  cstate->convert_select_flags[attnum - 1] = true;
1663  }
1664  }
1665 
1666  /* Use client encoding when ENCODING option is not specified. */
 /* NOTE(review): the assignment controlled by this test (listing line 1668) is missing from this extract */
1667  if (cstate->file_encoding < 0)
1669 
1670  /*
1671  * Set up encoding conversion info. Even if the file and server encodings
1672  * are the same, we must apply pg_any_to_server() to validate data in
1673  * multibyte encodings.
1674  */
 /* NOTE(review): the rest of this expression and the statement after it (listing lines 1677 and 1679) are missing from this extract */
1675  cstate->need_transcoding =
1676  (cstate->file_encoding != GetDatabaseEncoding() ||
1678  /* See Multibyte encoding comment above */
1680 
1681  cstate->copy_dest = COPY_FILE; /* default */
1682 
 /* leave copycontext; the caller gets back the context that was current on entry */
1683  MemoryContextSwitchTo(oldcontext);
1684 
1685  return cstate;
1686 }
1687 
1688 /*
1689  * Closes the pipe to an external program, checking the pclose() return code.
1690  */
/*
 * cstate must describe a program copy (asserted via cstate->is_program);
 * cstate->copy_file is the stream handed to ClosePipeStream.
 *
 * Raises ERROR when ClosePipeStream returns -1 (failure of the close itself,
 * reported with %m) or any other nonzero value (the program failed; the
 * detail text comes from wait_result_to_str).  Returns normally only for a
 * zero result.
 *
 * NOTE(review): listing lines 1692 (function name and parameter list) and
 * 1701 (first argument of the first ereport) are absent from this extract --
 * confirm against the original file.
 */
1691 static void
1693 {
1694  int pclose_rc;
1695 
1696  Assert(cstate->is_program);
1697 
1698  pclose_rc = ClosePipeStream(cstate->copy_file);
1699  if (pclose_rc == -1)
1700  ereport(ERROR,
1702  errmsg("could not close pipe to external command: %m")));
1703  else if (pclose_rc != 0)
1704  ereport(ERROR,
1705  (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
1706  errmsg("program \"%s\" failed",
1707  cstate->filename),
1708  errdetail_internal("%s", wait_result_to_str(pclose_rc))));
1709 }
1710 
1711 /*
1712  * Release resources allocated in a cstate for COPY TO/FROM.
1713  */
/*
 * For a program copy the pipe is closed through ClosePipeToProgram.
 * Otherwise, when a filename is set, the file is closed with FreeFile and a
 * nonzero result is raised as ERROR.  When there is no filename nothing is
 * closed here.  Finally the cstate struct itself is pfree'd, so the pointer
 * must not be used after this call.
 *
 * NOTE(review): listing lines 1715 (function name and parameter list), 1725
 * (first argument of the ereport) and 1730 (a statement just before the
 * pfree, presumably releasing cstate->copycontext) are absent from this
 * extract -- confirm against the original file.
 */
1714 static void
1716 {
1717  if (cstate->is_program)
1718  {
1719  ClosePipeToProgram(cstate);
1720  }
1721  else
1722  {
1723  if (cstate->filename != NULL && FreeFile(cstate->copy_file))
1724  ereport(ERROR,
1726  errmsg("could not close file \"%s\": %m",
1727  cstate->filename)));
1728  }
1729 
1731  pfree(cstate);
1732 }
1733 
1734 /*
1735  * Setup CopyState to read tuples from a table or a query for COPY TO.
1736  */
/*
 * Arguments, as used in the body below:
 *   pstate      - passed through to BeginCopy.
 *   rel         - relation to copy from, or NULL for a query-based COPY;
 *                 any relkind other than RELKIND_RELATION is rejected.
 *   query, queryRelId, attnamelist, options - passed through to BeginCopy.
 *   filename    - NULL selects the pipe case; otherwise either a command
 *                 (is_program) or a file path that must be absolute.
 *   is_program  - true when filename is a command opened with
 *                 OpenPipeStream.
 *
 * Returns the CopyState from BeginCopy with filename, is_program and
 * copy_file filled in as applicable.  Allocation here runs with
 * cstate->copycontext current; the previous context is restored on exit.
 *
 * NOTE(review): listing lines 1738, 1757, 1763, 1769, 1780, 1796, 1809,
 * 1845 and 1855 are absent from this extract (function name line, the
 * relation-name argument of four errmsg calls, a line before the stdout
 * assignment, and the first argument of three ereport calls) -- confirm
 * against the original file.
 */
1737 static CopyState
1739  Relation rel,
1740  RawStmt *query,
1741  Oid queryRelId,
1742  const char *filename,
1743  bool is_program,
1744  List *attnamelist,
1745  List *options)
1746 {
1747  CopyState cstate;
1748  bool pipe = (filename == NULL);
1749  MemoryContext oldcontext;
1750 
 /* only plain tables can be copied directly; each other relkind gets its own message */
1751  if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
1752  {
1753  if (rel->rd_rel->relkind == RELKIND_VIEW)
1754  ereport(ERROR,
1755  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1756  errmsg("cannot copy from view \"%s\"",
1758  errhint("Try the COPY (SELECT ...) TO variant.")));
1759  else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
1760  ereport(ERROR,
1761  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1762  errmsg("cannot copy from materialized view \"%s\"",
1764  errhint("Try the COPY (SELECT ...) TO variant.")));
1765  else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1766  ereport(ERROR,
1767  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1768  errmsg("cannot copy from foreign table \"%s\"",
1770  errhint("Try the COPY (SELECT ...) TO variant.")));
1771  else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
1772  ereport(ERROR,
1773  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1774  errmsg("cannot copy from sequence \"%s\"",
1775  RelationGetRelationName(rel))));
1776  else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1777  ereport(ERROR,
1778  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1779  errmsg("cannot copy from partitioned table \"%s\"",
1781  errhint("Try the COPY (SELECT ...) TO variant.")));
1782  else
1783  ereport(ERROR,
1784  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1785  errmsg("cannot copy from non-table relation \"%s\"",
1786  RelationGetRelationName(rel))));
1787  }
1788 
 /* is_from is passed as false: this is the COPY TO direction */
1789  cstate = BeginCopy(pstate, false, rel, query, queryRelId, attnamelist,
1790  options);
1791  oldcontext = MemoryContextSwitchTo(cstate->copycontext);
1792 
1793  if (pipe)
1794  {
1795  Assert(!is_program); /* the grammar does not allow this */
 /* NOTE(review): listing line 1796 is missing; it may be a condition guarding the assignment below -- confirm */
1797  cstate->copy_file = stdout;
1798  }
1799  else
1800  {
1801  cstate->filename = pstrdup(filename);
1802  cstate->is_program = is_program;
1803 
1804  if (is_program)
1805  {
1806  cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
1807  if (cstate->copy_file == NULL)
1808  ereport(ERROR,
1810  errmsg("could not execute command \"%s\": %m",
1811  cstate->filename)));
1812  }
1813  else
1814  {
1815  mode_t oumask; /* Pre-existing umask value */
1816  struct stat st;
1817 
1818  /*
1819  * Prevent write to relative path ... too easy to shoot oneself in
1820  * the foot by overwriting a database file ...
1821  */
1822  if (!is_absolute_path(filename))
1823  ereport(ERROR,
1824  (errcode(ERRCODE_INVALID_NAME),
1825  errmsg("relative path not allowed for COPY to file")));
1826 
 /*
  * Mask group and other write permission while the file is opened; the
  * previous umask is put back on both the error and the normal path.
  */
1827  oumask = umask(S_IWGRP | S_IWOTH);
1828  PG_TRY();
1829  {
1830  cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
1831  }
1832  PG_CATCH();
1833  {
1834  umask(oumask);
1835  PG_RE_THROW();
1836  }
1837  PG_END_TRY();
1838  umask(oumask);
1839  if (cstate->copy_file == NULL)
1840  {
1841  /* copy errno because ereport subfunctions might change it */
1842  int save_errno = errno;
1843 
 /* the hint is attached only for ENOENT and EACCES */
1844  ereport(ERROR,
1846  errmsg("could not open file \"%s\" for writing: %m",
1847  cstate->filename),
1848  (save_errno == ENOENT || save_errno == EACCES) ?
1849  errhint("COPY TO instructs the PostgreSQL server process to write a file. "
1850  "You may want a client-side facility such as psql's \\copy.") : 0));
1851  }
1852 
 /* the opened path must not turn out to be a directory */
1853  if (fstat(fileno(cstate->copy_file), &st))
1854  ereport(ERROR,
1856  errmsg("could not stat file \"%s\": %m",
1857  cstate->filename)));
1858 
1859  if (S_ISDIR(st.st_mode))
1860  ereport(ERROR,
1861  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1862  errmsg("\"%s\" is a directory", cstate->filename)));
1863  }
1864  }
1865 
1866  MemoryContextSwitchTo(oldcontext);
1867 
1868  return cstate;
1869 }
1870 
1871 /*
1872  * This intermediate routine exists mainly to localize the effects of setjmp
1873  * so we don't need to plaster a lot of variables with "volatile".
1874  */
/*
 * Runs CopyTo(cstate) inside PG_TRY and returns the row count it reports.
 * When there is no filename and output goes to the remote frontend
 * (whereToSendOutput == DestRemote), the call is bracketed by SendCopyBegin
 * and SendCopyEnd.  On error, pq_endcopyout(true) is called before the
 * error is re-thrown.
 *
 * NOTE(review): listing line 1876 (function name and parameter list) is
 * absent from this extract -- confirm against the original file.
 */
1875 static uint64
1877 {
1878  bool pipe = (cstate->filename == NULL);
1879  bool fe_copy = (pipe && whereToSendOutput == DestRemote);
1880  uint64 processed;
1881 
1882  PG_TRY();
1883  {
1884  if (fe_copy)
1885  SendCopyBegin(cstate);
1886 
1887  processed = CopyTo(cstate);
1888 
1889  if (fe_copy)
1890  SendCopyEnd(cstate);
1891  }
1892  PG_CATCH();
1893  {
1894  /*
1895  * Make sure we turn off old-style COPY OUT mode upon error. It is
1896  * okay to do this in all cases, since it does nothing if the mode is
1897  * not on.
1898  */
1899  pq_endcopyout(true);
1900  PG_RE_THROW();
1901  }
1902  PG_END_TRY();
1903 
1904  return processed;
1905 }
1906 
1907 /*
1908  * Clean up storage and release resources for COPY TO.
1909  */
/*
 * When the copy was query-based (cstate->queryDesc is set), the executor is
 * shut down in order: ExecutorFinish, ExecutorEnd, FreeQueryDesc.  After
 * that EndCopy closes the output and frees cstate, so the pointer must not
 * be used once this returns.
 *
 * NOTE(review): listing lines 1911 (function name and parameter list) and
 * 1919 (a statement after FreeQueryDesc, presumably undoing the snapshot
 * setup done when the query was started) are absent from this extract --
 * confirm against the original file.
 */
1910 static void
1912 {
1913  if (cstate->queryDesc != NULL)
1914  {
1915  /* Close down the query and free resources. */
1916  ExecutorFinish(cstate->queryDesc);
1917  ExecutorEnd(cstate->queryDesc);
1918  FreeQueryDesc(cstate->queryDesc);
1920  }
1921 
1922  /* Clean up storage */
1923  EndCopy(cstate);
1924 }
1925 
1926 /*
1927  * Copy from relation or query TO file.
1928  */
/*
 * Steps, in order:
 *   1. pick the tuple descriptor (relation or query result) and look up an
 *      output function for every column in cstate->attnumlist;
 *   2. emit the binary file header, or for text/CSV prepare the NULL string
 *      and the optional header line;
 *   3. send the rows, either by scanning cstate->rel directly or by running
 *      the prepared query, whose dest receiver sends the tuples;
 *   4. emit the binary trailer when in binary mode.
 *
 * Returns the number of rows processed.
 *
 * NOTE(review): listing lines 1930 (function name and parameter list), 1973
 * and 1975 (creation of the per-row memory context), 2043 (first statement
 * of the scan loop) and 2073 (a statement just before the return,
 * presumably releasing the per-row context) are absent from this extract --
 * confirm against the original file.
 */
1929 static uint64
1931 {
1932  TupleDesc tupDesc;
1933  int num_phys_attrs;
1934  ListCell *cur;
1935  uint64 processed;
1936 
1937  if (cstate->rel)
1938  tupDesc = RelationGetDescr(cstate->rel);
1939  else
1940  tupDesc = cstate->queryDesc->tupDesc;
1941  num_phys_attrs = tupDesc->natts;
1942  cstate->null_print_client = cstate->null_print; /* default */
1943 
1944  /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1945  cstate->fe_msgbuf = makeStringInfo();
1946 
1947  /* Get info about the columns we need to process. */
 /* the array has a slot per attribute, but only slots for columns in attnumlist are filled (index attnum - 1) */
1948  cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1949  foreach(cur, cstate->attnumlist)
1950  {
1951  int attnum = lfirst_int(cur);
1952  Oid out_func_oid;
1953  bool isvarlena;
1954  Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1955 
1956  if (cstate->binary)
1957  getTypeBinaryOutputInfo(attr->atttypid,
1958  &out_func_oid,
1959  &isvarlena);
1960  else
1961  getTypeOutputInfo(attr->atttypid,
1962  &out_func_oid,
1963  &isvarlena);
1964  fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
1965  }
1966 
1967  /*
1968  * Create a temporary memory context that we can reset once per row to
1969  * recover palloc'd memory. This avoids any problems with leaks inside
1970  * datatype output routines, and should be faster than retail pfree's
1971  * anyway. (We don't need a whole econtext as CopyFrom does.)
1972  */
 /* NOTE(review): the statement creating that context is only partly present (listing lines 1973 and 1975 missing) */
1974  "COPY TO",
1976 
1977  if (cstate->binary)
1978  {
1979  /* Generate header for a binary copy */
1980  int32 tmp;
1981 
1982  /* Signature */
1983  CopySendData(cstate, BinarySignature, 11);
1984  /* Flags field */
 /* bit 16 of the flags word is set when OIDs are included */
1985  tmp = 0;
1986  if (cstate->oids)
1987  tmp |= (1 << 16);
1988  CopySendInt32(cstate, tmp);
1989  /* No header extension */
1990  tmp = 0;
1991  CopySendInt32(cstate, tmp);
1992  }
1993  else
1994  {
1995  /*
1996  * For non-binary copy, we need to convert null_print to file
1997  * encoding, because it will be sent directly with CopySendString.
1998  */
1999  if (cstate->need_transcoding)
2000  cstate->null_print_client = pg_server_to_any(cstate->null_print,
2001  cstate->null_print_len,
2002  cstate->file_encoding);
2003 
2004  /* if a header has been requested send the line */
2005  if (cstate->header_line)
2006  {
2007  bool hdr_delim = false;
2008 
2009  foreach(cur, cstate->attnumlist)
2010  {
2011  int attnum = lfirst_int(cur);
2012  char *colname;
2013 
 /* the delimiter goes before every column name except the first */
2014  if (hdr_delim)
2015  CopySendChar(cstate, cstate->delim[0]);
2016  hdr_delim = true;
2017 
2018  colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
2019 
2020  CopyAttributeOutCSV(cstate, colname, false,
2021  list_length(cstate->attnumlist) == 1);
2022  }
2023 
2024  CopySendEndOfRow(cstate);
2025  }
2026  }
2027 
2028  if (cstate->rel)
2029  {
2030  Datum *values;
2031  bool *nulls;
2032  HeapScanDesc scandesc;
2033  HeapTuple tuple;
2034 
2035  values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
2036  nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
2037 
2038  scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
2039 
2040  processed = 0;
2041  while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
2042  {
 /* NOTE(review): listing line 2043, the first statement of this loop body, is missing from this extract */
2044 
2045  /* Deconstruct the tuple ... faster than repeated heap_getattr */
2046  heap_deform_tuple(tuple, tupDesc, values, nulls);
2047 
2048  /* Format and send the data */
2049  CopyOneRowTo(cstate, HeapTupleGetOid(tuple), values, nulls);
2050  processed++;
2051  }
2052 
2053  heap_endscan(scandesc);
2054 
2055  pfree(values);
2056  pfree(nulls);
2057  }
2058  else
2059  {
2060  /* run the plan --- the dest receiver will send tuples */
2061  ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L, true);
 /* the row count is read back from the dest receiver */
2062  processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
2063  }
2064 
2065  if (cstate->binary)
2066  {
2067  /* Generate trailer for a binary copy */
2068  CopySendInt16(cstate, -1);
2069  /* Need to flush out the trailer */
2070  CopySendEndOfRow(cstate);
2071  }
2072 
2074 
2075  return processed;
2076 }
2077 
2078 /*
2079  * Emit one row during CopyTo().
2080  */
2081 static void
2082 CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls)
2083 {
2084  bool need_delim = false;
2085  FmgrInfo *out_functions = cstate->out_functions;
2086  MemoryContext oldcontext;
2087  ListCell *cur;
2088  char *string;
2089 
2090  MemoryContextReset(cstate->rowcontext);
2091  oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
2092 
2093  if (cstate->binary)
2094  {
2095  /* Binary per-tuple header */
2096  CopySendInt16(cstate, list_length(cstate->attnumlist));
2097  /* Send OID if wanted --- note attnumlist doesn't include it */
2098  if (cstate->oids)
2099  {
2100  /* Hack --- assume Oid is same size as int32 */
2101  CopySendInt32(cstate, sizeof(int32));
2102  CopySendInt32(cstate, tupleOid);
2103  }
2104  }
2105  else
2106  {
2107  /* Text format has no per-tuple header, but send OID if wanted */
2108  /* Assume digits don't need any quoting or encoding conversion */
2109  if (cstate->oids)
2110  {
2112  ObjectIdGetDatum(tupleOid)));
2113  CopySendString(cstate, string);
2114  need_delim = true;
2115  }
2116  }
2117 
2118  foreach(cur, cstate->attnumlist)
2119  {
2120  int attnum = lfirst_int(cur);
2121  Datum value = values[attnum - 1];
2122  bool isnull = nulls[attnum - 1];
2123 
2124  if (!cstate->binary)
2125  {
2126  if (need_delim)
2127  CopySendChar(cstate, cstate->delim[0]);
2128  need_delim = true;
2129  }
2130 
2131  if (isnull)
2132  {
2133  if (!cstate->binary)
2134  CopySendString(cstate, cstate->null_print_client);
2135  else
2136  CopySendInt32(cstate, -1);
2137  }
2138  else
2139  {
2140  if (!cstate->binary)
2141  {
2142  string = OutputFunctionCall(&out_functions[attnum - 1],
2143  value);
2144  if (cstate->csv_mode)
2145  CopyAttributeOutCSV(cstate, string,
2146  cstate->force_quote_flags[attnum - 1],
2147  list_length(cstate->attnumlist) == 1);
2148  else
2149  CopyAttributeOutText(cstate, string);
2150  }
2151  else
2152  {
2153  bytea *outputbytes;
2154 
2155  outputbytes = SendFunctionCall(&out_functions[attnum - 1],
2156  value);
2157  CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
2158  CopySendData(cstate, VARDATA(outputbytes),
2159  VARSIZE(outputbytes) - VARHDRSZ);
2160  }
2161  }
2162  }
2163 
2164  CopySendEndOfRow(cstate);
2165 
2166  MemoryContextSwitchTo(oldcontext);
2167 }
2168 
2169 
2170 /*
2171  * error context callback for COPY FROM
2172  *
2173  * The argument for the error context must be CopyState.
2174  */
2175 void
2177 {
2178  CopyState cstate = (CopyState) arg;
2179 
2180  if (cstate->binary)
2181  {
2182  /* can't usefully display the data */
2183  if (cstate->cur_attname)
2184  errcontext("COPY %s, line %d, column %s",
2185  cstate->cur_relname, cstate->cur_lineno,
2186  cstate->cur_attname);
2187  else
2188  errcontext("COPY %s, line %d",
2189  cstate->cur_relname, cstate->cur_lineno);
2190  }
2191  else
2192  {
2193  if (cstate->cur_attname && cstate->cur_attval)
2194  {
2195  /* error is relevant to a particular column */
2196  char *attval;
2197 
2198  attval = limit_printout_length(cstate->cur_attval);
2199  errcontext("COPY %s, line %d, column %s: \"%s\"",
2200  cstate->cur_relname, cstate->cur_lineno,
2201  cstate->cur_attname, attval);
2202  pfree(attval);
2203  }
2204  else if (cstate->cur_attname)
2205  {
2206  /* error is relevant to a particular column, value is NULL */
2207  errcontext("COPY %s, line %d, column %s: null input",
2208  cstate->cur_relname, cstate->cur_lineno,
2209  cstate->cur_attname);
2210  }
2211  else
2212  {
2213  /*
2214  * Error is relevant to a particular line.
2215  *
2216  * If line_buf still contains the correct line, and it's already
2217  * transcoded, print it. If it's still in a foreign encoding, it's
2218  * quite likely that the error is precisely a failure to do
2219  * encoding conversion (ie, bad data). We dare not try to convert
2220  * it, and at present there's no way to regurgitate it without
2221  * conversion. So we have to punt and just report the line number.
2222  */
2223  if (cstate->line_buf_valid &&
2224  (cstate->line_buf_converted || !cstate->need_transcoding))
2225  {
2226  char *lineval;
2227 
2228  lineval = limit_printout_length(cstate->line_buf.data);
2229  errcontext("COPY %s, line %d: \"%s\"",
2230  cstate->cur_relname, cstate->cur_lineno, lineval);
2231  pfree(lineval);
2232  }
2233  else
2234  {
2235  errcontext("COPY %s, line %d",
2236  cstate->cur_relname, cstate->cur_lineno);
2237  }
2238  }
2239  }
2240 }
2241 
/*
 * Return a palloc'd copy of str that is safe to embed in a COPY message.
 *
 * Inputs of at most MAX_COPY_DATA_DISPLAY bytes are duplicated unchanged.
 * Longer inputs are clipped to at most that many bytes by pg_mbcliplen and
 * get "..." appended to show that truncation happened.
 *
 * Using the sprintf "precision" limit would look simpler, but some versions
 * of glibc have a bug/misfeature that vsnprintf will always fail (return -1)
 * if it is asked to truncate a string that contains invalid byte sequences
 * for the current encoding.  So the truncation is done by hand here.
 */
static char *
limit_printout_length(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100

	int			slen = strlen(str);

	if (slen > MAX_COPY_DATA_DISPLAY)
	{
		/* Too long: apply encoding-dependent truncation */
		int			cliplen = pg_mbcliplen(str, slen, MAX_COPY_DATA_DISPLAY);
		char	   *clipped = (char *) palloc(cliplen + 4);

		/* Copy the kept prefix, then "..." together with its terminator */
		memcpy(clipped, str, cliplen);
		memcpy(clipped + cliplen, "...", 4);

		return clipped;
	}

	/* Short enough to show in full */
	return pstrdup(str);
}
2276 
2277 /*
2278  * Copy FROM file to relation.
2279  */
2280 uint64
2282 {
2283  HeapTuple tuple;
2284  TupleDesc tupDesc;
2285  Datum *values;
2286  bool *nulls;
2287  ResultRelInfo *resultRelInfo;
2288  ResultRelInfo *saved_resultRelInfo = NULL;
2289  EState *estate = CreateExecutorState(); /* for ExecConstraints() */
2290  ExprContext *econtext;
2291  TupleTableSlot *myslot;
2292  MemoryContext oldcontext = CurrentMemoryContext;
2293 
2294  ErrorContextCallback errcallback;
2295  CommandId mycid = GetCurrentCommandId(true);
2296  int hi_options = 0; /* start with default heap_insert options */
2297  BulkInsertState bistate;
2298  uint64 processed = 0;
2299  bool useHeapMultiInsert;
2300  int nBufferedTuples = 0;
2301  int prev_leaf_part_index = -1;
2302 
2303 #define MAX_BUFFERED_TUPLES 1000
2304  HeapTuple *bufferedTuples = NULL; /* initialize to silence warning */
2305  Size bufferedTuplesSize = 0;
2306  int firstBufferedLineNo = 0;
2307 
2308  Assert(cstate->rel);
2309 
2310  /*
2311  * The target must be a plain relation or have an INSTEAD OF INSERT row
2312  * trigger. (Currently, such triggers are only allowed on views, so we
2313  * only hint about them in the view case.)
2314  */
2315  if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
2316  cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
2317  !(cstate->rel->trigdesc &&
2319  {
2320  if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
2321  ereport(ERROR,
2322  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2323  errmsg("cannot copy to view \"%s\"",
2324  RelationGetRelationName(cstate->rel)),
2325  errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
2326  else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
2327  ereport(ERROR,
2328  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2329  errmsg("cannot copy to materialized view \"%s\"",
2330  RelationGetRelationName(cstate->rel))));
2331  else if (cstate->rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
2332  ereport(ERROR,
2333  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2334  errmsg("cannot copy to foreign table \"%s\"",
2335  RelationGetRelationName(cstate->rel))));
2336  else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
2337  ereport(ERROR,
2338  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2339  errmsg("cannot copy to sequence \"%s\"",
2340  RelationGetRelationName(cstate->rel))));
2341  else
2342  ereport(ERROR,
2343  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2344  errmsg("cannot copy to non-table relation \"%s\"",
2345  RelationGetRelationName(cstate->rel))));
2346  }
2347 
2348  tupDesc = RelationGetDescr(cstate->rel);
2349 
2350  /*----------
2351  * Check to see if we can avoid writing WAL
2352  *
2353  * If archive logging/streaming is not enabled *and* either
2354  * - table was created in same transaction as this COPY
2355  * - data is being written to relfilenode created in this transaction
2356  * then we can skip writing WAL. It's safe because if the transaction
2357  * doesn't commit, we'll discard the table (or the new relfilenode file).
2358  * If it does commit, we'll have done the heap_sync at the bottom of this
2359  * routine first.
2360  *
2361  * As mentioned in comments in utils/rel.h, the in-same-transaction test
2362  * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
2363  * can be cleared before the end of the transaction. The exact case is
2364  * when a relation sets a new relfilenode twice in same transaction, yet
2365  * the second one fails in an aborted subtransaction, e.g.
2366  *
2367  * BEGIN;
2368  * TRUNCATE t;
2369  * SAVEPOINT save;
2370  * TRUNCATE t;
2371  * ROLLBACK TO save;
2372  * COPY ...
2373  *
2374  * Also, if the target file is new-in-transaction, we assume that checking
2375  * FSM for free space is a waste of time, even if we must use WAL because
2376  * of archiving. This could possibly be wrong, but it's unlikely.
2377  *
2378  * The comments for heap_insert and RelationGetBufferForTuple specify that
2379  * skipping WAL logging is only safe if we ensure that our tuples do not
2380  * go into pages containing tuples from any other transactions --- but this
2381  * must be the case if we have a new table or new relfilenode, so we need
2382  * no additional work to enforce that.
2383  *----------
2384  */
2385  /* createSubid is creation check, newRelfilenodeSubid is truncation check */
2386  if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
2388  {
2389  hi_options |= HEAP_INSERT_SKIP_FSM;
2390  if (!XLogIsNeeded())
2391  hi_options |= HEAP_INSERT_SKIP_WAL;
2392  }
2393 
2394  /*
2395  * Optimize if new relfilenode was created in this subxact or one of its
2396  * committed children and we won't see those rows later as part of an
2397  * earlier scan or command. This ensures that if this subtransaction
2398  * aborts then the frozen rows won't be visible after xact cleanup. Note
2399  * that the stronger test of exactly which subtransaction created it is
2400  * crucial for correctness of this optimization.
2401  */
2402  if (cstate->freeze)
2403  {
2405  ereport(ERROR,
2406  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2407  errmsg("cannot perform FREEZE because of prior transaction activity")));
2408 
2409  if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
2411  ereport(ERROR,
2412  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2413  errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
2414 
2415  hi_options |= HEAP_INSERT_FROZEN;
2416  }
2417 
2418  /*
2419  * We need a ResultRelInfo so we can use the regular executor's
2420  * index-entry-making machinery. (There used to be a huge amount of code
2421  * here that basically duplicated execUtils.c ...)
2422  */
2423  resultRelInfo = makeNode(ResultRelInfo);
2424  InitResultRelInfo(resultRelInfo,
2425  cstate->rel,
2426  1, /* dummy rangetable index */
2427  NULL,
2428  0);
2429 
2430  ExecOpenIndices(resultRelInfo, false);
2431 
2432  estate->es_result_relations = resultRelInfo;
2433  estate->es_num_result_relations = 1;
2434  estate->es_result_relation_info = resultRelInfo;
2435  estate->es_range_table = cstate->range_table;
2436 
2437  /* Set up a tuple slot too */
2438  myslot = ExecInitExtraTupleSlot(estate);
2439  ExecSetSlotDescriptor(myslot, tupDesc);
2440  /* Triggers might need a slot as well */
2441  estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
2442 
2443  /* Prepare to catch AFTER triggers. */
2445 
2446  /*
2447  * If there are any triggers with transition tables on the named relation,
2448  * we need to be prepared to capture transition tuples.
2449  */
2450  cstate->transition_capture =
2452  RelationGetRelid(cstate->rel),
2453  CMD_INSERT);
2454 
2455  /*
2456  * If the named relation is a partitioned table, initialize state for
2457  * CopyFrom tuple routing.
2458  */
2459  if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2460  {
2461  PartitionDispatch *partition_dispatch_info;
2462  ResultRelInfo **partitions;
2463  TupleConversionMap **partition_tupconv_maps;
2464  TupleTableSlot *partition_tuple_slot;
2465  int num_parted,
2466  num_partitions;
2467 
2469  1,
2470  estate,
2471  &partition_dispatch_info,
2472  &partitions,
2473  &partition_tupconv_maps,
2474  &partition_tuple_slot,
2475  &num_parted, &num_partitions);
2476  cstate->partition_dispatch_info = partition_dispatch_info;
2477  cstate->num_dispatch = num_parted;
2478  cstate->partitions = partitions;
2479  cstate->num_partitions = num_partitions;
2480  cstate->partition_tupconv_maps = partition_tupconv_maps;
2481  cstate->partition_tuple_slot = partition_tuple_slot;
2482 
2483  /*
2484  * If we are capturing transition tuples, they may need to be
2485  * converted from partition format back to partitioned table format
2486  * (this is only ever necessary if a BEFORE trigger modifies the
2487  * tuple).
2488  */
2489  if (cstate->transition_capture != NULL)
2490  {
2491  int i;
2492 
2494  palloc0(sizeof(TupleConversionMap *) * cstate->num_partitions);
2495  for (i = 0; i < cstate->num_partitions; ++i)
2496  {
2497  cstate->transition_tupconv_maps[i] =
2499  RelationGetDescr(cstate->rel),
2500  gettext_noop("could not convert row type"));
2501  }
2502  }
2503  }
2504 
2505  /*
2506  * It's more efficient to prepare a bunch of tuples for insertion, and
2507  * insert them in one heap_multi_insert() call, than call heap_insert()
2508  * separately for every tuple. However, we can't do that if there are
2509  * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
2510  * expressions. Such triggers or expressions might query the table we're
2511  * inserting to, and act differently if the tuples that have already been
2512  * processed and prepared for insertion are not there. We also can't do
2513  * it if the table is partitioned.
2514  */
2515  if ((resultRelInfo->ri_TrigDesc != NULL &&
2516  (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
2517  resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
2518  cstate->partition_dispatch_info != NULL ||
2519  cstate->volatile_defexprs)
2520  {
2521  useHeapMultiInsert = false;
2522  }
2523  else
2524  {
2525  useHeapMultiInsert = true;
2526  bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
2527  }
2528 
2529  /*
2530  * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
2531  * should do this for COPY, since it's not really an "INSERT" statement as
2532  * such. However, executing these triggers maintains consistency with the
2533  * EACH ROW triggers that we already fire on COPY.
2534  */
2535  ExecBSInsertTriggers(estate, resultRelInfo);
2536 
2537  values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
2538  nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
2539 
2540  bistate = GetBulkInsertState();
2541  econtext = GetPerTupleExprContext(estate);
2542 
2543  /* Set up callback to identify error line number */
2544  errcallback.callback = CopyFromErrorCallback;
2545  errcallback.arg = (void *) cstate;
2546  errcallback.previous = error_context_stack;
2547  error_context_stack = &errcallback;
2548 
2549  for (;;)
2550  {
2551  TupleTableSlot *slot;
2552  bool skip_tuple;
2553  Oid loaded_oid = InvalidOid;
2554 
2556 
2557  if (nBufferedTuples == 0)
2558  {
2559  /*
2560  * Reset the per-tuple exprcontext. We can only do this if the
2561  * tuple buffer is empty. (Calling the context the per-tuple
2562  * memory context is a bit of a misnomer now.)
2563  */
2564  ResetPerTupleExprContext(estate);
2565  }
2566 
2567  /* Switch into its memory context */
2569 
2570  if (!NextCopyFrom(cstate, econtext, values, nulls, &loaded_oid))
2571  break;
2572 
2573  /* And now we can form the input tuple. */
2574  tuple = heap_form_tuple(tupDesc, values, nulls);
2575 
2576  if (loaded_oid != InvalidOid)
2577  HeapTupleSetOid(tuple, loaded_oid);
2578 
2579  /*
2580  * Constraints might reference the tableoid column, so initialize
2581  * t_tableOid before evaluating them.
2582  */
2583  tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
2584 
2585  /* Triggers and stuff need to be invoked in query context. */
2586  MemoryContextSwitchTo(oldcontext);
2587 
2588  /* Place tuple in tuple slot --- but slot shouldn't free it */
2589  slot = myslot;
2590  ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2591 
2592  /* Determine the partition to heap_insert the tuple into */
2593  if (cstate->partition_dispatch_info)
2594  {
2595  int leaf_part_index;
2596  TupleConversionMap *map;
2597 
2598  /*
2599  * Away we go ... If we end up not finding a partition after all,
2600  * ExecFindPartition() does not return and errors out instead.
2601  * Otherwise, the returned value is to be used as an index into
2602  * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
2603  * will get us the ResultRelInfo and TupleConversionMap for the
2604  * partition, respectively.
2605  */
2606  leaf_part_index = ExecFindPartition(resultRelInfo,
2607  cstate->partition_dispatch_info,
2608  slot,
2609  estate);
2610  Assert(leaf_part_index >= 0 &&
2611  leaf_part_index < cstate->num_partitions);
2612 
2613  /*
2614  * If this tuple is mapped to a partition that is not same as the
2615  * previous one, we'd better make the bulk insert mechanism gets a
2616  * new buffer.
2617  */
2618  if (prev_leaf_part_index != leaf_part_index)
2619  {
2620  ReleaseBulkInsertStatePin(bistate);
2621  prev_leaf_part_index = leaf_part_index;
2622  }
2623 
2624  /*
2625  * Save the old ResultRelInfo and switch to the one corresponding
2626  * to the selected partition.
2627  */
2628  saved_resultRelInfo = resultRelInfo;
2629  resultRelInfo = cstate->partitions[leaf_part_index];
2630 
2631  /* We do not yet have a way to insert into a foreign partition */
2632  if (resultRelInfo->ri_FdwRoutine)
2633  ereport(ERROR,
2634  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2635  errmsg("cannot route inserted tuples to a foreign table")));
2636 
2637  /*
2638  * For ExecInsertIndexTuples() to work on the partition's indexes
2639  */
2640  estate->es_result_relation_info = resultRelInfo;
2641 
2642  /*
2643  * If we're capturing transition tuples, we might need to convert
2644  * from the partition rowtype to parent rowtype.
2645  */
2646  if (cstate->transition_capture != NULL)
2647  {
2648  if (resultRelInfo->ri_TrigDesc &&
2649  (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
2650  resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
2651  {
2652  /*
2653  * If there are any BEFORE or INSTEAD triggers on the
2654  * partition, we'll have to be ready to convert their
2655  * result back to tuplestore format.
2656  */
2658  cstate->transition_capture->tcs_map =
2659  cstate->transition_tupconv_maps[leaf_part_index];
2660  }
2661  else
2662  {
2663  /*
2664  * Otherwise, just remember the original unconverted
2665  * tuple, to avoid a needless round trip conversion.
2666  */
2668  cstate->transition_capture->tcs_map = NULL;
2669  }
2670  }
2671 
2672  /*
2673  * We might need to convert from the parent rowtype to the
2674  * partition rowtype.
2675  */
2676  map = cstate->partition_tupconv_maps[leaf_part_index];
2677  if (map)
2678  {
2679  Relation partrel = resultRelInfo->ri_RelationDesc;
2680 
2681  tuple = do_convert_tuple(tuple, map);
2682 
2683  /*
2684  * We must use the partition's tuple descriptor from this
2685  * point on. Use a dedicated slot from this point on until
2686  * we're finished dealing with the partition.
2687  */
2688  slot = cstate->partition_tuple_slot;
2689  Assert(slot != NULL);
2690  ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
2691  ExecStoreTuple(tuple, slot, InvalidBuffer, true);
2692  }
2693 
2694  tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
2695  }
2696 
2697  skip_tuple = false;
2698 
2699  /* BEFORE ROW INSERT Triggers */
2700  if (resultRelInfo->ri_TrigDesc &&
2701  resultRelInfo->ri_TrigDesc->trig_insert_before_row)
2702  {
2703  slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
2704 
2705  if (slot == NULL) /* "do nothing" */
2706  skip_tuple = true;
2707  else /* trigger might have changed tuple */
2708  tuple = ExecMaterializeSlot(slot);
2709  }
2710 
2711  if (!skip_tuple)
2712  {
2713  if (resultRelInfo->ri_TrigDesc &&
2714  resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
2715  {
2716  /* Pass the data to the INSTEAD ROW INSERT trigger */
2717  ExecIRInsertTriggers(estate, resultRelInfo, slot);
2718  }
2719  else
2720  {
2721  /*
2722  * We always check the partition constraint, including when
2723  * the tuple got here via tuple-routing. However we don't
2724  * need to in the latter case if no BR trigger is defined on
2725  * the partition. Note that a BR trigger might modify the
2726  * tuple such that the partition constraint is no longer
2727  * satisfied, so we need to check in that case.
2728  */
2729  bool check_partition_constr =
2730  (resultRelInfo->ri_PartitionCheck != NIL);
2731 
2732  if (saved_resultRelInfo != NULL &&
2733  !(resultRelInfo->ri_TrigDesc &&
2734  resultRelInfo->ri_TrigDesc->trig_insert_before_row))
2735  check_partition_constr = false;
2736 
2737  /* Check the constraints of the tuple */
2738  if (cstate->rel->rd_att->constr || check_partition_constr)
2739  ExecConstraints(resultRelInfo, slot, estate);
2740 
2741  if (useHeapMultiInsert)
2742  {
2743  /* Add this tuple to the tuple buffer */
2744  if (nBufferedTuples == 0)
2745  firstBufferedLineNo = cstate->cur_lineno;
2746  bufferedTuples[nBufferedTuples++] = tuple;
2747  bufferedTuplesSize += tuple->t_len;
2748 
2749  /*
2750  * If the buffer filled up, flush it. Also flush if the
2751  * total size of all the tuples in the buffer becomes
2752  * large, to avoid using large amounts of memory for the
2753  * buffer when the tuples are exceptionally wide.
2754  */
2755  if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
2756  bufferedTuplesSize > 65535)
2757  {
2758  CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2759  resultRelInfo, myslot, bistate,
2760  nBufferedTuples, bufferedTuples,
2761  firstBufferedLineNo);
2762  nBufferedTuples = 0;
2763  bufferedTuplesSize = 0;
2764  }
2765  }
2766  else
2767  {
2768  List *recheckIndexes = NIL;
2769 
2770  /* OK, store the tuple and create index entries for it */
2771  heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
2772  hi_options, bistate);
2773 
2774  if (resultRelInfo->ri_NumIndices > 0)
2775  recheckIndexes = ExecInsertIndexTuples(slot,
2776  &(tuple->t_self),
2777  estate,
2778  false,
2779  NULL,
2780  NIL);
2781 
2782  /* AFTER ROW INSERT Triggers */
2783  ExecARInsertTriggers(estate, resultRelInfo, tuple,
2784  recheckIndexes, cstate->transition_capture);
2785 
2786  list_free(recheckIndexes);
2787  }
2788  }
2789 
2790  /*
2791  * We count only tuples not suppressed by a BEFORE INSERT trigger;
2792  * this is the same definition used by execMain.c for counting
2793  * tuples inserted by an INSERT command.
2794  */
2795  processed++;
2796 
2797  if (saved_resultRelInfo)
2798  {
2799  resultRelInfo = saved_resultRelInfo;
2800  estate->es_result_relation_info = resultRelInfo;
2801  }
2802  }
2803  }
2804 
2805  /* Flush any remaining buffered tuples */
2806  if (nBufferedTuples > 0)
2807  CopyFromInsertBatch(cstate, estate, mycid, hi_options,
2808  resultRelInfo, myslot, bistate,
2809  nBufferedTuples, bufferedTuples,
2810  firstBufferedLineNo);
2811 
2812  /* Done, clean up */
2813  error_context_stack = errcallback.previous;
2814 
2815  FreeBulkInsertState(bistate);
2816 
2817  MemoryContextSwitchTo(oldcontext);
2818 
2819  /*
2820  * In the old protocol, tell pqcomm that we can process normal protocol
2821  * messages again.
2822  */
2823  if (cstate->copy_dest == COPY_OLD_FE)
2824  pq_endmsgread();
2825 
2826  /* Execute AFTER STATEMENT insertion triggers */
2827  ExecASInsertTriggers(estate, resultRelInfo, cstate->transition_capture);
2828 
2829  /* Handle queued AFTER triggers */
2830  AfterTriggerEndQuery(estate);
2831 
2832  pfree(values);
2833  pfree(nulls);
2834 
2835  ExecResetTupleTable(estate->es_tupleTable, false);
2836 
2837  ExecCloseIndices(resultRelInfo);
2838 
2839  /* Close all the partitioned tables, leaf partitions, and their indices */
2840  if (cstate->partition_dispatch_info)
2841  {
2842  int i;
2843 
2844  /*
2845  * Remember cstate->partition_dispatch_info[0] corresponds to the root
2846  * partitioned table, which we must not try to close, because it is
2847  * the main target table of COPY that will be closed eventually by
2848  * DoCopy(). Also, tupslot is NULL for the root partitioned table.
2849  */
2850  for (i = 1; i < cstate->num_dispatch; i++)
2851  {
2853 
2854  heap_close(pd->reldesc, NoLock);
2856  }
2857  for (i = 0; i < cstate->num_partitions; i++)
2858  {
2859  ResultRelInfo *resultRelInfo = cstate->partitions[i];
2860 
2861  ExecCloseIndices(resultRelInfo);
2862  heap_close(resultRelInfo->ri_RelationDesc, NoLock);
2863  }
2864 
2865  /* Release the standalone partition tuple descriptor */
2867  }
2868 
2869  /* Close any trigger target relations */
2870  ExecCleanUpTriggerState(estate);
2871 
2872  FreeExecutorState(estate);
2873 
2874  /*
2875  * If we skipped writing WAL, then we need to sync the heap (but not
2876  * indexes since those use WAL anyway)
2877  */
2878  if (hi_options & HEAP_INSERT_SKIP_WAL)
2879  heap_sync(cstate->rel);
2880 
2881  return processed;
2882 }
2883 
2884 /*
2885  * A subroutine of CopyFrom, to write the current batch of buffered heap
2886  * tuples to the heap. Also updates indexes and runs AFTER ROW INSERT
2887  * triggers.
2888  */
2889 static void
2891  int hi_options, ResultRelInfo *resultRelInfo,
2892  TupleTableSlot *myslot, BulkInsertState bistate,
2893  int nBufferedTuples, HeapTuple *bufferedTuples,
2894  int firstBufferedLineNo)
2895 {
2896  MemoryContext oldcontext;
2897  int i;
2898  int save_cur_lineno;
2899 
2900  /*
2901  * Print error context information correctly, if one of the operations
2902  * below fail.
2903  */
2904  cstate->line_buf_valid = false;
2905  save_cur_lineno = cstate->cur_lineno;
2906 
2907  /*
2908  * heap_multi_insert leaks memory, so switch to short-lived memory context
2909  * before calling it.
2910  */
2911  oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
2912  heap_multi_insert(cstate->rel,
2913  bufferedTuples,
2914  nBufferedTuples,
2915  mycid,
2916  hi_options,
2917  bistate);
2918  MemoryContextSwitchTo(oldcontext);
2919 
2920  /*
2921  * If there are any indexes, update them for all the inserted tuples, and
2922  * run AFTER ROW INSERT triggers.
2923  */
2924  if (resultRelInfo->ri_NumIndices > 0)
2925  {
2926  for (i = 0; i < nBufferedTuples; i++)
2927  {
2928  List *recheckIndexes;
2929 
2930  cstate->cur_lineno = firstBufferedLineNo + i;
2931  ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false);
2932  recheckIndexes =
2933  ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
2934  estate, false, NULL, NIL);
2935  ExecARInsertTriggers(estate, resultRelInfo,
2936  bufferedTuples[i],
2937  recheckIndexes, cstate->transition_capture);
2938  list_free(recheckIndexes);
2939  }
2940  }
2941 
2942  /*
2943  * There's no indexes, but see if we need to run AFTER ROW INSERT triggers
2944  * anyway.
2945  */
2946  else if (resultRelInfo->ri_TrigDesc != NULL &&
2947  (resultRelInfo->ri_TrigDesc->trig_insert_after_row ||
2948  resultRelInfo->ri_TrigDesc->trig_insert_new_table))
2949  {
2950  for (i = 0; i < nBufferedTuples; i++)
2951  {
2952  cstate->cur_lineno = firstBufferedLineNo + i;
2953  ExecARInsertTriggers(estate, resultRelInfo,
2954  bufferedTuples[i],
2955  NIL, cstate->transition_capture);
2956  }
2957  }
2958 
2959  /* reset cur_lineno to where we were */
2960  cstate->cur_lineno = save_cur_lineno;
2961 }
2962 
2963 /*
2964  * Set up to read tuples for COPY FROM.
2965  *
      * 'pstate': handed to BeginCopy; when non-NULL its p_rtable is saved as
      *		the range table. NOTE(review): its declaration sits on a line
      *		that is missing from this listing (see below).
2966  * 'rel': Used as a template for the tuples
2967  * 'filename': Name of server-local file to read, or the command to run
      *		when 'is_program' is true. NULL selects the pipe path
      *		(ReceiveCopyBegin or stdin) unless 'data_source_cb' is given.
      * 'is_program': If true, 'filename' is executed through OpenPipeStream
      *		instead of being opened as a file.
      * 'data_source_cb': If non-NULL, input comes from this callback
      *		(COPY_CALLBACK) and neither the file nor the pipe path is used.
2968  * 'attnamelist': List of char *, columns to include. NIL selects all cols.
2969  * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
2970  *
2971  * Returns a CopyState, to be passed to NextCopyFrom and related functions.
      * Everything allocated here after BeginCopy lives in cstate->copycontext.
      *
      * For binary input the file header (signature, flags, extension area) is
      * read and validated before returning.
2972  */
2973 CopyState
      /*
      * NOTE(review): listing line 2974 (the function name and its first
      * parameter) is absent from this extract; the body below uses 'pstate'.
      */
2975  Relation rel,
2976  const char *filename,
2977  bool is_program,
2978  copy_data_source_cb data_source_cb,
2979  List *attnamelist,
2980  List *options)
2981 {
2982  CopyState cstate;
2983  bool pipe = (filename == NULL);
2984  TupleDesc tupDesc;
2985  AttrNumber num_phys_attrs,
2986  num_defaults;
2987  FmgrInfo *in_functions;
2988  Oid *typioparams;
2989  int attnum;
2990  Oid in_func_oid;
2991  int *defmap;
2992  ExprState **defexprs;
2993  MemoryContext oldcontext;
2994  bool volatile_defexprs;
2995 
2996  cstate = BeginCopy(pstate, true, rel, NULL, InvalidOid, attnamelist, options);
2997  oldcontext = MemoryContextSwitchTo(cstate->copycontext);
2998 
2999  /* Initialize state variables */
3000  cstate->fe_eof = false;
3001  cstate->eol_type = EOL_UNKNOWN;
3002  cstate->cur_relname = RelationGetRelationName(cstate->rel);
3003  cstate->cur_lineno = 0;
3004  cstate->cur_attname = NULL;
3005  cstate->cur_attval = NULL;
3006 
3007  /* Set up variables to avoid per-attribute overhead. */
3008  initStringInfo(&cstate->attribute_buf);
3009  initStringInfo(&cstate->line_buf);
3010  cstate->line_buf_converted = false;
      /*
      * One byte more than RAW_BUF_SIZE is allocated; CopyReadLineText relies
      * on a guaranteed pad byte at the end of raw_buf.
      */
3011  cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
3012  cstate->raw_buf_index = cstate->raw_buf_len = 0;
3013 
3014  /* Assign range table, we'll need it in CopyFrom. */
3015  if (pstate)
3016  cstate->range_table = pstate->p_rtable;
3017 
3018  tupDesc = RelationGetDescr(cstate->rel);
3019  num_phys_attrs = tupDesc->natts;
3020  num_defaults = 0;
3021  volatile_defexprs = false;
3022 
3023  /*
3024  * Pick up the required catalog information for each attribute in the
3025  * relation, including the input function, the element type (to pass to
3026  * the input function), and info about defaults and constraints. (Which
3027  * input function we use depends on text/binary format choice.)
      *
      * All four arrays are sized by the physical attribute count.
      * in_functions and typioparams are indexed by attnum - 1; defmap and
      * defexprs are filled densely, with num_defaults valid entries.
3028  */
3029  in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
3030  typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
3031  defmap = (int *) palloc(num_phys_attrs * sizeof(int));
3032  defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
3033 
3034  for (attnum = 1; attnum <= num_phys_attrs; attnum++)
3035  {
3036  Form_pg_attribute att = TupleDescAttr(tupDesc, attnum - 1);
3037 
3038  /* We don't need info for dropped attributes (their slots stay unset) */
3039  if (att->attisdropped)
3040  continue;
3041 
3042  /* Fetch the input function and typioparam info */
3043  if (cstate->binary)
3044  getTypeBinaryInputInfo(att->atttypid,
3045  &in_func_oid, &typioparams[attnum - 1]);
3046  else
3047  getTypeInputInfo(att->atttypid,
3048  &in_func_oid, &typioparams[attnum - 1]);
3049  fmgr_info(in_func_oid, &in_functions[attnum - 1]);
3050 
3051  /* Get default info if needed */
3052  if (!list_member_int(cstate->attnumlist, attnum))
3053  {
3054  /* attribute is NOT to be copied from input */
3055  /* use default value if one exists */
3056  Expr *defexpr = (Expr *) build_column_default(cstate->rel,
3057  attnum);
3058 
3059  if (defexpr != NULL)
3060  {
3061  /* Run the expression through planner */
3062  defexpr = expression_planner(defexpr);
3063 
3064  /* Initialize executable expression in copycontext */
3065  defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
      /* defmap records the zero-based column the default belongs to */
3066  defmap[num_defaults] = attnum - 1;
3067  num_defaults++;
3068 
3069  /*
3070  * If a default expression looks at the table being loaded,
3071  * then it could give the wrong answer when using
3072  * multi-insert. Since database access can be dynamic this is
3073  * hard to test for exactly, so we use the much wider test of
3074  * whether the default expression is volatile. We allow for
3075  * the special case of when the default expression is the
3076  * nextval() of a sequence which in this specific case is
3077  * known to be safe for use with the multi-insert
3078  * optimization. Hence we use this special case function
3079  * checker rather than the standard check for
3080  * contain_volatile_functions().
      *
      * The flag is sticky: once one volatile default is found, the
      * remaining defaults are not checked.
3081  */
3082  if (!volatile_defexprs)
3083  volatile_defexprs = contain_volatile_functions_not_nextval((Node *) defexpr);
3084  }
3085  }
3086  }
3087 
3088  /* We keep those variables in cstate. */
3089  cstate->in_functions = in_functions;
3090  cstate->typioparams = typioparams;
3091  cstate->defmap = defmap;
3092  cstate->defexprs = defexprs;
3093  cstate->volatile_defexprs = volatile_defexprs;
3094  cstate->num_defaults = num_defaults;
3095  cstate->is_program = is_program;
3096 
      /*
      * Choose the data source. A callback takes precedence over everything
      * else, then the pipe path (no file name), then a program or a file.
      */
3097  if (data_source_cb)
3098  {
3099  cstate->copy_dest = COPY_CALLBACK;
3100  cstate->data_source_cb = data_source_cb;
3101  }
3102  else if (pipe)
3103  {
3104  Assert(!is_program); /* the grammar does not allow this */
      /*
      * NOTE(review): listing line 3105 is missing here; it must hold the
      * "if" condition that the "else" below pairs with.
      */
3106  ReceiveCopyBegin(cstate);
3107  else
3108  cstate->copy_file = stdin;
3109  }
3110  else
3111  {
      /* keep a private copy of the name in copycontext */
3112  cstate->filename = pstrdup(filename);
3113 
3114  if (cstate->is_program)
3115  {
3116  cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R);
3117  if (cstate->copy_file == NULL)
3118  ereport(ERROR,
      /*
      * NOTE(review): listing line 3119 is missing here; going by the
      * parenthesis count it opens the argument group, presumably with
      * the error code. The same applies to lines 3134 and 3144 below.
      */
3120  errmsg("could not execute command \"%s\": %m",
3121  cstate->filename)));
3122  }
3123  else
3124  {
3125  struct stat st;
3126 
3127  cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
3128  if (cstate->copy_file == NULL)
3129  {
3130  /* copy errno because ereport subfunctions might change it */
3131  int save_errno = errno;
3132 
3133  ereport(ERROR,
      /* NOTE(review): listing line 3134 is missing here. */
3135  errmsg("could not open file \"%s\" for reading: %m",
3136  cstate->filename),
      /* the hint is attached only for "not found" and "permission denied" */
3137  (save_errno == ENOENT || save_errno == EACCES) ?
3138  errhint("COPY FROM instructs the PostgreSQL server process to read a file. "
3139  "You may want a client-side facility such as psql's \\copy.") : 0));
3140  }
3141 
3142  if (fstat(fileno(cstate->copy_file), &st))
3143  ereport(ERROR,
      /* NOTE(review): listing line 3144 is missing here. */
3145  errmsg("could not stat file \"%s\": %m",
3146  cstate->filename)));
3147 
      /* the open succeeded, but a directory cannot be read as COPY data */
3148  if (S_ISDIR(st.st_mode))
3149  ereport(ERROR,
3150  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
3151  errmsg("\"%s\" is a directory", cstate->filename)));
3152  }
3153  }
3154 
3155  if (!cstate->binary)
3156  {
3157  /* must rely on user to tell us... */
3158  cstate->file_has_oids = cstate->oids;
3159  }
3160  else
3161  {
3162  /* Read and verify binary header */
3163  char readSig[11];
3164  int32 tmp;
3165 
3166  /* Signature: exactly 11 bytes that must equal BinarySignature */
3167  if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
3168  memcmp(readSig, BinarySignature, 11) != 0)
3169  ereport(ERROR,
3170  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3171  errmsg("COPY file signature not recognized")));
3172  /* Flags field */
3173  if (!CopyGetInt32(cstate, &tmp))
3174  ereport(ERROR,
3175  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3176  errmsg("invalid COPY file header (missing flags)")));
      /*
      * Bit 16 says whether the file carries OIDs. With that bit cleared,
      * any bit still set at position 16 or above is rejected as an
      * unrecognized critical flag; bits 0-15 are not examined.
      */
3177  cstate->file_has_oids = (tmp & (1 << 16)) != 0;
3178  tmp &= ~(1 << 16);
3179  if ((tmp >> 16) != 0)
3180  ereport(ERROR,
3181  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3182  errmsg("unrecognized critical flags in COPY file header")));
3183  /* Header extension length (a negative value is rejected too) */
3184  if (!CopyGetInt32(cstate, &tmp) ||
3185  tmp < 0)
3186  ereport(ERROR,
3187  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3188  errmsg("invalid COPY file header (missing length)")));
3189  /* Skip extension header, if present, one byte at a time into readSig */
3190  while (tmp-- > 0)
3191  {
3192  if (CopyGetData(cstate, readSig, 1, 1) != 1)
3193  ereport(ERROR,
3194  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3195  errmsg("invalid COPY file header (wrong length)")));
3196  }
3197  }
3198 
      /* binary files with OIDs need an input function for the OID field */
3199  if (cstate->file_has_oids && cstate->binary)
3200  {
      /*
      * NOTE(review): listing line 3201 is missing here; it holds the start
      * of the call whose remaining arguments appear on the next line.
      */
3202  &in_func_oid, &cstate->oid_typioparam);
3203  fmgr_info(in_func_oid, &cstate->oid_in_function);
3204  }
3205 
3206  /* create workspace for CopyReadAttributes results */
3207  if (!cstate->binary)
3208  {
      /* one slot per listed column, plus one for the OID when present */
3209  AttrNumber attr_count = list_length(cstate->attnumlist);
3210  int nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
3211 
3212  cstate->max_fields = nfields;
3213  cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
3214  }
3215 
3216  MemoryContextSwitchTo(oldcontext);
3217 
3218  return cstate;
3219 }
3220 
3221 /*
3222  * Read raw fields in the next line for COPY FROM in text or csv mode.
3223  * Return false if no more lines.
3224  *
3225  * An internal temporary buffer is returned via 'fields'. It is valid until
3226  * the next call of the function. Since the function returns all raw fields
3227  * in the input file, 'nfields' could be different from the number of columns
3228  * in the relation.
      *
      * '*fields' is set to cstate->raw_fields and '*nfields' to the field count
      * reported by the attribute parser; neither is touched when false is
      * returned.
3229  *
3230  * NOTE: the force_not_null option is not applied to the returned fields.
3231  */
3232 bool
3233 NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
3234 {
3235  int fldct;
3236  bool done;
3237 
3238  /* only available for text or csv input */
3239  Assert(!cstate->binary);
3240 
3241  /*
      * on input just throw the header line away; this happens only on the
      * first call (cur_lineno is still 0) and the header counts as a line
      */
3242  if (cstate->cur_lineno == 0 && cstate->header_line)
3243  {
3244  cstate->cur_lineno++;
3245  if (CopyReadLine(cstate))
3246  return false; /* done */
3247  }
3248 
      /* advance the line counter before reading the line it refers to */
3249  cstate->cur_lineno++;
3250 
3251  /* Actually read the line into memory here */
3252  done = CopyReadLine(cstate);
3253 
3254  /*
3255  * EOF at start of line means we're done. If we see EOF after some
3256  * characters, we act as though it was newline followed by EOF, ie,
3257  * process the line and then exit loop on next iteration.
3258  */
3259  if (done && cstate->line_buf.len == 0)
3260  return false;
3261 
3262  /* Parse the line into de-escaped field values */
3263  if (cstate->csv_mode)
3264  fldct = CopyReadAttributesCSV(cstate);
3265  else
3266  fldct = CopyReadAttributesText(cstate);
3267 
3268  *fields = cstate->raw_fields;
3269  *nfields = fldct;
3270  return true;
3271 }
3272 
3273 /*
3274  * Read next tuple from file for COPY FROM. Return false if no more tuples.
3275  *
3276  * 'econtext' is used to evaluate the default expression for each column not
3277  * read from the file. It can be NULL when no default values are used, i.e.
3278  * when all columns are read from the file.
3279  *
3280  * 'values' and 'nulls' arrays must be the same length as columns of the
3281  * relation passed to BeginCopyFrom. This function fills the arrays.
3282  * Oid of the tuple is returned with 'tupleOid' separately.
      *
      * '*tupleOid' is written only when the input has an OID field, cstate->oids
      * is set and 'tupleOid' is non-NULL. Columns that are neither read from
      * the input nor given a default are left NULL.
3283  */
3284 bool
      /*
      * NOTE(review): listing line 3285 (the function name and its leading
      * parameters) is absent from this extract; the body below uses 'cstate'
      * and 'econtext'.
      */
3286  Datum *values, bool *nulls, Oid *tupleOid)
3287 {
3288  TupleDesc tupDesc;
3289  AttrNumber num_phys_attrs,
3290  attr_count,
3291  num_defaults = cstate->num_defaults;
3292  FmgrInfo *in_functions = cstate->in_functions;
3293  Oid *typioparams = cstate->typioparams;
3294  int i;
3295  int nfields;
3296  bool isnull;
3297  bool file_has_oids = cstate->file_has_oids;
3298  int *defmap = cstate->defmap;
3299  ExprState **defexprs = cstate->defexprs;
3300 
3301  tupDesc = RelationGetDescr(cstate->rel);
3302  num_phys_attrs = tupDesc->natts;
3303  attr_count = list_length(cstate->attnumlist);
      /* expected number of input fields: listed columns plus optional OID */
3304  nfields = file_has_oids ? (attr_count + 1) : attr_count;
3305 
3306  /* Initialize all values for row to NULL */
3307  MemSet(values, 0, num_phys_attrs * sizeof(Datum));
3308  MemSet(nulls, true, num_phys_attrs * sizeof(bool));
3309 
3310  if (!cstate->binary)
3311  {
3312  char **field_strings;
3313  ListCell *cur;
3314  int fldct;
3315  int fieldno;
3316  char *string;
3317 
3318  /* read raw fields in the next line */
3319  if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
3320  return false;
3321 
3322  /* check for overflowing fields (skipped when no fields are expected) */
3323  if (nfields > 0 && fldct > nfields)
3324  ereport(ERROR,
3325  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3326  errmsg("extra data after last expected column")));
3327 
3328  fieldno = 0;
3329 
3330  /* Read the OID field if present */
3331  if (file_has_oids)
3332  {
3333  if (fieldno >= fldct)
3334  ereport(ERROR,
3335  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3336  errmsg("missing data for OID column")));
3337  string = field_strings[fieldno++];
3338 
      /*
      * A null OID is always an error. A non-null OID is converted only
      * when the caller wants it; otherwise the field is consumed and
      * ignored.
      */
3339  if (string == NULL)
3340  ereport(ERROR,
3341  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3342  errmsg("null OID in COPY data")));
3343  else if (cstate->oids && tupleOid != NULL)
3344  {
      /* cur_attname and cur_attval are set only around the conversion */
3345  cstate->cur_attname = "oid";
3346  cstate->cur_attval = string;
      /*
      * NOTE(review): listing line 3347 is missing here; it holds the
      * start of the statement that stores the converted value into
      * *tupleOid, whose last argument is on the next line.
      */
3348  CStringGetDatum(string)));
3349  if (*tupleOid == InvalidOid)
3350  ereport(ERROR,
3351  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3352  errmsg("invalid OID in COPY data")));
3353  cstate->cur_attname = NULL;
3354  cstate->cur_attval = NULL;
3355  }
3356  }
3357 
3358  /* Loop to read the user attributes on the line. */
3359  foreach(cur, cstate->attnumlist)
3360  {
3361  int attnum = lfirst_int(cur);
3362  int m = attnum - 1;
3363  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
3364 
3365  if (fieldno >= fldct)
3366  ereport(ERROR,
3367  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3368  errmsg("missing data for column \"%s\"",
3369  NameStr(att->attname))));
3370  string = field_strings[fieldno++];
3371 
3372  if (cstate->convert_select_flags &&
3373  !cstate->convert_select_flags[m])
3374  {
3375  /* ignore input field, leaving column as NULL */
3376  continue;
3377  }
3378 
3379  if (cstate->csv_mode)
3380  {
3381  if (string == NULL &&
3382  cstate->force_notnull_flags[m])
3383  {
3384  /*
3385  * FORCE_NOT_NULL option is set and column is NULL -
3386  * convert it to the NULL string.
3387  */
3388  string = cstate->null_print;
3389  }
3390  else if (string != NULL && cstate->force_null_flags[m]
3391  && strcmp(string, cstate->null_print) == 0)
3392  {
3393  /*
3394  * FORCE_NULL option is set and column matches the NULL
3395  * string. It must have been quoted, or otherwise the
3396  * string would already have been set to NULL. Convert it
3397  * to NULL as specified.
3398  */
3399  string = NULL;
3400  }
3401  }
3402 
3403  cstate->cur_attname = NameStr(att->attname);
3404  cstate->cur_attval = string;
      /* the input function is called even for a NULL string */
3405  values[m] = InputFunctionCall(&in_functions[m],
3406  string,
3407  typioparams[m],
3408  att->atttypmod);
3409  if (string != NULL)
3410  nulls[m] = false;
3411  cstate->cur_attname = NULL;
3412  cstate->cur_attval = NULL;
3413  }
3414 
      /* every expected field, including the OID if any, was consumed */
3415  Assert(fieldno == nfields);
3416  }
3417  else
3418  {
3419  /* binary */
3420  int16 fld_count;
3421  ListCell *cur;
3422 
3423  cstate->cur_lineno++;
3424 
3425  if (!CopyGetInt16(cstate, &fld_count))
3426  {
3427  /* EOF detected (end of file, or protocol-level EOF) */
3428  return false;
3429  }
3430 
      /* a field count of -1 is the end-of-data marker */
3431  if (fld_count == -1)
3432  {
3433  /*
3434  * Received EOF marker. In a V3-protocol copy, wait for the
3435  * protocol-level EOF, and complain if it doesn't come
3436  * immediately. This ensures that we correctly handle CopyFail,
3437  * if client chooses to send that now.
3438  *
3439  * Note that we MUST NOT try to read more data in an old-protocol
3440  * copy, since there is no protocol-level EOF marker then. We
3441  * could go either way for copy from file, but choose to throw
3442  * error if there's data after the EOF marker, for consistency
3443  * with the new-protocol case.
3444  */
3445  char dummy;
3446 
3447  if (cstate->copy_dest != COPY_OLD_FE &&
3448  CopyGetData(cstate, &dummy, 1, 1) > 0)
3449  ereport(ERROR,
3450  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3451  errmsg("received copy data after EOF marker")));
3452  return false;
3453  }
3454 
      /*
      * The per-row count is checked against attr_count, not nfields: the
      * OID field read below is not included in it.
      */
3455  if (fld_count != attr_count)
3456  ereport(ERROR,
3457  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3458  errmsg("row field count is %d, expected %d",
3459  (int) fld_count, attr_count)));
3460 
3461  if (file_has_oids)
3462  {
3463  Oid loaded_oid;
3464 
3465  cstate->cur_attname = "oid";
3466  loaded_oid =
      /*
      * NOTE(review): listing line 3467 is missing here; it holds the
      * start of the call whose remaining arguments follow.
      */
3468  0,
3469  &cstate->oid_in_function,
3470  cstate->oid_typioparam,
3471  -1,
3472  &isnull));
      /* unlike text mode, the OID is read and validated unconditionally */
3473  if (isnull || loaded_oid == InvalidOid)
3474  ereport(ERROR,
3475  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3476  errmsg("invalid OID in COPY data")));
3477  cstate->cur_attname = NULL;
3478  if (cstate->oids && tupleOid != NULL)
3479  *tupleOid = loaded_oid;
3480  }
3481 
      /* i is the 1-based position of the field within the row */
3482  i = 0;
3483  foreach(cur, cstate->attnumlist)
3484  {
3485  int attnum = lfirst_int(cur);
3486  int m = attnum - 1;
3487  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
3488 
3489  cstate->cur_attname = NameStr(att->attname);
3490  i++;
3491  values[m] = CopyReadBinaryAttribute(cstate,
3492  i,
3493  &in_functions[m],
3494  typioparams[m],
3495  att->atttypmod,
3496  &nulls[m]);
3497  cstate->cur_attname = NULL;
3498  }
3499  }
3500 
3501  /*
3502  * Now compute and insert any defaults available for the columns not
3503  * provided by the input data. Anything not processed here or above will
3504  * remain NULL.
3505  */
3506  for (i = 0; i < num_defaults; i++)
3507  {
3508  /*
3509  * The caller must supply econtext and have switched into the
3510  * per-tuple memory context in it.
3511  */
3512  Assert(econtext != NULL);
      /* NOTE(review): listing line 3513 is missing here. */
3514 
      /* defmap[i] is the zero-based column that default i belongs to */
3515  values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
3516  &nulls[defmap[i]]);
3517  }
3518 
3519  return true;
3520 }
3521 
3522 /*
3523  * Clean up storage and release resources for COPY FROM.
      *
      * This is a thin wrapper: all of the work is delegated to EndCopy.
3524  */
3525 void
      /*
      * NOTE(review): listing line 3526 (the function name and its parameter
      * list) is absent from this extract; the body below uses 'cstate'.
      */
3527 {
3528  /* No COPY FROM related resources except memory. */
3529 
3530  EndCopy(cstate);
3531 }
3532 
3533 /*
3534  * Read the next input line and stash it in line_buf, with conversion to
3535  * server encoding.
3536  *
3537  * Result is true if read was terminated by EOF, false if terminated
3538  * by newline. The terminating newline or EOF marker is not included
3539  * in the final value of line_buf.
      *
      * line_buf_converted is false while the raw line is being collected and
      * is set to true just before returning.
3540  */
3541 static bool
      /*
      * NOTE(review): listing line 3542 (the function name and its parameter
      * list) is absent from this extract; the body below uses 'cstate'.
      */
3543 {
3544  bool result;
3545 
3546  resetStringInfo(&cstate->line_buf);
3547  cstate->line_buf_valid = true;
3548 
3549  /* Mark that encoding conversion hasn't occurred yet */
3550  cstate->line_buf_converted = false;
3551 
3552  /* Parse data and transfer into line_buf */
3553  result = CopyReadLineText(cstate);
3554 
3555  if (result)
3556  {
3557  /*
3558  * Reached EOF. In protocol version 3, we should ignore anything
3559  * after \. up to the protocol end of copy data. (XXX maybe better
3560  * not to treat \. as special?)
3561  */
3562  if (cstate->copy_dest == COPY_NEW_FE)
3563  {
      /*
      * Mark everything buffered as consumed, then keep loading and
      * discarding until CopyLoadRawBuf reports no more data.
      */
3564  do
3565  {
3566  cstate->raw_buf_index = cstate->raw_buf_len;
3567  } while (CopyLoadRawBuf(cstate));
3568  }
3569  }
3570  else
3571  {
3572  /*
3573  * If we didn't hit EOF, then we must have transferred the EOL marker
3574  * to line_buf along with the data. Get rid of it.
      *
      * Each case shortens line_buf by the marker length and re-terminates
      * the string.
3575  */
3576  switch (cstate->eol_type)
3577  {
3578  case EOL_NL:
3579  Assert(cstate->line_buf.len >= 1);
3580  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
3581  cstate->line_buf.len--;
3582  cstate->line_buf.data[cstate->line_buf.len] = '\0';
3583  break;
3584  case EOL_CR:
3585  Assert(cstate->line_buf.len >= 1);
3586  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
3587  cstate->line_buf.len--;
3588  cstate->line_buf.data[cstate->line_buf.len] = '\0';
3589  break;
3590  case EOL_CRNL:
3591  Assert(cstate->line_buf.len >= 2);
3592  Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
3593  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
3594  cstate->line_buf.len -= 2;
3595  cstate->line_buf.data[cstate->line_buf.len] = '\0';
3596  break;
3597  case EOL_UNKNOWN:
3598  /* shouldn't get here */
3599  Assert(false);
3600  break;
3601  }
3602  }
3603 
3604  /* Done reading the line. Convert it to server encoding. */
3605  if (cstate->need_transcoding)
3606  {
3607  char *cvt;
3608 
3609  cvt = pg_any_to_server(cstate->line_buf.data,
3610  cstate->line_buf.len,
3611  cstate->file_encoding);
      /*
      * A result pointer different from the input is treated as a separate
      * converted copy: it replaces the contents of line_buf and is freed.
      */
3612  if (cvt != cstate->line_buf.data)
3613  {
3614  /* transfer converted data back to line_buf */
3615  resetStringInfo(&cstate->line_buf);
3616  appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
3617  pfree(cvt);
3618  }
3619  }
3620 
3621  /* Now it's safe to use the buffer in error messages */
3622  cstate->line_buf_converted = true;
3623 
3624  return result;
3625 }
3626 
3627 /*
3628  * CopyReadLineText - inner loop of CopyReadLine for text mode
      *
      * Scans raw_buf for the end of the next input line. Returns true when the
      * input ran out or the end-of-copy marker was recognized, false when a
      * line terminator was found. cstate->eol_type is fixed by the first line
      * terminator seen; later lines must use the same style.
      *
      * NOTE(review): this listing is missing several lines inside the function
      * (listing lines 3631, 3701, 3741, 3782, 3850-3851, 3873, 3884, 3893,
      * 3898, 3909, 3926, 3971 and 3981). Each gap is marked where it occurs;
      * consult the complete source before relying on the control flow there.
3629  */
3630 static bool
      /*
      * NOTE(review): listing line 3631 (the function name and its parameter
      * list) is absent from this extract; the body below uses 'cstate'.
      */
3632 {
3633  char *copy_raw_buf;
3634  int raw_buf_ptr;
3635  int copy_buf_len;
3636  bool need_data = false;
3637  bool hit_eof = false;
3638  bool result = false;
3639  char mblen_str[2];
3640 
3641  /* CSV variables */
3642  bool first_char_in_line = true;
3643  bool in_quote = false,
3644  last_was_esc = false;
3645  char quotec = '\0';
3646  char escapec = '\0';
3647 
3648  if (cstate->csv_mode)
3649  {
3650  quotec = cstate->quote[0];
3651  escapec = cstate->escape[0];
3652  /* ignore special escape processing if it's the same as quotec */
3653  if (quotec == escapec)
3654  escapec = '\0';
3655  }
3656 
      /* mblen_str is a one-character string; byte 0 is filled in when needed */
3657  mblen_str[1] = '\0';
3658 
3659  /*
3660  * The objective of this loop is to transfer the entire next input line
3661  * into line_buf. Hence, we only care for detecting newlines (\r and/or
3662  * \n) and the end-of-copy marker (\.).
3663  *
3664  * In CSV mode, \r and \n inside a quoted field are just part of the data
3665  * value and are put in line_buf. We keep just enough state to know if we
3666  * are currently in a quoted field or not.
3667  *
3668  * These four characters, and the CSV escape and quote characters, are
3669  * assumed the same in frontend and backend encodings.
3670  *
3671  * For speed, we try to move data from raw_buf to line_buf in chunks
3672  * rather than one character at a time. raw_buf_ptr points to the next
3673  * character to examine; any characters from raw_buf_index to raw_buf_ptr
3674  * have been determined to be part of the line, but not yet transferred to
3675  * line_buf.
3676  *
3677  * For a little extra speed within the loop, we copy raw_buf and
3678  * raw_buf_len into local variables.
3679  */
3680  copy_raw_buf = cstate->raw_buf;
3681  raw_buf_ptr = cstate->raw_buf_index;
3682  copy_buf_len = cstate->raw_buf_len;
3683 
3684  for (;;)
3685  {
3686  int prev_raw_ptr;
3687  char c;
3688 
3689  /*
3690  * Load more data if needed. Ideally we would just force four bytes
3691  * of read-ahead and avoid the many calls to
3692  * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
3693  * does not allow us to read too far ahead or we might read into the
3694  * next data, so we read ahead only as far as we know we can. One
3695  * optimization would be to read ahead four bytes here if
3696  * cstate->copy_dest != COPY_OLD_FE, but it hardly seems worth it,
3697  * considering the size of the buffer.
3698  */
3699  if (raw_buf_ptr >= copy_buf_len || need_data)
3700  {
      /*
      * NOTE(review): listing line 3701 is missing here. Since the reload
      * below resets raw_buf_index, it presumably moves the bytes already
      * accepted into line_buf first - confirm against the full source.
      */
3702 
3703  /*
3704  * Try to read some more data. This will certainly reset
3705  * raw_buf_index to zero, and raw_buf_ptr must go with it.
3706  */
3707  if (!CopyLoadRawBuf(cstate))
3708  hit_eof = true;
3709  raw_buf_ptr = 0;
3710  copy_buf_len = cstate->raw_buf_len;
3711 
3712  /*
3713  * If we are completely out of data, break out of the loop,
3714  * reporting EOF.
3715  */
3716  if (copy_buf_len <= 0)
3717  {
3718  result = true;
3719  break;
3720  }
3721  need_data = false;
3722  }
3723 
3724  /* OK to fetch a character; remember where it started */
3725  prev_raw_ptr = raw_buf_ptr;
3726  c = copy_raw_buf[raw_buf_ptr++];
3727 
3728  if (cstate->csv_mode)
3729  {
3730  /*
3731  * If character is '\\' or '\r', we may need to look ahead below.
3732  * Force fetch of the next character if we don't already have it.
3733  * We need to do this before changing CSV state, in case one of
3734  * these characters is also the quote or escape character.
3735  *
3736  * Note: old-protocol does not like forced prefetch, but it's OK
3737  * here since we cannot validly be at EOF.
3738  */
3739  if (c == '\\' || c == '\r')
3740  {
      /*
      * NOTE(review): listing line 3741, the statement that performs the
      * forced fetch described above, is missing here.
      */
3742  }
3743 
3744  /*
3745  * Dealing with quotes and escapes here is mildly tricky. If the
3746  * quote char is also the escape char, there's no problem - we
3747  * just use the char as a toggle. If they are different, we need
3748  * to ensure that we only take account of an escape inside a
3749  * quoted field and immediately preceding a quote char, and not
3750  * the second in an escape-escape sequence.
3751  */
3752  if (in_quote && c == escapec)
3753  last_was_esc = !last_was_esc;
3754  if (c == quotec && !last_was_esc)
3755  in_quote = !in_quote;
3756  if (c != escapec)
3757  last_was_esc = false;
3758 
3759  /*
3760  * Updating the line count for embedded CR and/or LF chars is
3761  * necessarily a little fragile - this test is probably about the
3762  * best we can do. (XXX it's arguable whether we should do this
3763  * at all --- is cur_lineno a physical or logical count?)
      *
      * Only '\n' is counted when the line style is EOL_NL; for every
      * other style, including a still-unknown one, '\r' is counted.
3764  */
3765  if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
3766  cstate->cur_lineno++;
3767  }
3768 
3769  /* Process \r */
3770  if (c == '\r' && (!cstate->csv_mode || !in_quote))
3771  {
3772  /* Check for \r\n on first line, _and_ handle \r\n. */
3773  if (cstate->eol_type == EOL_UNKNOWN ||
3774  cstate->eol_type == EOL_CRNL)
3775  {
3776  /*
3777  * If need more data, go back to loop top to load it.
3778  *
3779  * Note that if we are at EOF, c will wind up as '\0' because
3780  * of the guaranteed pad of raw_buf.
3781  */
      /*
      * NOTE(review): listing line 3782, the statement that implements
      * the reload described above, is missing here.
      */
3783 
3784  /* get next char (peek only; consumed below if it is \n) */
3785  c = copy_raw_buf[raw_buf_ptr];
3786 
3787  if (c == '\n')
3788  {
3789  raw_buf_ptr++; /* eat newline */
3790  cstate->eol_type = EOL_CRNL; /* in case not set yet */
3791  }
3792  else
3793  {
3794  /* found \r, but no \n */
3795  if (cstate->eol_type == EOL_CRNL)
3796  ereport(ERROR,
3797  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3798  !cstate->csv_mode ?
3799  errmsg("literal carriage return found in data") :
3800  errmsg("unquoted carriage return found in data"),
3801  !cstate->csv_mode ?
3802  errhint("Use \"\\r\" to represent carriage return.") :
3803  errhint("Use quoted CSV field to represent carriage return.")));
3804 
3805  /*
3806  * if we got here, it is the first line and we didn't find
3807  * \n, so don't consume the peeked character
3808  */
3809  cstate->eol_type = EOL_CR;
3810  }
3811  }
3812  else if (cstate->eol_type == EOL_NL)
3813  ereport(ERROR,
3814  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3815  !cstate->csv_mode ?
3816  errmsg("literal carriage return found in data") :
3817  errmsg("unquoted carriage return found in data"),
3818  !cstate->csv_mode ?
3819  errhint("Use \"\\r\" to represent carriage return.") :
3820  errhint("Use quoted CSV field to represent carriage return.")));
3821  /* If reach here, we have found the line terminator */
3822  break;
3823  }
3824 
3825  /* Process \n */
3826  if (c == '\n' && (!cstate->csv_mode || !in_quote))
3827  {
3828  if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
3829  ereport(ERROR,
3830  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3831  !cstate->csv_mode ?
3832  errmsg("literal newline found in data") :
3833  errmsg("unquoted newline found in data"),
3834  !cstate->csv_mode ?
3835  errhint("Use \"\\n\" to represent newline.") :
3836  errhint("Use quoted CSV field to represent newline.")));
3837  cstate->eol_type = EOL_NL; /* in case not set yet */
3838  /* If reach here, we have found the line terminator */
3839  break;
3840  }
3841 
3842  /*
3843  * In CSV mode, we only recognize \. alone on a line. This is because
3844  * \. is a valid CSV data value.
3845  */
3846  if (c == '\\' && (!cstate->csv_mode || first_char_in_line))
3847  {
3848  char c2;
3849 
      /*
      * NOTE(review): listing lines 3850-3851 are missing here, before
      * the character after the backslash is examined.
      */
3852 
3853  /* -----
3854  * get next character
3855  * Note: we do not change c so if it isn't \., we can fall
3856  * through and continue processing for file encoding.
3857  * -----
3858  */
3859  c2 = copy_raw_buf[raw_buf_ptr];
3860 
3861  if (c2 == '.')
3862  {
3863  raw_buf_ptr++; /* consume the '.' */
3864 
3865  /*
3866  * Note: if we loop back for more data here, it does not
3867  * matter that the CSV state change checks are re-executed; we
3868  * will come back here with no important state changed.
3869  */
3870  if (cstate->eol_type == EOL_CRNL)
3871  {
3872  /* Get the next character */
      /* NOTE(review): listing line 3873 is missing here. */
3874  /* if hit_eof, c2 will become '\0' */
3875  c2 = copy_raw_buf[raw_buf_ptr++];
3876 
3877  if (c2 == '\n')
3878  {
3879  if (!cstate->csv_mode)
3880  ereport(ERROR,
3881  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3882  errmsg("end-of-copy marker does not match previous newline style")));
3883  else
      /*
      * NOTE(review): listing line 3884, the body of this
      * "else", is missing; presumably the CSV path that
      * treats the text as data (see not_end_of_copy).
      */
3885  }
3886  else if (c2 != '\r')
3887  {
3888  if (!cstate->csv_mode)
3889  ereport(ERROR,
3890  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3891  errmsg("end-of-copy marker corrupt")));
3892  else
      /*
      * NOTE(review): listing line 3893, the body of this
      * "else", is missing; presumably as for line 3884.
      */
3894  }
3895  }
3896 
3897  /* Get the next character */
      /* NOTE(review): listing line 3898 is missing here. */
3899  /* if hit_eof, c2 will become '\0' */
3900  c2 = copy_raw_buf[raw_buf_ptr++];
3901 
3902  if (c2 != '\r' && c2 != '\n')
3903  {
3904  if (!cstate->csv_mode)
3905  ereport(ERROR,
3906  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3907  errmsg("end-of-copy marker corrupt")));
3908  else
      /*
      * NOTE(review): listing line 3909, the body of this "else",
      * is missing; presumably as for line 3884.
      */
3910  }
3911 
      /* the terminator after the marker must match the established style */
3912  if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
3913  (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
3914  (cstate->eol_type == EOL_CR && c2 != '\r'))
3915  {
3916  ereport(ERROR,
3917  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
3918  errmsg("end-of-copy marker does not match previous newline style")));
3919  }
3920 
3921  /*
3922  * Transfer only the data before the \. into line_buf, then
3923  * discard the data and the \. sequence.
3924  */
3925  if (prev_raw_ptr > cstate->raw_buf_index)
      /*
      * NOTE(review): listing line 3926 is missing here; it holds
      * the start of the call whose source pointer and length
      * arguments appear on the next two lines.
      */
3927  cstate->raw_buf + cstate->raw_buf_index,
3928  prev_raw_ptr - cstate->raw_buf_index);
3929  cstate->raw_buf_index = raw_buf_ptr;
3930  result = true; /* report EOF */
3931  break;
3932  }
3933  else if (!cstate->csv_mode)
3934 
3935  /*
3936  * If we are here, it means we found a backslash followed by
3937  * something other than a period. In non-CSV mode, anything
3938  * after a backslash is special, so we skip over that second
3939  * character too. If we didn't do that \\. would be
3940  * considered an end-of-copy marker, while in non-CSV mode it is
3941  * a literal backslash followed by a period. In CSV mode,
3942  * backslashes are not special, so we want to process the
3943  * character after the backslash just like a normal character,
3944  * so we don't increment in those cases.
      *
      * The single statement below is the whole body of the
      * unbraced "else if" above.
3945  */
3946  raw_buf_ptr++;
3947  }
3948 
3949  /*
3950  * This label is for CSV cases where \. appears at the start of a
3951  * line, but there is more text after it, meaning it was a data value.
3952  * We are more strict for \. in CSV mode because \. could be a data
3953  * value, while in non-CSV mode, \. cannot be a data value.
3954  */
3955 not_end_of_copy:
3956 
3957  /*
3958  * Process all bytes of a multi-byte character as a group.
3959  *
3960  * We only support multi-byte sequences where the first byte has the
3961  * high-bit set, so as an optimization we can avoid this block
3962  * entirely if it is not set.
3963  */
3964  if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
3965  {
3966  int mblen;
3967 
3968  mblen_str[0] = c;
3969  /* All our encodings only read the first byte to get the length */
3970  mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
      /* NOTE(review): listing line 3971 is missing here. */
3972  IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
      /* skip the remaining bytes of the character in one step */
3973  raw_buf_ptr += mblen - 1;
3974  }
3975  first_char_in_line = false;
3976  } /* end of outer loop */
3977 
3978  /*
3979  * Transfer any still-uncopied data to line_buf.
3980  */
      /*
      * NOTE(review): listing line 3981, the statement that performs the
      * transfer described above, is missing here.
      */
3982 
3983  return result;
3984 }
3985 
3986 /*
3987  * Return decimal value for a hexadecimal digit
      *
      * A decimal digit maps to 0-9; any other character is lower-cased and
      * measured from 'a', so 'a'-'f' and 'A'-'F' map to 10-15. No validation
      * is done here: the result is meaningless unless 'hex' is a hex digit.
3988  */
3989 static int
      /*
      * NOTE(review): listing line 3990 (the function name and its parameter
      * list) is absent from this extract; the body below uses 'hex'.
      */
3991 {
      /* the casts keep the <ctype.h> calls defined for negative char values */
3992  if (isdigit((unsigned char) hex))
3993  return hex - '0';
3994  else
3995  return tolower((unsigned char) hex) - 'a' + 10;
3996 }
3997 
3998 /*
3999  * Parse the current line into separate attributes (fields),
4000  * performing de-escaping as needed.
4001  *
4002  * The input is in line_buf. We use attribute_buf to hold the result
4003  * strings. cstate->raw_fields[k] is set to point to the k'th attribute
4004  * string, or NULL when the input matches the null marker string.
4005  * This array is expanded as necessary.
4006  *
4007  * (Note that the caller cannot check for nulls since the returned
4008  * string would be the post-de-escaping equivalent, which may look
4009  * the same as some valid data string.)
4010  *
4011  * delim is the column delimiter string (must be just one byte for now).
4012  * null_print is the null marker string. Note that this is compared to
4013  * the pre-de-escaped input string.
4014  *
4015  * The return value is the number of fields actually read.
4016  */
4017 static int
4019 {
4020  char delimc = cstate->delim[0];
4021  int fieldno;
4022  char *output_ptr;
4023  char *cur_ptr;
4024  char *line_end_ptr;
4025 
4026  /*
4027  * We need a special case for zero-column tables: check that the input
4028  * line is empty, and return.
4029  */
4030  if (cstate->max_fields <= 0)
4031  {
4032  if (cstate->line_buf.len != 0)
4033  ereport(ERROR,
4034  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
4035  errmsg("extra data after last expected column")));
4036  return 0;
4037  }
4038 
4039  resetStringInfo(&cstate->attribute_buf);
4040 
4041  /*
4042  * The de-escaped attributes will certainly not be longer than the input
4043  * data line, so we can just force attribute_buf to be large enough and
4044  * then transfer data without any checks for enough space. We need to do
4045  * it this way because enlarging attribute_buf mid-stream would invalidate
4046  * pointers already stored into cstate->raw_fields[].
4047  */
4048  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
4049  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
4050  output_ptr = cstate->attribute_buf.data;
4051 
4052  /* set pointer variables for loop */
4053  cur_ptr = cstate->line_buf.data;
4054  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
4055 
4056  /* Outer loop iterates over fields */
4057  fieldno = 0;
4058  for (;;)
4059  {
4060  bool found_delim = false;
4061  char *start_ptr;
4062  char *end_ptr;
4063  int input_len;
4064  bool saw_non_ascii = false;
4065 
4066  /* Make sure there is enough space for the next value */
4067  if (fieldno >= cstate->max_fields)
4068  {
4069  cstate->max_fields *= 2;
4070  cstate->raw_fields =
4071  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
4072  }
4073 
4074  /* Remember start of field on both input and output sides */
4075  start_ptr = cur_ptr;
4076  cstate->raw_fields[fieldno] = output_ptr;
4077 
4078  /*
4079  * Scan data for field.
4080  *
4081  * Note that in this loop, we are scanning to locate the end of field
4082  * and also speculatively performing de-escaping. Once we find the
4083  * end-of-field, we can match the raw field contents against the null
4084  * marker string. Only after that comparison fails do we know that
4085  * de-escaping is actually the right thing to do; therefore we *must
4086  * not* throw any syntax errors before we've done the null-marker
4087  * check.
4088  */
4089  for (;;)
4090  {
4091  char c;
4092 
4093  end_ptr = cur_ptr;
4094  if (cur_ptr >= line_end_ptr)
4095  break;
4096  c = *cur_ptr++;
4097  if (c == delimc)
4098  {
4099  found_delim = true;
4100  break;
4101  }
4102  if (c == '\\')
4103  {
4104  if (cur_ptr >= line_end_ptr)
4105  break;
4106  c = *cur_ptr++;
4107  switch (c)
4108  {
4109  case '0':
4110  case '1':
4111  case '2':
4112  case '3':
4113  case '4':
4114  case '5':
4115  case '6':
4116  case '7':
4117  {
4118  /* handle \013 */
4119  int val;
4120 
4121  val = OCTVALUE(c);
4122  if (cur_ptr < line_end_ptr)
4123  {
4124  c = *cur_ptr;
4125  if (ISOCTAL(c))
4126  {
4127  cur_ptr++;
4128  val = (val << 3) + OCTVALUE(c);
4129  if (cur_ptr < line_end_ptr)
4130  {
4131  c = *cur_ptr;
4132  if (ISOCTAL(c))
4133  {
4134  cur_ptr++;
4135  val = (val << 3) + OCTVALUE(c);
4136  }
4137  }
4138  }
4139  }
4140  c = val & 0377;
4141  if (c == '\0' || IS_HIGHBIT_SET(c))
4142  saw_non_ascii = true;
4143  }
4144  break;
4145  case 'x':
4146  /* Handle \x3F */
4147  if (cur_ptr < line_end_ptr)
4148  {
4149  char hexchar = *cur_ptr;
4150 
4151  if (isxdigit((unsigned char) hexchar))
4152  {
4153  int val = GetDecimalFromHex(hexchar);
4154 
4155  cur_ptr++;
4156  if (cur_ptr < line_end_ptr)
4157  {
4158  hexchar = *cur_ptr;
4159  if (isxdigit((unsigned char) hexchar))
4160  {
4161  cur_ptr++;
4162  val = (val << 4) + GetDecimalFromHex(hexchar);
4163  }
4164  }
4165  c = val & 0xff;
4166  if (c == '\0' || IS_HIGHBIT_SET(c))
4167  saw_non_ascii = true;
4168  }
4169  }
4170  break;
4171  case 'b':
4172  c = '\b';
4173  break;
4174  case 'f':
4175  c = '\f';
4176  break;
4177  case 'n':
4178  c = '\n';
4179  break;
4180  case 'r':
4181  c = '\r';
4182  break;
4183  case 't':
4184  c = '\t';
4185  break;
4186  case 'v':
4187  c = '\v';
4188  break;
4189 
4190  /*
4191  * in all other cases, take the char after '\'
4192  * literally
4193  */
4194  }
4195  }
4196 
4197  /* Add c to output string */
4198  *output_ptr++ = c;
4199  }
4200 
4201  /* Check whether raw input matched null marker */
4202  input_len = end_ptr - start_ptr;
4203  if (input_len == cstate->null_print_len &&
4204  strncmp(start_ptr, cstate->null_print, input_len) == 0)
4205  cstate->raw_fields[fieldno] = NULL;
4206  else
4207  {
4208  /*
4209  * At this point we know the field is supposed to contain data.
4210  *
4211  * If we de-escaped any non-7-bit-ASCII chars, make sure the
4212  * resulting string is valid data for the db encoding.
4213  */
4214  if (saw_non_ascii)
4215  {
4216  char *fld = cstate->raw_fields[fieldno];
4217 
4218  pg_verifymbstr(fld, output_ptr - fld, false);
4219  }
4220  }
4221 
4222  /* Terminate attribute value in output area */
4223  *output_ptr++ = '\0';
4224 
4225  fieldno++;
4226  /* Done if we hit EOL instead of a delim */
4227  if (!found_delim)
4228  break;
4229  }
4230 
4231  /* Clean up state of attribute_buf */
4232  output_ptr--;
4233  Assert(*output_ptr == '\0');
4234  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
4235 
4236  return fieldno;
4237 }
4238 
4239 /*
4240  * Parse the current line into separate attributes (fields),
4241  * performing de-escaping as needed. This has exactly the same API as
4242  * CopyReadAttributesText, except we parse the fields according to
4243  * "standard" (i.e. common) CSV usage.
4244  */
4245 static int
4247 {
4248  char delimc = cstate->delim[0];
4249  char quotec = cstate->quote[0];
4250  char escapec = cstate->escape[0];
4251  int fieldno;
4252  char *output_ptr;
4253  char *cur_ptr;
4254  char *line_end_ptr;
4255 
4256  /*
4257  * We need a special case for zero-column tables: check that the input
4258  * line is empty, and return.
4259  */
4260  if (cstate->max_fields <= 0)
4261  {
4262  if (cstate->line_buf.len != 0)
4263  ereport(ERROR,
4264  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
4265  errmsg("extra data after last expected column")));
4266  return 0;
4267  }
4268 
4269  resetStringInfo(&cstate->attribute_buf);
4270 
4271  /*
4272  * The de-escaped attributes will certainly not be longer than the input
4273  * data line, so we can just force attribute_buf to be large enough and
4274  * then transfer data without any checks for enough space. We need to do
4275  * it this way because enlarging attribute_buf mid-stream would invalidate
4276  * pointers already stored into cstate->raw_fields[].
4277  */
4278  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
4279  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
4280  output_ptr = cstate->attribute_buf.data;
4281 
4282  /* set pointer variables for loop */
4283  cur_ptr = cstate->line_buf.data;
4284  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
4285 
4286  /* Outer loop iterates over fields */
4287  fieldno = 0;
4288  for (;;)
4289  {
4290  bool found_delim = false;
4291  bool saw_quote = false;
4292  char *start_ptr;
4293  char *end_ptr;
4294  int input_len;
4295 
4296  /* Make sure there is enough space for the next value */
4297  if (fieldno >= cstate->max_fields)
4298  {
4299  cstate->max_fields *= 2;
4300  cstate->raw_fields =
4301  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
4302  }
4303 
4304  /* Remember start of field on both input and output sides */
4305  start_ptr = cur_ptr;
4306  cstate->raw_fields[fieldno] = output_ptr;
4307 
4308  /*
4309  * Scan data for field,
4310  *
4311  * The loop starts in "not quote" mode and then toggles between that
4312  * and "in quote" mode. The loop exits normally if it is in "not
4313  * quote" mode and a delimiter or line end is seen.
4314  */
4315  for (;;)
4316  {
4317  char c;
4318 
4319  /* Not in quote */
4320  for (;;)
4321  {
4322  end_ptr = cur_ptr;
4323  if (cur_ptr >= line_end_ptr)
4324  goto endfield;
4325  c = *cur_ptr++;
4326  /* unquoted field delimiter */
4327  if (c == delimc)
4328  {
4329  found_delim = true;
4330  goto endfield;
4331  }
4332  /* start of quoted field (or part of field) */
4333  if (c == quotec)
4334  {
4335  saw_quote = true;
4336  break;
4337  }
4338  /* Add c to output string */
4339  *output_ptr++ = c;
4340  }
4341 
4342  /* In quote */
4343  for (;;)
4344  {
4345  end_ptr = cur_ptr;
4346  if (cur_ptr >= line_end_ptr)
4347  ereport(ERROR,
4348  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
4349  errmsg("unterminated CSV quoted field")));
4350 
4351  c = *cur_ptr++;
4352 
4353  /* escape within a quoted field */
4354  if (c == escapec)
4355  {
4356  /*
4357  * peek at the next char if available, and escape it if it
4358  * is an escape char or a quote char
4359  */
4360  if (cur_ptr < line_end_ptr)
4361  {
4362  char nextc = *cur_ptr;
4363 
4364  if (nextc == escapec || nextc == quotec)
4365  {
4366  *output_ptr++ = nextc;
4367  cur_ptr++;
4368  continue;
4369  }
4370  }
4371  }
4372 
4373  /*
4374  * end of quoted field. Must do this test after testing for
4375  * escape in case quote char and escape char are the same
4376  * (which is the common case).
4377  */
4378  if (c == quotec)
4379  break;
4380 
4381  /* Add c to output string */
4382  *output_ptr++ = c;
4383  }
4384  }
4385 endfield:
4386 
4387  /* Terminate attribute value in output area */
4388  *output_ptr++ = '\0';
4389 
4390  /* Check whether raw input matched null marker */
4391  input_len = end_ptr - start_ptr;
4392  if (!saw_quote && input_len == cstate->null_print_len &&
4393  strncmp(start_ptr, cstate->null_print, input_len) == 0)
4394  cstate->raw_fields[fieldno] = NULL;
4395 
4396  fieldno++;
4397  /* Done if we hit EOL instead of a delim */
4398  if (!found_delim)
4399  break;
4400  }
4401 
4402  /* Clean up state of attribute_buf */
4403  output_ptr--;
4404  Assert(*output_ptr == '\0');
4405  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
4406 
4407  return fieldno;
4408 }
4409 
4410 
4411 /*
4412  * Read a binary attribute
4413  */
4414 static Datum
4416  int column_no, FmgrInfo *flinfo,
4417  Oid typioparam, int32 typmod,
4418  bool *isnull)
4419 {
4420  int32 fld_size;
4421  Datum result;
4422 
4423  if (!CopyGetInt32(cstate, &fld_size))
4424  ereport(ERROR,
4425  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
4426  errmsg("unexpected EOF in COPY data")));
4427  if (fld_size == -1)
4428  {
4429  *isnull = true;
4430  return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
4431  }
4432  if (fld_size < 0)
4433  ereport(ERROR,
4434  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
4435  errmsg("invalid field size")));
4436 
4437  /* reset attribute_buf to empty, and load raw data in it */
4438  resetStringInfo(&cstate->attribute_buf);
4439 
4440  enlargeStringInfo(&cstate->attribute_buf, fld_size);
4441  if (CopyGetData(cstate, cstate->attribute_buf.data,
4442  fld_size, fld_size) != fld_size)
4443  ereport(ERROR,
4444  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
4445  errmsg("unexpected EOF in COPY data")));
4446 
4447  cstate->attribute_buf.len = fld_size;
4448  cstate->attribute_buf.data[fld_size] = '\0';
4449 
4450  /* Call the column type's binary input converter */
4451  result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
4452  typioparam, typmod);
4453 
4454  /* Trouble if it didn't eat the whole buffer */
4455  if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
4456  ereport(ERROR,
4457  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
4458  errmsg("incorrect binary data format")));
4459 
4460  *isnull = false;
4461  return result;
4462 }
4463 
4464 /*
4465  * Send text representation of one attribute, with conversion and escaping
4466  */
4467 #define DUMPSOFAR() \
4468  do { \
4469  if (ptr > start) \
4470  CopySendData(cstate, start, ptr - start); \
4471  } while (0)
4472 
4473 static void
4474 CopyAttributeOutText(CopyState cstate, char *string)
4475 {
4476  char *ptr;
4477  char *start;
4478  char c;
4479  char delimc = cstate->delim[0];
4480 
4481  if (cstate->need_transcoding)
4482  ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
4483  else
4484  ptr = string;
4485 
4486  /*
4487  * We have to grovel through the string searching for control characters
4488  * and instances of the delimiter character. In most cases, though, these
4489  * are infrequent. To avoid overhead from calling CopySendData once per
4490  * character, we dump out all characters between escaped characters in a
4491  * single call. The loop invariant is that the data from "start" to "ptr"
4492  * can be sent literally, but hasn't yet been.
4493  *
4494  * We can skip pg_encoding_mblen() overhead when encoding is safe, because
4495  * in valid backend encodings, extra bytes of a multibyte character never
4496  * look like ASCII. This loop is sufficiently performance-critical that
4497  * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
4498  * of the normal safe-encoding path.
4499  */
4500  if (cstate->encoding_embeds_ascii)
4501  {
4502  start = ptr;
4503  while ((c = *ptr) != '\0')
4504  {
4505  if ((unsigned char) c < (unsigned char) 0x20)
4506  {
4507  /*
4508  * \r and \n must be escaped, the others are traditional. We
4509  * prefer to dump these using the C-like notation, rather than
4510  * a backslash and the literal character, because it makes the
4511  * dump file a bit more proof against Microsoftish data
4512  * mangling.
4513  */
4514  switch (c)
4515  {
4516  case '\b':
4517  c = 'b';
4518  break;
4519  case '\f':
4520  c = 'f';
4521  break;
4522  case '\n':
4523  c = 'n';
4524  break;
4525  case '\r':
4526  c = 'r';
4527  break;
4528  case '\t':
4529  c = 't';
4530  break;
4531  case '\v':
4532  c = 'v';
4533  break;
4534  default:
4535  /* If it's the delimiter, must backslash it */
4536  if (c == delimc)
4537  break;
4538  /* All ASCII control chars are length 1 */
4539  ptr++;
4540  continue; /* fall to end of loop */
4541  }
4542  /* if we get here, we need to convert the control char */
4543  DUMPSOFAR();
4544  CopySendChar(cstate, '\\');
4545  CopySendChar(cstate, c);
4546  start = ++ptr; /* do not include char in next run */
4547  }
4548  else if (c == '\\' || c == delimc)
4549  {
4550  DUMPSOFAR();
4551  CopySendChar(cstate, '\\');
4552  start = ptr++; /* we include char in next run */
4553  }
4554  else if (IS_HIGHBIT_SET(c))
4555  ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
4556  else
4557  ptr++;
4558  }
4559  }
4560  else
4561  {
4562  start = ptr;
4563  while ((c = *ptr) != '\0')
4564  {
4565  if ((unsigned char) c < (unsigned char) 0x20)
4566  {
4567  /*
4568  * \r and \n must be escaped, the others are traditional. We
4569  * prefer to dump these using the C-like notation, rather than
4570  * a backslash and the literal character, because it makes the
4571  * dump file a bit more proof against Microsoftish data
4572  * mangling.
4573  */
4574  switch (c)
4575  {
4576  case '\b':
4577  c = 'b';
4578  break;
4579  case '\f':
4580  c = 'f';
4581  break;
4582  case '\n':
4583  c = 'n';
4584  break;
4585  case '\r':
4586  c = 'r';
4587  break;
4588  case '\t':
4589  c = 't';
4590  break;
4591  case '\v':
4592  c = 'v';
4593  break;
4594  default:
4595  /* If it's the delimiter, must backslash it */
4596  if (c == delimc)
4597  break;
4598  /* All ASCII control chars are length 1 */
4599  ptr++;
4600  continue; /* fall to end of loop */
4601  }
4602  /* if we get here, we need to convert the control char */
4603  DUMPSOFAR();
4604  CopySendChar(cstate, '\\');
4605  CopySendChar(cstate, c);
4606  start = ++ptr; /* do not include char in next run */
4607  }
4608  else if (c == '\\' || c == delimc)
4609  {
4610  DUMPSOFAR();
4611  CopySendChar(cstate, '\\');
4612  start = ptr++; /* we include char in next run */
4613  }
4614  else
4615  ptr++;
4616  }
4617  }
4618 
4619  DUMPSOFAR();
4620 }
4621 
4622 /*
4623  * Send text representation of one attribute, with conversion and
4624  * CSV-style escaping
4625  */
4626 static void
4627 CopyAttributeOutCSV(CopyState cstate, char *string,
4628  bool use_quote, bool single_attr)
4629 {
4630  char *ptr;
4631  char *start;
4632  char c;
4633  char delimc = cstate->delim[0];
4634  char quotec = cstate->quote[0];
4635  char escapec = cstate->escape[0];
4636 
4637  /* force quoting if it matches null_print (before conversion!) */
4638  if (!use_quote && strcmp(string, cstate->null_print) == 0)
4639  use_quote = true;
4640 
4641  if (cstate->need_transcoding)
4642  ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
4643  else
4644  ptr = string;
4645 
4646  /*
4647  * Make a preliminary pass to discover if it needs quoting
4648  */
4649  if (!use_quote)
4650  {
4651  /*
4652  * Because '\.' can be a data value, quote it if it appears alone on a
4653  * line so it is not interpreted as the end-of-data marker.
4654  */
4655  if (single_attr && strcmp(ptr, "\\.") == 0)
4656  use_quote = true;
4657  else
4658  {
4659  char *tptr = ptr;
4660 
4661  while ((c = *tptr) != '\0')
4662  {
4663  if (c == delimc || c == quotec || c == '\n' || c == '\r')
4664  {
4665  use_quote = true;
4666  break;
4667  }
4668  if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
4669  tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
4670  else
4671  tptr++;
4672  }
4673  }
4674  }
4675 
4676  if (use_quote)
4677  {
4678  CopySendChar(cstate, quotec);
4679 
4680  /*
4681  * We adopt the same optimization strategy as in CopyAttributeOutText
4682  */
4683  start = ptr;
4684  while ((c = *ptr) != '\0')
4685  {
4686  if (c == quotec || c == escapec)
4687  {
4688  DUMPSOFAR();
4689  CopySendChar(cstate, escapec);
4690  start = ptr; /* we include char in next run */
4691  }
4692  if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
4693  ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
4694  else
4695  ptr++;
4696  }
4697  DUMPSOFAR();
4698 
4699  CopySendChar(cstate, quotec);
4700  }
4701  else
4702  {
4703  /* If it doesn't need quoting, we can just dump it as-is */
4704  CopySendString(cstate, ptr);
4705  }
4706 }
4707 
4708 /*
4709  * CopyGetAttnums - build an integer list of attnums to be copied
4710  *
4711  * The input attnamelist is either the user-specified column list,
4712  * or NIL if there was none (in which case we want all the non-dropped
4713  * columns).
4714  *
4715  * rel can be NULL ... it's only used for error reports.
4716  */
4717 static List *
4718 CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
4719 {
4720  List *attnums = NIL;
4721 
4722  if (attnamelist == NIL)
4723  {
4724  /* Generate default column list */
4725  int attr_count = tupDesc->natts;
4726  int i;
4727 
4728  for (i = 0; i < attr_count; i++)
4729  {
4730  if (TupleDescAttr(tupDesc, i)->attisdropped)
4731  continue;
4732  attnums = lappend_int(attnums, i + 1);
4733  }
4734  }
4735  else
4736  {
4737  /* Validate the user-supplied list and extract attnums */
4738  ListCell *l;
4739 
4740  foreach(l, attnamelist)
4741  {
4742  char *name = strVal(lfirst(l));
4743  int attnum;
4744  int i;
4745 
4746  /* Lookup column name */
4747  attnum = InvalidAttrNumber;
4748  for (i = 0; i < tupDesc->natts; i++)
4749  {
4750  Form_pg_attribute att = TupleDescAttr(tupDesc, i);
4751 
4752  if (att->attisdropped)
4753  continue;
4754  if (namestrcmp(&(att->attname), name) == 0)
4755  {
4756  attnum = att->attnum;
4757  break;
4758  }
4759  }
4760  if (attnum == InvalidAttrNumber)
4761  {
4762  if (rel != NULL)
4763  ereport(ERROR,
4764  (errcode(ERRCODE_UNDEFINED_COLUMN),
4765  errmsg("column \"%s\" of relation \"%s\" does not exist",
4766  name, RelationGetRelationName(rel))));
4767  else
4768  ereport(ERROR,
4769  (errcode(ERRCODE_UNDEFINED_COLUMN),
4770  errmsg("column \"%s\" does not exist",
4771  name)));
4772  }
4773  /* Check for duplicates */
4774  if (list_member_int(attnums, attnum))
4775  ereport(ERROR,
4776  (errcode(ERRCODE_DUPLICATE_COLUMN),
4777  errmsg("column \"%s\" specified more than once",
4778  name)));
4779  attnums = lappend_int(attnums, attnum);
4780  }
4781  }
4782 
4783  return attnums;
4784 }
4785 
4786 
4787 /*
4788  * copy_dest_startup --- executor startup
4789  */
4790 static void
4791 copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
4792 {
4793  /* no-op */
4794 }
4795 
4796 /*
4797  * copy_dest_receive --- receive one tuple
4798  */
4799 static bool
4801 {
4802  DR_copy *myState = (DR_copy *) self;
4803  CopyState cstate = myState->cstate;
4804 
4805  /* Make sure the tuple is fully deconstructed */
4806  slot_getallattrs(slot);
4807 
4808  /* And send the data */
4809  CopyOneRowTo(cstate, InvalidOid, slot->tts_values, slot->tts_isnull);
4810  myState->processed++;
4811 
4812  return true;
4813 }
4814 
4815 /*
4816  * copy_dest_shutdown --- executor end
4817  */
4818 static void
4820 {
4821  /* no-op */
4822 }
4823 
4824 /*
4825  * copy_dest_destroy --- release DestReceiver object
4826  */
4827 static void
4829 {
4830  pfree(self);
4831 }
4832 
4833 /*
4834  * CreateCopyDestReceiver -- create a suitable DestReceiver object
4835  */
4836 DestReceiver *
4838 {
4839  DR_copy *self = (DR_copy *) palloc(sizeof(DR_copy));
4840 
4841  self->pub.receiveSlot = copy_dest_receive;
4842  self->pub.rStartup = copy_dest_startup;
4843  self->pub.rShutdown = copy_dest_shutdown;
4844  self->pub.rDestroy = copy_dest_destroy;
4845  self->pub.mydest = DestCopyOut;
4846 
4847  self->cstate = NULL; /* will be set later */
4848  self->processed = 0;
4849 
4850  return (DestReceiver *) self;
4851 }
signed short int16
Definition: c.h:245
List * indirection
Definition: parsenodes.h:440
bool NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields)
Definition: copy.c:3233
int ri_NumIndices
Definition: execnodes.h:357
#define NIL
Definition: pg_list.h:69
uint32 CommandId
Definition: c.h:405
static Datum CopyReadBinaryAttribute(CopyState cstate, int column_no, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
Definition: copy.c:4415
TupleTableSlot * ExecStoreTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer, bool shouldFree)
Definition: execTuples.c:320
static int GetDecimalFromHex(char hex)
Definition: copy.c:3990
Definition: fmgr.h:56
#define MAX_COPY_DATA_DISPLAY
bool csv_mode
Definition: copy.c:117
static void SendCopyEnd(CopyState cstate)
Definition: copy.c:417
void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, Relation partition_root, int instrument_options)
Definition: execMain.c:1308
Relation ri_RelationDesc
Definition: execnodes.h:354
List * range_table
Definition: copy.c:165
static CopyState BeginCopy(ParseState *pstate, bool is_from, Relation rel, RawStmt *raw_query, Oid queryRelId, List *attnamelist, List *options)
Definition: copy.c:1373
void UpdateActiveSnapshotCommandId(void)
Definition: snapmgr.c:781
#define IsA(nodeptr, _type_)
Definition: nodes.h:561
static bool CopyReadLineText(CopyState cstate)
Definition: copy.c:3631
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:200
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:123
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate)
Definition: execTuples.c:852
bool contain_volatile_functions_not_nextval(Node *clause)
Definition: clauses.c:1007
Node * val
Definition: parsenodes.h:441
static bool CopyGetInt32(CopyState cstate, int32 *val)
Definition: copy.c:683
int errhint(const char *fmt,...)
Definition: elog.c:987
int pg_char_to_encoding(const char *name)
Definition: encnames.c:551
char ** raw_fields
Definition: copy.c:189
Definition: copy.c:73
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2632
#define VARDATA(PTR)
Definition: postgres.h:303
static void EndCopy(CopyState cstate)
Definition: copy.c:1715
bool binary
Definition: copy.c:114
#define pq_flush()
Definition: libpq.h:39
void CopyFromErrorCallback(void *arg)
Definition: copy.c:2176
void heap_endscan(HeapScanDesc scan)
Definition: heapam.c:1565
void PreventCommandIfParallelMode(const char *cmdname)
Definition: utility.c:254
List * ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, EState *estate, bool noDupErr, bool *specConflict, List *arbiterIndexes)
Definition: execIndexing.c:271
List * attlist
Definition: parsenodes.h:1945
List * fromClause
Definition: parsenodes.h:1515
#define ISOCTAL(c)
Definition: copy.c:52
#define OCTVALUE(c)
Definition: copy.c:53
#define ResetPerTupleExprContext(estate)
Definition: executor.h:486
#define RelationGetDescr(relation)
Definition: rel.h:428
#define HEAP_INSERT_FROZEN
Definition: heapam.h:30
char * name
Definition: parsenodes.h:439
bool need_transcoding
Definition: copy.c:104
#define castNode(_type_, nodeptr)
Definition: nodes.h:579
void FreeQueryDesc(QueryDesc *qdesc)
Definition: pquery.c:105
FmgrInfo * in_functions
Definition: copy.c:160
AttrNumber num_defaults
Definition: copy.c:156
List * attnumlist
Definition: copy.c:110
#define OIDOID
Definition: pg_type.h:328
#define VARSIZE(PTR)
Definition: postgres.h:304
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:581
char * filename
Definition: copy.c:111
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:84
CopyState BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename, bool is_program, copy_data_source_cb data_source_cb, List *attnamelist, List *options)
Definition: copy.c:2974
#define VARHDRSZ
Definition: c.h:439
bool file_has_oids
Definition: copy.c:157
#define DatumGetObjectId(X)
Definition: postgres.h:506
List * relationOids
Definition: plannodes.h:88
char * pstrdup(const char *in)
Definition: mcxt.c:1076
#define pg_hton16(x)
Definition: pg_bswap.h:120
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:203
static void ReceiveCopyBegin(CopyState cstate)
Definition: copy.c:381
#define XLogIsNeeded()
Definition: xlog.h:146
#define pg_ntoh16(x)
Definition: pg_bswap.h:124
Definition: copy.c:216
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
#define MAX_BUFFERED_TUPLES
bool rd_islocaltemp
Definition: rel.h:90
TupleTableSlot * ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TupleTableSlot *slot)
Definition: trigger.c:2369
Expr * expression_planner(Expr *expr)
Definition: planner.c:5997
void ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:147
DestReceiver pub
Definition: copy.c:218
void ProcessCopyOptions(ParseState *pstate, CopyState cstate, bool is_from, List *options)
Definition: copy.c:1015
#define RELKIND_MATVIEW
Definition: pg_class.h:165
void ExecConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate)
Definition: execMain.c:1940
StringInfoData line_buf
Definition: copy.c:198
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
Definition: copy.c:252
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:839
#define AccessShareLock
Definition: lockdefs.h:36
#define InvalidBuffer
Definition: buf.h:25
#define gettext_noop(x)
Definition: c.h:139
struct CopyStateData CopyStateData
Definition: nodes.h:510
#define strVal(v)
Definition: value.h:54
struct cursor * cur
Definition: ecpg.c:28
int raw_buf_index
Definition: copy.c:211
static void CopyAttributeOutText(CopyState cstate, char *string)
Definition: copy.c:4474
bool line_buf_valid
Definition: copy.c:200
bool ThereAreNoPriorRegisteredSnapshots(void)
Definition: snapmgr.c:1649
CopyState cstate
Definition: copy.c:219
int errcode(int sqlerrcode)
Definition: elog.c:575
#define PG_BINARY_W
Definition: c.h:1047
bool superuser(void)
Definition: superuser.c:47
int namestrcmp(Name name, const char *str)
Definition: name.c:247
#define MemSet(start, val, len)
Definition: c.h:863
bool fe_eof
Definition: copy.c:101
uint64 CopyFrom(CopyState cstate)
Definition: copy.c:2281
SubTransactionId rd_newRelfilenodeSubid
Definition: rel.h:111
void pq_putemptymessage(char msgtype)
Definition: pqformat.c:390
void ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo, HeapTuple trigtuple, List *recheckIndexes, TransitionCaptureState *transition_capture)
Definition: trigger.c:2354
#define select(n, r, w, e, timeout)
Definition: win32.h:374
void heap_sync(Relation rel)
Definition: heapam.c:9202
Datum * tts_values
Definition: tuptable.h:125
#define FirstLowInvalidHeapAttributeNumber
Definition: sysattr.h:28
static void ClosePipeToProgram(CopyState cstate)
Definition: copy.c:1692
#define HEAP_INSERT_SKIP_WAL
Definition: heapam.h:28
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:135
static int CopyReadAttributesCSV(CopyState cstate)
Definition: copy.c:4246
void PopActiveSnapshot(void)
Definition: snapmgr.c:812
AclMode requiredPerms
Definition: parsenodes.h:1053
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:695
#define heap_close(r, l)
Definition: heapam.h:97
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:585
static void pq_sendbyte(StringInfo buf, int8 byt)
Definition: pqformat.h:164
bool * force_quote_flags
Definition: copy.c:127
List * es_range_table
Definition: execnodes.h:431
Form_pg_class rd_rel
Definition: rel.h:114
unsigned int Oid
Definition: postgres_ext.h:31
List * pg_analyze_and_rewrite(RawStmt *parsetree, const char *query_string, Oid *paramTypes, int numParams, QueryEnvironment *queryEnv)
Definition: postgres.c:649
char * delim
Definition: copy.c:122
#define PG_PROTOCOL_MAJOR(v)
Definition: pqcomm.h:104
Node * utilityStmt
Definition: parsenodes.h:118
bool is_program
Definition: parsenodes.h:1948
#define linitial_node(type, l)
Definition: pg_list.h:114
bool volatile_defexprs
Definition: copy.c:164
Datum oidout(PG_FUNCTION_ARGS)
Definition: oid.c:127
struct ErrorContextCallback * previous
Definition: elog.h:238
#define PG_BINARY_R
Definition: c.h:1046
static void copy_dest_destroy(DestReceiver *self)
Definition: copy.c:4828
bool * force_null_flags
Definition: copy.c:131
char * wait_result_to_str(int exitstatus)
Definition: wait_error.c:32
MemoryContext rowcontext
Definition: copy.c:151
int natts
Definition: tupdesc.h:73
bool line_buf_converted
Definition: copy.c:199
HeapTuple tcs_original_insert_tuple
Definition: trigger.h:82
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
char * pg_server_to_any(const char *s, int len, int encoding)
Definition: mbutils.c:634
signed int int32
Definition: c.h:246
int ClosePipeStream(FILE *file)
Definition: fd.c:2502
int errdetail_internal(const char *fmt,...)
Definition: elog.c:900
bool * convert_select_flags
Definition: copy.c:134
static void CopySendChar(CopyState cstate, char c)
Definition: copy.c:458
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1662
void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
Definition: execIndexing.c:149
int location
Definition: parsenodes.h:235
CopyDest copy_dest
Definition: copy.c:97
int location
Definition: parsenodes.h:442
#define REFILL_LINEBUF
Definition: copy.c:269
#define HeapTupleSetOid(tuple, oid)
Definition: htup_details.h:698
ErrorContextCallback * error_context_stack
Definition: elog.c:88
#define list_make1(x1)
Definition: pg_list.h:139
char * null_print
Definition: copy.c:119
const char * cur_attname
Definition: copy.c:139
void ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:461
Definition: copy.c:61
bool trig_insert_instead_row
Definition: reltrigger.h:57
void FreeExecutorState(EState *estate)
Definition: execUtils.c:183
Relation rel
Definition: copy.c:108
#define GetPerTupleExprContext(estate)
Definition: executor.h:477
BulkInsertState GetBulkInsertState(void)
Definition: heapam.c:2379
MemoryContext copycontext
Definition: copy.c:145
#define pq_startcopyout()
Definition: libpq.h:46
copy_data_source_cb data_source_cb
Definition: copy.c:113
bool defGetBoolean(DefElem *def)
Definition: define.c:111
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:127
bool trig_insert_new_table
Definition: reltrigger.h:74
Bitmapset * selectedCols
Definition: parsenodes.h:1055
unsigned short uint16
Definition: c.h:257
void pfree(void *pointer)
Definition: mcxt.c:949
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
#define IS_HIGHBIT_SET(ch)
Definition: c.h:979
static CopyState BeginCopyTo(ParseState *pstate, Relation rel, RawStmt *query, Oid queryRelId, const char *filename, bool is_program, List *attnamelist, List *options)
Definition: copy.c:1738
static void CopySendInt16(CopyState cstate, int16 val)
Definition: copy.c:700
bool ThereAreNoReadyPortals(void)
Definition: portalmem.c:1141
TupleTableSlot * partition_tuple_slot
Definition: copy.c:172
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
void pq_startmsgread(void)
Definition: pqcomm.c:1210
#define DatumGetCString(X)
Definition: postgres.h:572
static int CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
Definition: copy.c:557
#define S_IWGRP
Definition: win32.h:449
#define lfirst_int(lc)
Definition: pg_list.h:107
void ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
Definition: execMain.c:299
#define pg_hton32(x)
Definition: pg_bswap.h:121
static void CopyAttributeOutCSV(CopyState cstate, char *string, bool use_quote, bool single_attr)
Definition: copy.c:4627
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1676
static void CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, int hi_options, ResultRelInfo *resultRelInfo, TupleTableSlot *myslot, BulkInsertState bistate, int nBufferedTuples, HeapTuple *bufferedTuples, int firstBufferedLineNo)
Definition: copy.c:2890
TupleConversionMap * tcs_map
Definition: trigger.h:73
#define FATAL
Definition: elog.h:52
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:122
char * defGetString(DefElem *def)
Definition: define.c:49
RangeVar * relation
Definition: parsenodes.h:1942
static struct @121 value
ItemPointerData t_self
Definition: htup.h:65
void PushCopiedSnapshot(Snapshot snapshot)
Definition: snapmgr.c:769
QueryDesc * CreateQueryDesc(PlannedStmt *plannedstmt, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, DestReceiver *dest, ParamListInfo params, QueryEnvironment *queryEnv, int instrument_options)
Definition: pquery.c:67
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:820
TriggerDesc * trigdesc
Definition: rel.h:120
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:165
static char * limit_printout_length(const char *str)
Definition: copy.c:2252
#define lfirst_node(type, lc)
Definition: pg_list.h:109
QueryDesc * queryDesc
Definition: copy.c:109
EolType
Definition: copy.c:70
bool list_member_int(const List *list, int datum)
Definition: list.c:485
static void copy_dest_shutdown(DestReceiver *self)
Definition: copy.c:4819
int num_dispatch
Definition: copy.c:168
bool encoding_embeds_ascii
Definition: copy.c:105
uint32 t_len
Definition: htup.h:64
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3033
char * c
void ExecBSInsertTriggers(EState *estate, ResultRelInfo *relinfo)
Definition: trigger.c:2220
char * raw_buf
Definition: copy.c:210
static bool CopyLoadRawBuf(CopyState cstate)
Definition: copy.c:737
Node * stmt
Definition: parsenodes.h:1425
#define NoLock
Definition: lockdefs.h:34
int pq_getbytes(char *s, size_t len)
Definition: pqcomm.c:1094
char * quote
Definition: copy.c:123
static char * buf
Definition: pg_test_fsync.c:67
#define memmove(d, s, c)
Definition: c.h:1064
bool * tts_isnull
Definition: tuptable.h:126
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:216
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:286
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
List * targetList
Definition: parsenodes.h:1514
ResultRelInfo * es_result_relations
Definition: execnodes.h:441
static uint64 DoCopyTo(CopyState cstate)
Definition: copy.c:1876
DestReceiver * CreateDestReceiver(CommandDest dest)
Definition: dest.c:109
static void copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
Definition: copy.c:4791
int location
Definition: parsenodes.h:722
#define RowExclusiveLock
Definition: lockdefs.h:38
ExprState ** defexprs
Definition: copy.c:163
const char * cur_relname
Definition: copy.c:137
int errcode_for_file_access(void)
Definition: elog.c:598
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1785
#define is_absolute_path(filename)
Definition: port.h:77
#define CStringGetDatum(X)
Definition: postgres.h:584
char string[11]
Definition: preproc-type.c:46
Definition: copy.c:74
List * options
Definition: parsenodes.h:1950
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2117
FmgrInfo oid_in_function
Definition: copy.c:158
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:264
#define RelationGetRelationName(relation)
Definition: rel.h:436
static const char BinarySignature[11]
Definition: copy.c:289
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:187
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:62
struct FdwRoutine * ri_FdwRoutine
Definition: execnodes.h:378
unsigned int uint32
Definition: c.h:258
bytea * SendFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1723
int raw_buf_len
Definition: copy.c:212
Oid t_tableOid
Definition: htup.h:66
#define RELKIND_FOREIGN_TABLE
Definition: pg_class.h:167
bool trig_insert_after_row
Definition: reltrigger.h:56
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
int max_fields
Definition: copy.c:188
char * escape
Definition: copy.c:124
const char * p_sourcetext
Definition: parse_node.h:171
List * returningList
Definition: parsenodes.h:144
TupleTableSlot * es_trig_tuple_slot
Definition: execnodes.h:460
FILE * OpenPipeStream(const char *command, const char *mode)
Definition: fd.c:2216
void getTypeBinaryInputInfo(Oid type, Oid *typReceive, Oid *typIOParam)
Definition: lsyscache.c:2665
void getTypeInputInfo(Oid type, Oid *typInput, Oid *typIOParam)
Definition: lsyscache.c:2599
#define ereport(elevel, rest)
Definition: elog.h:122
Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:2456
int null_print_len
Definition: copy.c:120
void slot_getallattrs(TupleTableSlot *slot)
Definition: heaptuple.c:1238
List * force_null
Definition: copy.c:130
TriggerDesc * ri_TrigDesc
Definition: execnodes.h:366
void ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:401
EState * CreateExecutorState(void)
Definition: execUtils.c:80
List * lappend_int(List *list, int datum)
Definition: list.c:146
Node * arg
Definition: parsenodes.h:720
TupleConversionMap * convert_tuples_by_name(TupleDesc indesc, TupleDesc outdesc, const char *msg)
Definition: tupconvert.c:210
TupleConversionMap ** transition_tupconv_maps
Definition: copy.c:174
Definition: copy.c:75
List * lappend(List *list, void *datum)
Definition: list.c:128
#define S_IWOTH
Definition: win32.h:453
int file_encoding
Definition: copy.c:103
static List * CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
Definition: copy.c:4718
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
TupleDesc tupDesc
Definition: execdesc.h:47
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1272
#define InvalidSnapshot
Definition: snapshot.h:25
SubTransactionId rd_createSubid
Definition: rel.h:110
Oid * typioparams
Definition: copy.c:161
bool is_program
Definition: copy.c:112
#define RELKIND_PARTITIONED_TABLE
Definition: pg_class.h:168
Node * build_column_default(Relation rel, int attrno)
bool trig_insert_before_row
Definition: reltrigger.h:55
void getTypeBinaryOutputInfo(Oid type, Oid *typSend, bool *typIsVarlena)
Definition: lsyscache.c:2698
List * es_tupleTable
Definition: execnodes.h:473
MemoryContext AllocSetContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: aset.c:322
void ExecResetTupleTable(List *tupleTable, bool shouldFree)
Definition: execTuples.c:156
void * palloc0(Size size)
Definition: mcxt.c:877
bool header_line
Definition: copy.c:118
void ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo, TransitionCaptureState *transition_capture)
Definition: trigger.c:2277
uintptr_t Datum
Definition: postgres.h:372
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
int pg_get_client_encoding(void)
Definition: mbutils.c:306
#define ACL_SELECT
Definition: parsenodes.h:73
TupleTableSlot * tupslot
Definition: partition.h:66
void ExecSetSlotDescriptor(TupleTableSlot *slot, TupleDesc tupdesc)
Definition: execTuples.c:247
int ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, TupleTableSlot *slot, EState *estate)
Definition: execMain.c:3362
TransitionCaptureState * MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType)
Definition: trigger.c:4404
HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction)
Definition: heapam.c:1808
int stmt_len
Definition: parsenodes.h:1427
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Definition: copy.c:240
int stmt_location
Definition: parsenodes.h:1426
int es_num_result_relations
Definition: execnodes.h:442
List * ri_PartitionCheck
Definition: execnodes.h:408
#define RAW_BUF_SIZE
Definition: copy.c:209
TupleDesc rd_att
Definition: rel.h:115
bool freeze
Definition: copy.c:116
Datum InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
Definition: fmgr.c:1618
int pq_getbyte(void)
Definition: pqcomm.c:1000
void pq_endmsgread(void)
Definition: pqcomm.c:1234
Relation heap_openrv(const RangeVar *relation, LOCKMODE lockmode)
Definition: heapam.c:1318
#define InvalidOid
Definition: postgres_ext.h:36
static bool copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
Definition: copy.c:4800
bool XactReadOnly
Definition: xact.c:77
bool * force_notnull_flags
Definition: copy.c:129
CmdType commandType
Definition: parsenodes.h:110
void AfterTriggerBeginQuery(void)
Definition: trigger.c:4526
List * force_notnull
Definition: copy.c:128
int check_enable_rls(Oid relid, Oid checkAsUser, bool noError)
Definition: rls.c:53
int errmsg_internal(const char *fmt,...)
Definition: elog.c:827
void EndCopyFrom(CopyState cstate)
Definition: copy.c:3526
#define PG_CATCH()
Definition: elog.h:293
#define makeNode(_type_)
Definition: nodes.h:558
bool list_member_oid(const List *list, Oid datum)
Definition: list.c:505
EolType eol_type
Definition: copy.c:102