PostgreSQL Source Code  git master
copyfromparse.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * copyfromparse.c
4  * Parse CSV/text/binary format for COPY FROM.
5  *
6  * This file contains routines to parse the text, CSV and binary input
7  * formats. The main entry point is NextCopyFrom(), which parses the
8  * next input line and returns it as Datums.
9  *
10  * In text/CSV mode, the parsing happens in multiple stages:
11  *
12  * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
13  *                1.          2.            3.           4.
14  *
15  * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
16  * places it into 'raw_buf'.
17  *
18  * 2. CopyConvertBuf() calls the encoding conversion function to convert
19  * the data in 'raw_buf' from client to server encoding, placing the
20  * converted result in 'input_buf'.
21  *
22  * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
23  * It is responsible for finding the next newline marker, taking quote and
24  * escape characters into account according to the COPY options. The line
25  * is copied into 'line_buf', with quotes and escape characters still
26  * intact.
27  *
28  * 4. CopyReadAttributesText/CSV() function takes the input line from
29  * 'line_buf', and splits it into fields, unescaping the data as required.
30  * The fields are stored in 'attribute_buf', and 'raw_fields' array holds
31  * pointers to each field.
32  *
33  * If encoding conversion is not required, a shortcut is taken in step 2 to
34  * avoid copying the data unnecessarily. The 'input_buf' pointer is set to
35  * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
36  * directly into 'input_buf'. CopyConvertBuf() then merely validates that
37  * the data is valid in the current encoding.
38  *
39  * In binary mode, the pipeline is much simpler. Input is loaded into
40  * 'raw_buf', and encoding conversion is done in the datatype-specific
41  * receive functions, if required. 'input_buf' and 'line_buf' are not used,
42  * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
43  * data when it's passed the receive function.
44  *
45  * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE). 'input_buf' is also
46  * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required. 'line_buf'
47  * and 'attribute_buf' are expanded on demand, to hold the longest line
48  * encountered so far.
49  *
50  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
51  * Portions Copyright (c) 1994, Regents of the University of California
52  *
53  *
54  * IDENTIFICATION
55  * src/backend/commands/copyfromparse.c
56  *
57  *-------------------------------------------------------------------------
58  */
59 #include "postgres.h"
60 
61 #include <ctype.h>
62 #include <unistd.h>
63 #include <sys/stat.h>
64 
65 #include "commands/copy.h"
67 #include "commands/progress.h"
68 #include "executor/executor.h"
69 #include "libpq/libpq.h"
70 #include "libpq/pqformat.h"
71 #include "mb/pg_wchar.h"
72 #include "miscadmin.h"
73 #include "nodes/miscnodes.h"
74 #include "pgstat.h"
75 #include "port/pg_bswap.h"
76 #include "utils/builtins.h"
77 #include "utils/memutils.h"
78 #include "utils/rel.h"
79 
/*
 * ISOCTAL(c) is true when c is one of the characters '0'..'7'; OCTVALUE(c)
 * maps such a character to its numeric value.  ISOCTAL evaluates its
 * argument twice, so avoid passing expressions with side effects.
 */
80 #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
81 #define OCTVALUE(c) ((c) - '0')
82 
83 /*
84  * These macros centralize code used to process line_buf and input_buf buffers.
85  * They are macros because they often do continue/break control and to avoid
86  * function call overhead in tight COPY loops.
87  *
88  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
89  * prevent the continue/break processing from working. We end the "if (1)"
90  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
91  * any "else" in the calling code, and to avoid any compiler warnings about
92  * empty statements. See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
93  */
94 
95 /*
96  * This keeps the character read at the top of the loop in the buffer
97  * even if there is more than one read-ahead.
98  */
/*
 * Relies on these names being in scope at the expansion site: input_buf_ptr,
 * copy_buf_len, hit_eof, prev_raw_ptr and need_data.  It must be expanded
 * inside a loop, because it executes "continue" after rewinding
 * input_buf_ptr to prev_raw_ptr and setting need_data.
 */
99 #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
100 if (1) \
101 { \
102  if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
103  { \
104  input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
105  need_data = true; \
106  continue; \
107  } \
108 } else ((void) 0)
109 
110 /* This consumes the remainder of the buffer and breaks */
/*
 * Relies on these names being in scope at the expansion site: input_buf_ptr,
 * copy_buf_len, hit_eof and result.  It must be expanded inside a loop or
 * switch, because it executes "break" after setting result to true.  The
 * buffer pointer is only advanced to copy_buf_len when extralen is nonzero.
 */
111 #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
112 if (1) \
113 { \
114  if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
115  { \
116  if (extralen) \
117  input_buf_ptr = copy_buf_len; /* consume the partial character */ \
118  /* backslash just before EOF, treat as data char */ \
119  result = true; \
120  break; \
121  } \
122 } else ((void) 0)
123 
124 /*
125  * Transfer any approved data to line_buf; must do this to be sure
126  * there is some room in input_buf.
127  */
/*
 * Relies on 'cstate' and 'input_buf_ptr' being in scope at the expansion
 * site.  Appends the bytes between cstate->input_buf_index and
 * input_buf_ptr to cstate->line_buf, then advances input_buf_index to
 * input_buf_ptr.  Does nothing when there are no such bytes.
 */
128 #define REFILL_LINEBUF \
129 if (1) \
130 { \
131  if (input_buf_ptr > cstate->input_buf_index) \
132  { \
133  appendBinaryStringInfo(&cstate->line_buf, \
134  cstate->input_buf + cstate->input_buf_index, \
135  input_buf_ptr - cstate->input_buf_index); \
136  cstate->input_buf_index = input_buf_ptr; \
137  } \
138 } else ((void) 0)
139 
140 /* Undo any read-ahead and jump out of the block. */
/*
 * Relies on 'input_buf_ptr' and 'prev_raw_ptr' being in scope and on a
 * label named not_end_of_copy existing in the expanding function.  The
 * pointer is reset to just past the character at prev_raw_ptr before the
 * jump.
 */
141 #define NO_END_OF_COPY_GOTO \
142 if (1) \
143 { \
144  input_buf_ptr = prev_raw_ptr + 1; \
145  goto not_end_of_copy; \
146 } else ((void) 0)
147 
148 /* NOTE: there's a copy of this in copyto.c */
/*
 * Signature expected at the start of binary-format input.  The binary header
 * reader compares all 11 bytes with memcmp().  The array is sized to exactly
 * the 11 characters written in the literal (including the explicit \0), so
 * the literal's implicit terminating NUL is not stored.
 */
149 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
150 
151 
152 /* non-export function prototypes */
153 static bool CopyReadLine(CopyFromState cstate);
154 static bool CopyReadLineText(CopyFromState cstate);
155 static int CopyReadAttributesText(CopyFromState cstate);
156 static int CopyReadAttributesCSV(CopyFromState cstate);
157 static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
158  Oid typioparam, int32 typmod,
159  bool *isnull);
160 
161 
162 /* Low-level communications functions */
163 static int CopyGetData(CopyFromState cstate, void *databuf,
164  int minread, int maxread);
165 static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
166 static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
167 static void CopyLoadInputBuf(CopyFromState cstate);
168 static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
169 
/*
 * Tell the frontend that the backend is ready to receive COPY data.
 *
 * The message built here carries an overall format code and one format code
 * per column in cstate->attnumlist; the code is 1 when cstate->opts.binary
 * is set and 0 otherwise.  Afterwards the copy source is switched to
 * COPY_FRONTEND, fe_msgbuf is allocated, and the output is flushed.
 *
 * NOTE(review): the embedded listing numbers skip 171, 173 and 178, so the
 * line naming this function and the lines that declare and initialize 'buf'
 * are not visible in this listing; confirm against the upstream file.
 */
170 void
172 {
174  int natts = list_length(cstate->attnumlist);
175  int16 format = (cstate->opts.binary ? 1 : 0);
176  int i;
177 
179  pq_sendbyte(&buf, format); /* overall format */
180  pq_sendint16(&buf, natts);
181  for (i = 0; i < natts; i++)
182  pq_sendint16(&buf, format); /* per-column formats */
183  pq_endmessage(&buf);
184  cstate->copy_src = COPY_FRONTEND;
185  cstate->fe_msgbuf = makeStringInfo();
186  /* We *must* flush here to ensure FE knows it can send. */
187  pq_flush();
188 }
189 
/*
 * Read and validate the header of binary-format COPY input.
 *
 * In order: an 11-byte signature that must equal BinarySignature, a 32-bit
 * flags word, and a 32-bit header extension length followed by that many
 * bytes, which are read and discarded.  Every failure is reported with
 * ereport(ERROR) using ERRCODE_BAD_COPY_FILE_FORMAT.
 *
 * NOTE(review): the embedded listing numbers skip 191, so the line naming
 * this function and its parameter list is not visible in this listing.
 */
190 void
192 {
193  char readSig[11];
194  int32 tmp;
195 
196  /* Signature */
197  if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
198  memcmp(readSig, BinarySignature, 11) != 0)
199  ereport(ERROR,
200  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
201  errmsg("COPY file signature not recognized")));
202  /* Flags field */
203  if (!CopyGetInt32(cstate, &tmp))
204  ereport(ERROR,
205  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
206  errmsg("invalid COPY file header (missing flags)")));
 /* Bit 16 set is rejected with the "WITH OIDS" message */
207  if ((tmp & (1 << 16)) != 0)
208  ereport(ERROR,
209  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
210  errmsg("invalid COPY file header (WITH OIDS)")));
211  tmp &= ~(1 << 16);
 /* Any remaining bit at position 16 or above is an unknown critical flag */
212  if ((tmp >> 16) != 0)
213  ereport(ERROR,
214  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
215  errmsg("unrecognized critical flags in COPY file header")));
216  /* Header extension length */
217  if (!CopyGetInt32(cstate, &tmp) ||
218  tmp < 0)
219  ereport(ERROR,
220  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
221  errmsg("invalid COPY file header (missing length)")));
222  /* Skip extension header, if present */
223  while (tmp-- > 0)
224  {
 /* readSig is reused as a one-byte scratch buffer; the data is discarded */
225  if (CopyReadBinaryData(cstate, readSig, 1) != 1)
226  ereport(ERROR,
227  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
228  errmsg("invalid COPY file header (wrong length)")));
229  }
230 }
231 
232 /*
233  * CopyGetData reads data from the source (file or frontend)
234  *
235  * We attempt to read at least minread, and at most maxread, bytes from
236  * the source. The actual number of bytes read is returned; if this is
237  * less than minread, EOF was detected.
238  *
239  * Note: when copying from the frontend, we expect a proper EOF mark per
240  * protocol; if the frontend simply drops the connection, we raise error.
241  * It seems unwise to allow the COPY IN to complete normally in that case.
242  *
243  * NB: no data conversion is applied here.
244  */
/*
 * Parameters and result, as the code below uses them:
 *	cstate	- COPY state; cstate->copy_src selects the branch taken.
 *	databuf	- destination buffer.
 *	minread	- consulted by the COPY_FRONTEND loop and passed through to the
 *			  COPY_CALLBACK callback; the COPY_FILE branch does not use it.
 *	maxread	- upper bound on the number of bytes stored into databuf.
 * Returns the number of bytes stored.
 *
 * NOTE(review): the embedded listing numbers skip 256, 273 and 305, so three
 * source lines inside this function are not visible in this listing.
 */
245 static int
246 CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
247 {
248  int bytesread = 0;
249 
250  switch (cstate->copy_src)
251  {
252  case COPY_FILE:
 /* A single fread() of up to maxread bytes; zero bytes marks EOF */
253  bytesread = fread(databuf, 1, maxread, cstate->copy_file);
254  if (ferror(cstate->copy_file))
255  ereport(ERROR,
257  errmsg("could not read from COPY file: %m")));
258  if (bytesread == 0)
259  cstate->raw_reached_eof = true;
260  break;
261  case COPY_FRONTEND:
 /* Keep going until minread bytes are collected, room runs out, or EOF */
262  while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
263  {
264  int avail;
265 
 /* Current message fully consumed: fetch the next one */
266  while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
267  {
268  /* Try to receive another message */
269  int mtype;
270  int maxmsglen;
271 
272  readmessage:
274  pq_startmsgread();
275  mtype = pq_getbyte();
276  if (mtype == EOF)
277  ereport(ERROR,
278  (errcode(ERRCODE_CONNECTION_FAILURE),
279  errmsg("unexpected EOF on client connection with an open transaction")));
280  /* Validate message type and set packet size limit */
281  switch (mtype)
282  {
283  case PqMsg_CopyData:
284  maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
285  break;
286  case PqMsg_CopyDone:
287  case PqMsg_CopyFail:
288  case PqMsg_Flush:
289  case PqMsg_Sync:
290  maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
291  break;
292  default:
293  ereport(ERROR,
294  (errcode(ERRCODE_PROTOCOL_VIOLATION),
295  errmsg("unexpected message type 0x%02X during COPY from stdin",
296  mtype)));
297  maxmsglen = 0; /* keep compiler quiet */
298  break;
299  }
300  /* Now collect the message body */
301  if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
302  ereport(ERROR,
303  (errcode(ERRCODE_CONNECTION_FAILURE),
304  errmsg("unexpected EOF on client connection with an open transaction")));
306  /* ... and process it */
307  switch (mtype)
308  {
309  case PqMsg_CopyData:
310  break;
311  case PqMsg_CopyDone:
312  /* COPY IN correctly terminated by frontend */
313  cstate->raw_reached_eof = true;
 /* Hand back whatever was gathered before the terminator */
314  return bytesread;
315  case PqMsg_CopyFail:
316  ereport(ERROR,
317  (errcode(ERRCODE_QUERY_CANCELED),
318  errmsg("COPY from stdin failed: %s",
319  pq_getmsgstring(cstate->fe_msgbuf))));
320  break;
321  case PqMsg_Flush:
322  case PqMsg_Sync:
323 
324  /*
325  * Ignore Flush/Sync for the convenience of client
326  * libraries (such as libpq) that may send those
327  * without noticing that the command they just
328  * sent was COPY.
329  */
330  goto readmessage;
331  default:
332  Assert(false); /* NOT REACHED */
333  }
334  }
 /* Copy out as much of the current message as the caller has room for */
335  avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
336  if (avail > maxread)
337  avail = maxread;
338  pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
339  databuf = (void *) ((char *) databuf + avail);
340  maxread -= avail;
341  bytesread += avail;
342  }
343  break;
344  case COPY_CALLBACK:
 /* The registered callback supplies the data and the byte count */
345  bytesread = cstate->data_source_cb(databuf, minread, maxread);
346  break;
347  }
348 
349  return bytesread;
350 }
351 
352 
353 /*
354  * These functions do apply some data conversion
355  */
356 
357 /*
358  * CopyGetInt32 reads an int32 that appears in network byte order
359  *
360  * Returns true if OK, false if EOF
361  */
362 static inline bool
364 {
365  uint32 buf;
366 
367  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
368  {
369  *val = 0; /* suppress compiler warning */
370  return false;
371  }
372  *val = (int32) pg_ntoh32(buf);
373  return true;
374 }
375 
376 /*
377  * CopyGetInt16 reads an int16 that appears in network byte order
378  */
379 static inline bool
381 {
382  uint16 buf;
383 
384  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
385  {
386  *val = 0; /* suppress compiler warning */
387  return false;
388  }
389  *val = (int16) pg_ntoh16(buf);
390  return true;
391 }
392 
393 
394 /*
395  * Perform encoding conversion on data in 'raw_buf', writing the converted
396  * data into 'input_buf'.
397  *
398  * On entry, there must be some data to convert in 'raw_buf'.
399  */
/*
 * Outcome is reported through cstate: input_buf_len grows when bytes were
 * verified or converted, input_reached_eof is set when no raw data remains
 * and none is coming, and input_reached_error is set when nothing could be
 * verified or converted although it should have been possible.  No error is
 * raised here.
 *
 * NOTE(review): the embedded listing numbers skip 401 and 507, so the line
 * naming this function and one argument line of the conversion call are not
 * visible in this listing.
 */
400 static void
402 {
403  /*
404  * If the file and server encoding are the same, no encoding conversion is
405  * required. However, we still need to verify that the input is valid for
406  * the encoding.
407  */
408  if (!cstate->need_transcoding)
409  {
410  /*
411  * When conversion is not required, input_buf and raw_buf are the
412  * same. raw_buf_len is the total number of bytes in the buffer, and
413  * input_buf_len tracks how many of those bytes have already been
414  * verified.
415  */
416  int preverifiedlen = cstate->input_buf_len;
417  int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
418  int nverified;
419 
420  if (unverifiedlen == 0)
421  {
422  /*
423  * If no more raw data is coming, report the EOF to the caller.
424  */
425  if (cstate->raw_reached_eof)
426  cstate->input_reached_eof = true;
427  return;
428  }
429 
430  /*
431  * Verify the new data, including any residual unverified bytes from
432  * previous round.
433  */
434  nverified = pg_encoding_verifymbstr(cstate->file_encoding,
435  cstate->raw_buf + preverifiedlen,
436  unverifiedlen);
437  if (nverified == 0)
438  {
439  /*
440  * Could not verify anything.
441  *
442  * If there is no more raw input data coming, it means that there
443  * was an incomplete multi-byte sequence at the end. Also, if
444  * there's "enough" input left, we should be able to verify at
445  * least one character, and a failure to do so means that we've
446  * hit an invalid byte sequence.
447  */
448  if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
449  cstate->input_reached_error = true;
450  return;
451  }
 /* Possibly only a prefix was verified; the rest is retried next call */
452  cstate->input_buf_len += nverified;
453  }
454  else
455  {
456  /*
457  * Encoding conversion is needed.
458  */
459  int nbytes;
460  unsigned char *src;
461  int srclen;
462  unsigned char *dst;
463  int dstlen;
464  int convertedlen;
465 
466  if (RAW_BUF_BYTES(cstate) == 0)
467  {
468  /*
469  * If no more raw data is coming, report the EOF to the caller.
470  */
471  if (cstate->raw_reached_eof)
472  cstate->input_reached_eof = true;
473  return;
474  }
475 
476  /*
477  * First, copy down any unprocessed data.
478  */
479  nbytes = INPUT_BUF_BYTES(cstate);
480  if (nbytes > 0 && cstate->input_buf_index > 0)
481  memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
482  nbytes);
483  cstate->input_buf_index = 0;
484  cstate->input_buf_len = nbytes;
485  cstate->input_buf[nbytes] = '\0';
486 
487  src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
488  srclen = cstate->raw_buf_len - cstate->raw_buf_index;
489  dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
 /*
  * NOTE(review): the "+ 1" presumably leaves room for a terminating NUL,
  * which assumes input_buf was allocated with at least INPUT_BUF_SIZE + 1
  * bytes -- confirm at the allocation site.
  */
490  dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
491 
492  /*
493  * Do the conversion. This might stop short, if there is an invalid
494  * byte sequence in the input. We'll convert as much as we can in
495  * that case.
496  *
497  * Note: Even if we hit an invalid byte sequence, we don't report the
498  * error until all the valid bytes have been consumed. The input
499  * might contain an end-of-input marker (\.), and we don't want to
500  * report an error if the invalid byte sequence is after the
501  * end-of-input marker. We might unnecessarily convert some data
502  * after the end-of-input marker as long as it's valid for the
503  * encoding, but that's harmless.
504  */
505  convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
506  cstate->file_encoding,
508  src, srclen,
509  dst, dstlen,
510  true);
511  if (convertedlen == 0)
512  {
513  /*
514  * Could not convert anything. If there is no more raw input data
515  * coming, it means that there was an incomplete multi-byte
516  * sequence at the end. Also, if there is plenty of input left,
517  * we should be able to convert at least one character, so a
518  * failure to do so must mean that we've hit a byte sequence
519  * that's invalid.
520  */
521  if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
522  cstate->input_reached_error = true;
523  return;
524  }
 /* convertedlen counts consumed source bytes, so it advances raw_buf_index */
525  cstate->raw_buf_index += convertedlen;
 /*
  * NOTE(review): measuring the output with strlen() assumes the conversion
  * routine NUL-terminates what it writes at dst and that the converted
  * data contains no embedded NUL bytes -- confirm.
  */
526  cstate->input_buf_len += strlen((char *) dst);
527  }
528 }
529 
530 /*
531  * Report an encoding or conversion error.
532  */
/*
 * Called once input_reached_error has been set (asserted below) to raise
 * the actual error for the offending bytes.
 *
 * NOTE(review): the embedded listing numbers skip 534, 545, 569 and 571.
 * The line naming this function, the head of the call whose arguments
 * appear in the first branch, and the head plus one argument line of the
 * call in the second branch are therefore not visible in this listing.
 */
533 static void
535 {
536  Assert(cstate->raw_buf_len > 0);
537  Assert(cstate->input_reached_error);
538 
539  if (!cstate->need_transcoding)
540  {
541  /*
542  * Everything up to input_buf_len was successfully verified, and
543  * input_buf_len points to the invalid or incomplete character.
544  */
 /* The arguments below: start of the bad data and the bytes remaining */
546  cstate->raw_buf + cstate->input_buf_len,
547  cstate->raw_buf_len - cstate->input_buf_len);
548  }
549  else
550  {
551  /*
552  * raw_buf_index points to the invalid or untranslatable character. We
553  * let the conversion routine report the error, because it can provide
554  * a more specific error message than we could here. An earlier call
555  * to the conversion routine in CopyConvertBuf() detected that there
556  * is an error, now we call the conversion routine again with
557  * noError=false, to have it throw the error.
558  */
559  unsigned char *src;
560  int srclen;
561  unsigned char *dst;
562  int dstlen;
563 
 /* Same source/destination window computation as in CopyConvertBuf() */
564  src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
565  srclen = cstate->raw_buf_len - cstate->raw_buf_index;
566  dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
567  dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
568 
570  cstate->file_encoding,
572  src, srclen,
573  dst, dstlen,
574  false);
575 
576  /*
577  * The conversion routine should have reported an error, so this
578  * should not be reached.
579  */
580  elog(ERROR, "encoding conversion failed without error");
581  }
582 }
583 
584 /*
585  * Load more data from data source to raw_buf.
586  *
587  * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
588  * beginning of the buffer, and we load new data after that.
589  */
/*
 * Effects on cstate: raw_buf_index is reset to 0, raw_buf_len becomes the
 * carried-over byte count plus the newly read bytes, bytes_processed grows
 * by the newly read bytes, and raw_reached_eof is set when nothing was read.
 * When raw_buf and input_buf are the same buffer, input_buf_index and
 * input_buf_len are shifted down to match.
 *
 * NOTE(review): the embedded listing numbers skip 591 and 635, so the line
 * naming this function and one statement after the bytes_processed update
 * are not visible in this listing.
 */
590 static void
592 {
593  int nbytes;
594  int inbytes;
595 
596  /*
597  * In text mode, if encoding conversion is not required, raw_buf and
598  * input_buf point to the same buffer. Their len/index better agree, too.
599  */
600  if (cstate->raw_buf == cstate->input_buf)
601  {
602  Assert(!cstate->need_transcoding);
603  Assert(cstate->raw_buf_index == cstate->input_buf_index);
604  Assert(cstate->input_buf_len <= cstate->raw_buf_len);
605  }
606 
607  /*
608  * Copy down the unprocessed data if any.
609  */
610  nbytes = RAW_BUF_BYTES(cstate);
611  if (nbytes > 0 && cstate->raw_buf_index > 0)
612  memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
613  nbytes);
614  cstate->raw_buf_len -= cstate->raw_buf_index;
615  cstate->raw_buf_index = 0;
616 
617  /*
618  * If raw_buf and input_buf are in fact the same buffer, adjust the
619  * input_buf variables, too.
620  */
621  if (cstate->raw_buf == cstate->input_buf)
622  {
623  cstate->input_buf_len -= cstate->input_buf_index;
624  cstate->input_buf_index = 0;
625  }
626 
627  /* Load more data */
 /* Ask for at least one byte, at most the free space after the kept data */
628  inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
629  1, RAW_BUF_SIZE - cstate->raw_buf_len);
630  nbytes += inbytes;
 /*
  * NOTE(review): nbytes can reach RAW_BUF_SIZE here, so this store assumes
  * raw_buf was allocated with at least RAW_BUF_SIZE + 1 bytes -- confirm
  * at the allocation site.
  */
631  cstate->raw_buf[nbytes] = '\0';
632  cstate->raw_buf_len = nbytes;
633 
634  cstate->bytes_processed += inbytes;
636 
637  if (inbytes == 0)
638  cstate->raw_reached_eof = true;
639 }
640 
641 /*
642  * CopyLoadInputBuf loads some more data into input_buf
643  *
644  * On return, at least one more input character is loaded into
645  * input_buf, or input_reached_eof is set.
646  *
647  * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
648  * of the buffer and then we load more data after that.
649  */
650 static void
652 {
653  int nbytes = INPUT_BUF_BYTES(cstate);
654 
655  /*
656  * The caller has updated input_buf_index to indicate how much of the
657  * input has been consumed and isn't needed anymore. If input_buf is the
658  * same physical area as raw_buf, update raw_buf_index accordingly.
659  */
660  if (cstate->raw_buf == cstate->input_buf)
661  {
662  Assert(!cstate->need_transcoding);
663  Assert(cstate->input_buf_index >= cstate->raw_buf_index);
664  cstate->raw_buf_index = cstate->input_buf_index;
665  }
666 
667  for (;;)
668  {
669  /* If we now have some unconverted data, try to convert it */
670  CopyConvertBuf(cstate);
671 
672  /* If we now have some more input bytes ready, return them */
673  if (INPUT_BUF_BYTES(cstate) > nbytes)
674  return;
675 
676  /*
677  * If we reached an invalid byte sequence, or we're at an incomplete
678  * multi-byte character but there is no more raw input data, report
679  * conversion error.
680  */
681  if (cstate->input_reached_error)
682  CopyConversionError(cstate);
683 
684  /* no more input, and everything has been converted */
685  if (cstate->input_reached_eof)
686  break;
687 
688  /* Try to load more raw data */
689  Assert(!cstate->raw_reached_eof);
690  CopyLoadRawBuf(cstate);
691  }
692 }
693 
694 /*
695  * CopyReadBinaryData
696  *
697  * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
698  * and writes them to 'dest'. Returns the number of bytes read (which
699  * would be less than 'nbytes' only if we reach EOF).
700  */
701 static int
702 CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
703 {
704  int copied_bytes = 0;
705 
706  if (RAW_BUF_BYTES(cstate) >= nbytes)
707  {
708  /* Enough bytes are present in the buffer. */
709  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
710  cstate->raw_buf_index += nbytes;
711  copied_bytes = nbytes;
712  }
713  else
714  {
715  /*
716  * Not enough bytes in the buffer, so must read from the file. Need
717  * to loop since 'nbytes' could be larger than the buffer size.
718  */
719  do
720  {
721  int copy_bytes;
722 
723  /* Load more data if buffer is empty. */
724  if (RAW_BUF_BYTES(cstate) == 0)
725  {
726  CopyLoadRawBuf(cstate);
727  if (cstate->raw_reached_eof)
728  break; /* EOF */
729  }
730 
731  /* Transfer some bytes. */
732  copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
733  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
734  cstate->raw_buf_index += copy_bytes;
735  dest += copy_bytes;
736  copied_bytes += copy_bytes;
737  } while (copied_bytes < nbytes);
738  }
739 
740  return copied_bytes;
741 }
742 
743 /*
744  * Read raw fields in the next line for COPY FROM in text or csv mode.
745  * Return false if no more lines.
746  *
747  * An internal temporary buffer is returned via 'fields'. It is valid until
748  * the next call of the function. Since the function returns all raw fields
749  * in the input file, 'nfields' could be different from the number of columns
750  * in the relation.
751  *
752  * NOTE: force_not_null option are not applied to the returned fields.
753  */
754 bool
755 NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
756 {
757  int fldct;
758  bool done;
759 
760  /* only available for text or csv input */
761  Assert(!cstate->opts.binary);
762 
763  /* on input check that the header line is correct if needed */
764  if (cstate->cur_lineno == 0 && cstate->opts.header_line)
765  {
766  ListCell *cur;
767  TupleDesc tupDesc;
768 
769  tupDesc = RelationGetDescr(cstate->rel);
770 
771  cstate->cur_lineno++;
772  done = CopyReadLine(cstate);
773 
774  if (cstate->opts.header_line == COPY_HEADER_MATCH)
775  {
776  int fldnum;
777 
778  if (cstate->opts.csv_mode)
779  fldct = CopyReadAttributesCSV(cstate);
780  else
781  fldct = CopyReadAttributesText(cstate);
782 
783  if (fldct != list_length(cstate->attnumlist))
784  ereport(ERROR,
785  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
786  errmsg("wrong number of fields in header line: got %d, expected %d",
787  fldct, list_length(cstate->attnumlist))));
788 
789  fldnum = 0;
790  foreach(cur, cstate->attnumlist)
791  {
792  int attnum = lfirst_int(cur);
793  char *colName;
794  Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
795 
796  Assert(fldnum < cstate->max_fields);
797 
798  colName = cstate->raw_fields[fldnum++];
799  if (colName == NULL)
800  ereport(ERROR,
801  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
802  errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
803  fldnum, cstate->opts.null_print, NameStr(attr->attname))));
804 
805  if (namestrcmp(&attr->attname, colName) != 0)
806  {
807  ereport(ERROR,
808  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
809  errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
810  fldnum, colName, NameStr(attr->attname))));
811  }
812  }
813  }
814 
815  if (done)
816  return false;
817  }
818 
819  cstate->cur_lineno++;
820 
821  /* Actually read the line into memory here */
822  done = CopyReadLine(cstate);
823 
824  /*
825  * EOF at start of line means we're done. If we see EOF after some
826  * characters, we act as though it was newline followed by EOF, ie,
827  * process the line and then exit loop on next iteration.
828  */
829  if (done && cstate->line_buf.len == 0)
830  return false;
831 
832  /* Parse the line into de-escaped field values */
833  if (cstate->opts.csv_mode)
834  fldct = CopyReadAttributesCSV(cstate);
835  else
836  fldct = CopyReadAttributesText(cstate);
837 
838  *fields = cstate->raw_fields;
839  *nfields = fldct;
840  return true;
841 }
842 
843 /*
844  * Read next tuple from file for COPY FROM. Return false if no more tuples.
845  *
846  * 'econtext' is used to evaluate default expression for each column that is
847  * either not read from the file or is using the DEFAULT option of COPY FROM.
848  * It can be NULL when no default values are used, i.e. when all columns are
849  * read from the file, and DEFAULT option is unset.
850  *
851  * 'values' and 'nulls' arrays must be the same length as columns of the
852  * relation passed to BeginCopyFrom. This function fills the arrays.
853  */
/*
 * Flow: reset 'values', 'nulls' and cstate->defaults for the row, then read
 * one row in text/CSV or binary form, then evaluate the defaults listed in
 * defmap.  Returns false at end of input, true otherwise.
 *
 * NOTE(review): the embedded listing numbers skip 855, 955 and 1048, so the
 * line naming this function with its leading parameters, and one line after
 * each "Assert(econtext != NULL)", are not visible in this listing.
 */
854 bool
856  Datum *values, bool *nulls)
857 {
858  TupleDesc tupDesc;
859  AttrNumber num_phys_attrs,
860  attr_count,
861  num_defaults = cstate->num_defaults;
862  FmgrInfo *in_functions = cstate->in_functions;
863  Oid *typioparams = cstate->typioparams;
864  int i;
865  int *defmap = cstate->defmap;
866  ExprState **defexprs = cstate->defexprs;
867 
868  tupDesc = RelationGetDescr(cstate->rel);
869  num_phys_attrs = tupDesc->natts;
870  attr_count = list_length(cstate->attnumlist);
871 
872  /* Initialize all values for row to NULL */
873  MemSet(values, 0, num_phys_attrs * sizeof(Datum));
874  MemSet(nulls, true, num_phys_attrs * sizeof(bool));
875  MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
876 
877  if (!cstate->opts.binary)
878  {
879  char **field_strings;
880  ListCell *cur;
881  int fldct;
882  int fieldno;
883  char *string;
884 
885  /* read raw fields in the next line */
886  if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
887  return false;
888 
889  /* check for overflowing fields */
890  if (attr_count > 0 && fldct > attr_count)
891  ereport(ERROR,
892  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
893  errmsg("extra data after last expected column")));
894 
895  fieldno = 0;
896 
897  /* Loop to read the user attributes on the line. */
898  foreach(cur, cstate->attnumlist)
899  {
900  int attnum = lfirst_int(cur);
 /* m is the zero-based index into the per-column arrays */
901  int m = attnum - 1;
902  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
903 
904  if (fieldno >= fldct)
905  ereport(ERROR,
906  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
907  errmsg("missing data for column \"%s\"",
908  NameStr(att->attname))));
909  string = field_strings[fieldno++];
910 
911  if (cstate->convert_select_flags &&
912  !cstate->convert_select_flags[m])
913  {
914  /* ignore input field, leaving column as NULL */
915  continue;
916  }
917 
918  if (cstate->opts.csv_mode)
919  {
920  if (string == NULL &&
921  cstate->opts.force_notnull_flags[m])
922  {
923  /*
924  * FORCE_NOT_NULL option is set and column is NULL -
925  * convert it to the NULL string.
926  */
927  string = cstate->opts.null_print;
928  }
929  else if (string != NULL && cstate->opts.force_null_flags[m]
930  && strcmp(string, cstate->opts.null_print) == 0)
931  {
932  /*
933  * FORCE_NULL option is set and column matches the NULL
934  * string. It must have been quoted, or otherwise the
935  * string would already have been set to NULL. Convert it
936  * to NULL as specified.
937  */
938  string = NULL;
939  }
940  }
941 
 /* Record the column being processed until its conversion completes */
942  cstate->cur_attname = NameStr(att->attname);
943  cstate->cur_attval = string;
944 
945  if (string != NULL)
946  nulls[m] = false;
947 
 /*
  * NOTE(review): defaults[] was cleared at the top of this function, so
  * a true entry here was presumably set while the line was parsed into
  * fields -- confirm in the attribute-reading routines.
  */
948  if (cstate->defaults[m])
949  {
950  /*
951  * The caller must supply econtext and have switched into the
952  * per-tuple memory context in it.
953  */
954  Assert(econtext != NULL);
956 
957  values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
958  }
959 
960  /*
961  * If ON_ERROR is specified with IGNORE, skip rows with soft
962  * errors
963  */
964  else if (!InputFunctionCallSafe(&in_functions[m],
965  string,
966  typioparams[m],
967  att->atttypmod,
968  (Node *) cstate->escontext,
969  &values[m]))
970  {
 /*
  * NOTE(review): this path returns true without resetting
  * cur_attname/cur_attval and without evaluating the defaults at
  * the bottom of the function; callers presumably detect the
  * skipped row through cstate->escontext -- confirm.
  */
971  cstate->num_errors++;
972  return true;
973  }
974 
975  cstate->cur_attname = NULL;
976  cstate->cur_attval = NULL;
977  }
978 
979  Assert(fieldno == attr_count);
980  }
981  else
982  {
983  /* binary */
984  int16 fld_count;
985  ListCell *cur;
986 
987  cstate->cur_lineno++;
988 
989  if (!CopyGetInt16(cstate, &fld_count))
990  {
991  /* EOF detected (end of file, or protocol-level EOF) */
992  return false;
993  }
994 
995  if (fld_count == -1)
996  {
997  /*
998  * Received EOF marker. Wait for the protocol-level EOF, and
999  * complain if it doesn't come immediately. In COPY FROM STDIN,
1000  * this ensures that we correctly handle CopyFail, if client
1001  * chooses to send that now. When copying from file, we could
1002  * ignore the rest of the file like in text mode, but we choose to
1003  * be consistent with the COPY FROM STDIN case.
1004  */
1005  char dummy;
1006 
1007  if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
1008  ereport(ERROR,
1009  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1010  errmsg("received copy data after EOF marker")));
1011  return false;
1012  }
1013 
 /* Binary rows must carry exactly one field per listed column */
1014  if (fld_count != attr_count)
1015  ereport(ERROR,
1016  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1017  errmsg("row field count is %d, expected %d",
1018  (int) fld_count, attr_count)));
1019 
1020  foreach(cur, cstate->attnumlist)
1021  {
1022  int attnum = lfirst_int(cur);
1023  int m = attnum - 1;
1024  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1025 
1026  cstate->cur_attname = NameStr(att->attname);
1027  values[m] = CopyReadBinaryAttribute(cstate,
1028  &in_functions[m],
1029  typioparams[m],
1030  att->atttypmod,
1031  &nulls[m]);
1032  cstate->cur_attname = NULL;
1033  }
1034  }
1035 
1036  /*
1037  * Now compute and insert any defaults available for the columns not
1038  * provided by the input data. Anything not processed here or above will
1039  * remain NULL.
1040  */
1041  for (i = 0; i < num_defaults; i++)
1042  {
1043  /*
1044  * The caller must supply econtext and have switched into the
1045  * per-tuple memory context in it.
1046  */
1047  Assert(econtext != NULL);
1049 
 /* defmap[i] is the index of the column whose default is evaluated */
1050  values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
1051  &nulls[defmap[i]]);
1052  }
1053 
1054  return true;
1055 }
1056 
1057 /*
1058  * Read the next input line and stash it in line_buf.
1059  *
1060  * Result is true if read was terminated by EOF, false if terminated
1061  * by newline. The terminating newline or EOF marker is not included
1062  * in the final value of line_buf.
1063  */
1064 static bool
1066 {
1067  bool result;
1068 
1069  resetStringInfo(&cstate->line_buf);
1070  cstate->line_buf_valid = false;
1071 
1072  /* Parse data and transfer into line_buf */
1073  result = CopyReadLineText(cstate);
1074 
1075  if (result)
1076  {
1077  /*
1078  * Reached EOF. In protocol version 3, we should ignore anything
1079  * after \. up to the protocol end of copy data. (XXX maybe better
1080  * not to treat \. as special?)
1081  */
1082  if (cstate->copy_src == COPY_FRONTEND)
1083  {
1084  int inbytes;
1085 
1086  do
1087  {
1088  inbytes = CopyGetData(cstate, cstate->input_buf,
1089  1, INPUT_BUF_SIZE);
1090  } while (inbytes > 0);
1091  cstate->input_buf_index = 0;
1092  cstate->input_buf_len = 0;
1093  cstate->raw_buf_index = 0;
1094  cstate->raw_buf_len = 0;
1095  }
1096  }
1097  else
1098  {
1099  /*
1100  * If we didn't hit EOF, then we must have transferred the EOL marker
1101  * to line_buf along with the data. Get rid of it.
1102  */
1103  switch (cstate->eol_type)
1104  {
1105  case EOL_NL:
1106  Assert(cstate->line_buf.len >= 1);
1107  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1108  cstate->line_buf.len--;
1109  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1110  break;
1111  case EOL_CR:
1112  Assert(cstate->line_buf.len >= 1);
1113  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
1114  cstate->line_buf.len--;
1115  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1116  break;
1117  case EOL_CRNL:
1118  Assert(cstate->line_buf.len >= 2);
1119  Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
1120  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1121  cstate->line_buf.len -= 2;
1122  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1123  break;
1124  case EOL_UNKNOWN:
1125  /* shouldn't get here */
1126  Assert(false);
1127  break;
1128  }
1129  }
1130 
1131  /* Now it's safe to use the buffer in error messages */
1132  cstate->line_buf_valid = true;
1133 
1134  return result;
1135 }
1136 
1137 /*
1138  * CopyReadLineText - inner loop of CopyReadLine for text mode
1139  */
1140 static bool
1142 {
1143  char *copy_input_buf;
1144  int input_buf_ptr;
1145  int copy_buf_len;
1146  bool need_data = false;
1147  bool hit_eof = false;
1148  bool result = false;
1149 
1150  /* CSV variables */
1151  bool first_char_in_line = true;
1152  bool in_quote = false,
1153  last_was_esc = false;
1154  char quotec = '\0';
1155  char escapec = '\0';
1156 
1157  if (cstate->opts.csv_mode)
1158  {
1159  quotec = cstate->opts.quote[0];
1160  escapec = cstate->opts.escape[0];
1161  /* ignore special escape processing if it's the same as quotec */
1162  if (quotec == escapec)
1163  escapec = '\0';
1164  }
1165 
1166  /*
1167  * The objective of this loop is to transfer the entire next input line
1168  * into line_buf. Hence, we only care for detecting newlines (\r and/or
1169  * \n) and the end-of-copy marker (\.).
1170  *
1171  * In CSV mode, \r and \n inside a quoted field are just part of the data
1172  * value and are put in line_buf. We keep just enough state to know if we
1173  * are currently in a quoted field or not.
1174  *
1175  * These four characters, and the CSV escape and quote characters, are
1176  * assumed the same in frontend and backend encodings.
1177  *
1178  * The input has already been converted to the database encoding. All
1179  * supported server encodings have the property that all bytes in a
1180  * multi-byte sequence have the high bit set, so a multibyte character
1181  * cannot contain any newline or escape characters embedded in the
1182  * multibyte sequence. Therefore, we can process the input byte-by-byte,
1183  * regardless of the encoding.
1184  *
1185  * For speed, we try to move data from input_buf to line_buf in chunks
1186  * rather than one character at a time. input_buf_ptr points to the next
1187  * character to examine; any characters from input_buf_index to
1188  * input_buf_ptr have been determined to be part of the line, but not yet
1189  * transferred to line_buf.
1190  *
1191  * For a little extra speed within the loop, we copy input_buf and
1192  * input_buf_len into local variables.
1193  */
1194  copy_input_buf = cstate->input_buf;
1195  input_buf_ptr = cstate->input_buf_index;
1196  copy_buf_len = cstate->input_buf_len;
1197 
1198  for (;;)
1199  {
1200  int prev_raw_ptr;
1201  char c;
1202 
1203  /*
1204  * Load more data if needed.
1205  *
1206  * TODO: We could just force four bytes of read-ahead and avoid the
1207  * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was
1208  * unsafe with the old v2 COPY protocol, but we don't support that
1209  * anymore.
1210  */
1211  if (input_buf_ptr >= copy_buf_len || need_data)
1212  {
1214 
1215  CopyLoadInputBuf(cstate);
1216  /* update our local variables */
1217  hit_eof = cstate->input_reached_eof;
1218  input_buf_ptr = cstate->input_buf_index;
1219  copy_buf_len = cstate->input_buf_len;
1220 
1221  /*
1222  * If we are completely out of data, break out of the loop,
1223  * reporting EOF.
1224  */
1225  if (INPUT_BUF_BYTES(cstate) <= 0)
1226  {
1227  result = true;
1228  break;
1229  }
1230  need_data = false;
1231  }
1232 
1233  /* OK to fetch a character */
1234  prev_raw_ptr = input_buf_ptr;
1235  c = copy_input_buf[input_buf_ptr++];
1236 
1237  if (cstate->opts.csv_mode)
1238  {
1239  /*
1240  * If character is '\\' or '\r', we may need to look ahead below.
1241  * Force fetch of the next character if we don't already have it.
1242  * We need to do this before changing CSV state, in case one of
1243  * these characters is also the quote or escape character.
1244  */
1245  if (c == '\\' || c == '\r')
1246  {
1248  }
1249 
1250  /*
1251  * Dealing with quotes and escapes here is mildly tricky. If the
1252  * quote char is also the escape char, there's no problem - we
1253  * just use the char as a toggle. If they are different, we need
1254  * to ensure that we only take account of an escape inside a
1255  * quoted field and immediately preceding a quote char, and not
1256  * the second in an escape-escape sequence.
1257  */
1258  if (in_quote && c == escapec)
1259  last_was_esc = !last_was_esc;
1260  if (c == quotec && !last_was_esc)
1261  in_quote = !in_quote;
1262  if (c != escapec)
1263  last_was_esc = false;
1264 
1265  /*
1266  * Updating the line count for embedded CR and/or LF chars is
1267  * necessarily a little fragile - this test is probably about the
1268  * best we can do. (XXX it's arguable whether we should do this
1269  * at all --- is cur_lineno a physical or logical count?)
1270  */
1271  if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
1272  cstate->cur_lineno++;
1273  }
1274 
1275  /* Process \r */
1276  if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
1277  {
1278  /* Check for \r\n on first line, _and_ handle \r\n. */
1279  if (cstate->eol_type == EOL_UNKNOWN ||
1280  cstate->eol_type == EOL_CRNL)
1281  {
1282  /*
1283  * If need more data, go back to loop top to load it.
1284  *
1285  * Note that if we are at EOF, c will wind up as '\0' because
1286  * of the guaranteed pad of input_buf.
1287  */
1289 
1290  /* get next char */
1291  c = copy_input_buf[input_buf_ptr];
1292 
1293  if (c == '\n')
1294  {
1295  input_buf_ptr++; /* eat newline */
1296  cstate->eol_type = EOL_CRNL; /* in case not set yet */
1297  }
1298  else
1299  {
1300  /* found \r, but no \n */
1301  if (cstate->eol_type == EOL_CRNL)
1302  ereport(ERROR,
1303  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1304  !cstate->opts.csv_mode ?
1305  errmsg("literal carriage return found in data") :
1306  errmsg("unquoted carriage return found in data"),
1307  !cstate->opts.csv_mode ?
1308  errhint("Use \"\\r\" to represent carriage return.") :
1309  errhint("Use quoted CSV field to represent carriage return.")));
1310 
1311  /*
1312  * if we got here, it is the first line and we didn't find
1313  * \n, so don't consume the peeked character
1314  */
1315  cstate->eol_type = EOL_CR;
1316  }
1317  }
1318  else if (cstate->eol_type == EOL_NL)
1319  ereport(ERROR,
1320  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1321  !cstate->opts.csv_mode ?
1322  errmsg("literal carriage return found in data") :
1323  errmsg("unquoted carriage return found in data"),
1324  !cstate->opts.csv_mode ?
1325  errhint("Use \"\\r\" to represent carriage return.") :
1326  errhint("Use quoted CSV field to represent carriage return.")));
1327  /* If reach here, we have found the line terminator */
1328  break;
1329  }
1330 
1331  /* Process \n */
1332  if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
1333  {
1334  if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
1335  ereport(ERROR,
1336  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1337  !cstate->opts.csv_mode ?
1338  errmsg("literal newline found in data") :
1339  errmsg("unquoted newline found in data"),
1340  !cstate->opts.csv_mode ?
1341  errhint("Use \"\\n\" to represent newline.") :
1342  errhint("Use quoted CSV field to represent newline.")));
1343  cstate->eol_type = EOL_NL; /* in case not set yet */
1344  /* If reach here, we have found the line terminator */
1345  break;
1346  }
1347 
1348  /*
1349  * In CSV mode, we only recognize \. alone on a line. This is because
1350  * \. is a valid CSV data value.
1351  */
1352  if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
1353  {
1354  char c2;
1355 
1358 
1359  /* -----
1360  * get next character
1361  * Note: we do not change c so if it isn't \., we can fall
1362  * through and continue processing.
1363  * -----
1364  */
1365  c2 = copy_input_buf[input_buf_ptr];
1366 
1367  if (c2 == '.')
1368  {
1369  input_buf_ptr++; /* consume the '.' */
1370 
1371  /*
1372  * Note: if we loop back for more data here, it does not
1373  * matter that the CSV state change checks are re-executed; we
1374  * will come back here with no important state changed.
1375  */
1376  if (cstate->eol_type == EOL_CRNL)
1377  {
1378  /* Get the next character */
1380  /* if hit_eof, c2 will become '\0' */
1381  c2 = copy_input_buf[input_buf_ptr++];
1382 
1383  if (c2 == '\n')
1384  {
1385  if (!cstate->opts.csv_mode)
1386  ereport(ERROR,
1387  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1388  errmsg("end-of-copy marker does not match previous newline style")));
1389  else
1391  }
1392  else if (c2 != '\r')
1393  {
1394  if (!cstate->opts.csv_mode)
1395  ereport(ERROR,
1396  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1397  errmsg("end-of-copy marker corrupt")));
1398  else
1400  }
1401  }
1402 
1403  /* Get the next character */
1405  /* if hit_eof, c2 will become '\0' */
1406  c2 = copy_input_buf[input_buf_ptr++];
1407 
1408  if (c2 != '\r' && c2 != '\n')
1409  {
1410  if (!cstate->opts.csv_mode)
1411  ereport(ERROR,
1412  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1413  errmsg("end-of-copy marker corrupt")));
1414  else
1416  }
1417 
1418  if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
1419  (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
1420  (cstate->eol_type == EOL_CR && c2 != '\r'))
1421  {
1422  ereport(ERROR,
1423  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1424  errmsg("end-of-copy marker does not match previous newline style")));
1425  }
1426 
1427  /*
1428  * Transfer only the data before the \. into line_buf, then
1429  * discard the data and the \. sequence.
1430  */
1431  if (prev_raw_ptr > cstate->input_buf_index)
1433  cstate->input_buf + cstate->input_buf_index,
1434  prev_raw_ptr - cstate->input_buf_index);
1435  cstate->input_buf_index = input_buf_ptr;
1436  result = true; /* report EOF */
1437  break;
1438  }
1439  else if (!cstate->opts.csv_mode)
1440  {
1441  /*
1442  * If we are here, it means we found a backslash followed by
1443  * something other than a period. In non-CSV mode, anything
1444  * after a backslash is special, so we skip over that second
1445  * character too. If we didn't do that \\. would be
1446  * considered an eof-of copy, while in non-CSV mode it is a
1447  * literal backslash followed by a period. In CSV mode,
1448  * backslashes are not special, so we want to process the
1449  * character after the backslash just like a normal character,
1450  * so we don't increment in those cases.
1451  */
1452  input_buf_ptr++;
1453  }
1454  }
1455 
1456  /*
1457  * This label is for CSV cases where \. appears at the start of a
1458  * line, but there is more text after it, meaning it was a data value.
1459  * We are more strict for \. in CSV mode because \. could be a data
1460  * value, while in non-CSV mode, \. cannot be a data value.
1461  */
1462 not_end_of_copy:
1463  first_char_in_line = false;
1464  } /* end of outer loop */
1465 
1466  /*
1467  * Transfer any still-uncopied data to line_buf.
1468  */
1470 
1471  return result;
1472 }
1473 
/*
 * Return decimal value for a hexadecimal digit
 *
 * 'hex' must be a valid hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F');
 * the callers in this file test it with isxdigit() first.  Any other
 * character yields a meaningless result, because the letter branch simply
 * offsets from 'a' without range-checking.
 */
static int
GetDecimalFromHex(char hex)
{
	/* Cast to unsigned char: the <ctype.h> functions need a non-negative arg */
	if (isdigit((unsigned char) hex))
		return hex - '0';
	else
		return tolower((unsigned char) hex) - 'a' + 10;
}
1485 
1486 /*
1487  * Parse the current line into separate attributes (fields),
1488  * performing de-escaping as needed.
1489  *
1490  * The input is in line_buf. We use attribute_buf to hold the result
1491  * strings. cstate->raw_fields[k] is set to point to the k'th attribute
1492  * string, or NULL when the input matches the null marker string.
1493  * This array is expanded as necessary.
1494  *
1495  * (Note that the caller cannot check for nulls since the returned
1496  * string would be the post-de-escaping equivalent, which may look
1497  * the same as some valid data string.)
1498  *
1499  * delim is the column delimiter string (must be just one byte for now).
1500  * null_print is the null marker string. Note that this is compared to
1501  * the pre-de-escaped input string.
1502  *
1503  * The return value is the number of fields actually read.
1504  */
1505 static int
1507 {
1508  char delimc = cstate->opts.delim[0];
1509  int fieldno;
1510  char *output_ptr;
1511  char *cur_ptr;
1512  char *line_end_ptr;
1513 
1514  /*
1515  * We need a special case for zero-column tables: check that the input
1516  * line is empty, and return.
1517  */
1518  if (cstate->max_fields <= 0)
1519  {
1520  if (cstate->line_buf.len != 0)
1521  ereport(ERROR,
1522  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1523  errmsg("extra data after last expected column")));
1524  return 0;
1525  }
1526 
1527  resetStringInfo(&cstate->attribute_buf);
1528 
1529  /*
1530  * The de-escaped attributes will certainly not be longer than the input
1531  * data line, so we can just force attribute_buf to be large enough and
1532  * then transfer data without any checks for enough space. We need to do
1533  * it this way because enlarging attribute_buf mid-stream would invalidate
1534  * pointers already stored into cstate->raw_fields[].
1535  */
1536  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1537  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1538  output_ptr = cstate->attribute_buf.data;
1539 
1540  /* set pointer variables for loop */
1541  cur_ptr = cstate->line_buf.data;
1542  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1543 
1544  /* Outer loop iterates over fields */
1545  fieldno = 0;
1546  for (;;)
1547  {
1548  bool found_delim = false;
1549  char *start_ptr;
1550  char *end_ptr;
1551  int input_len;
1552  bool saw_non_ascii = false;
1553 
1554  /* Make sure there is enough space for the next value */
1555  if (fieldno >= cstate->max_fields)
1556  {
1557  cstate->max_fields *= 2;
1558  cstate->raw_fields =
1559  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1560  }
1561 
1562  /* Remember start of field on both input and output sides */
1563  start_ptr = cur_ptr;
1564  cstate->raw_fields[fieldno] = output_ptr;
1565 
1566  /*
1567  * Scan data for field.
1568  *
1569  * Note that in this loop, we are scanning to locate the end of field
1570  * and also speculatively performing de-escaping. Once we find the
1571  * end-of-field, we can match the raw field contents against the null
1572  * marker string. Only after that comparison fails do we know that
1573  * de-escaping is actually the right thing to do; therefore we *must
1574  * not* throw any syntax errors before we've done the null-marker
1575  * check.
1576  */
1577  for (;;)
1578  {
1579  char c;
1580 
1581  end_ptr = cur_ptr;
1582  if (cur_ptr >= line_end_ptr)
1583  break;
1584  c = *cur_ptr++;
1585  if (c == delimc)
1586  {
1587  found_delim = true;
1588  break;
1589  }
1590  if (c == '\\')
1591  {
1592  if (cur_ptr >= line_end_ptr)
1593  break;
1594  c = *cur_ptr++;
1595  switch (c)
1596  {
1597  case '0':
1598  case '1':
1599  case '2':
1600  case '3':
1601  case '4':
1602  case '5':
1603  case '6':
1604  case '7':
1605  {
1606  /* handle \013 */
1607  int val;
1608 
1609  val = OCTVALUE(c);
1610  if (cur_ptr < line_end_ptr)
1611  {
1612  c = *cur_ptr;
1613  if (ISOCTAL(c))
1614  {
1615  cur_ptr++;
1616  val = (val << 3) + OCTVALUE(c);
1617  if (cur_ptr < line_end_ptr)
1618  {
1619  c = *cur_ptr;
1620  if (ISOCTAL(c))
1621  {
1622  cur_ptr++;
1623  val = (val << 3) + OCTVALUE(c);
1624  }
1625  }
1626  }
1627  }
1628  c = val & 0377;
1629  if (c == '\0' || IS_HIGHBIT_SET(c))
1630  saw_non_ascii = true;
1631  }
1632  break;
1633  case 'x':
1634  /* Handle \x3F */
1635  if (cur_ptr < line_end_ptr)
1636  {
1637  char hexchar = *cur_ptr;
1638 
1639  if (isxdigit((unsigned char) hexchar))
1640  {
1641  int val = GetDecimalFromHex(hexchar);
1642 
1643  cur_ptr++;
1644  if (cur_ptr < line_end_ptr)
1645  {
1646  hexchar = *cur_ptr;
1647  if (isxdigit((unsigned char) hexchar))
1648  {
1649  cur_ptr++;
1650  val = (val << 4) + GetDecimalFromHex(hexchar);
1651  }
1652  }
1653  c = val & 0xff;
1654  if (c == '\0' || IS_HIGHBIT_SET(c))
1655  saw_non_ascii = true;
1656  }
1657  }
1658  break;
1659  case 'b':
1660  c = '\b';
1661  break;
1662  case 'f':
1663  c = '\f';
1664  break;
1665  case 'n':
1666  c = '\n';
1667  break;
1668  case 'r':
1669  c = '\r';
1670  break;
1671  case 't':
1672  c = '\t';
1673  break;
1674  case 'v':
1675  c = '\v';
1676  break;
1677 
1678  /*
1679  * in all other cases, take the char after '\'
1680  * literally
1681  */
1682  }
1683  }
1684 
1685  /* Add c to output string */
1686  *output_ptr++ = c;
1687  }
1688 
1689  /* Check whether raw input matched null marker */
1690  input_len = end_ptr - start_ptr;
1691  if (input_len == cstate->opts.null_print_len &&
1692  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1693  cstate->raw_fields[fieldno] = NULL;
1694  /* Check whether raw input matched default marker */
1695  else if (fieldno < list_length(cstate->attnumlist) &&
1696  cstate->opts.default_print &&
1697  input_len == cstate->opts.default_print_len &&
1698  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1699  {
1700  /* fieldno is 0-indexed and attnum is 1-indexed */
1701  int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1702 
1703  if (cstate->defexprs[m] != NULL)
1704  {
1705  /* defaults contain entries for all physical attributes */
1706  cstate->defaults[m] = true;
1707  }
1708  else
1709  {
1710  TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1711  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1712 
1713  ereport(ERROR,
1714  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1715  errmsg("unexpected default marker in COPY data"),
1716  errdetail("Column \"%s\" has no default value.",
1717  NameStr(att->attname))));
1718  }
1719  }
1720  else
1721  {
1722  /*
1723  * At this point we know the field is supposed to contain data.
1724  *
1725  * If we de-escaped any non-7-bit-ASCII chars, make sure the
1726  * resulting string is valid data for the db encoding.
1727  */
1728  if (saw_non_ascii)
1729  {
1730  char *fld = cstate->raw_fields[fieldno];
1731 
1732  pg_verifymbstr(fld, output_ptr - fld, false);
1733  }
1734  }
1735 
1736  /* Terminate attribute value in output area */
1737  *output_ptr++ = '\0';
1738 
1739  fieldno++;
1740  /* Done if we hit EOL instead of a delim */
1741  if (!found_delim)
1742  break;
1743  }
1744 
1745  /* Clean up state of attribute_buf */
1746  output_ptr--;
1747  Assert(*output_ptr == '\0');
1748  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1749 
1750  return fieldno;
1751 }
1752 
1753 /*
1754  * Parse the current line into separate attributes (fields),
1755  * performing de-escaping as needed. This has exactly the same API as
1756  * CopyReadAttributesText, except we parse the fields according to
1757  * "standard" (i.e. common) CSV usage.
1758  */
1759 static int
1761 {
1762  char delimc = cstate->opts.delim[0];
1763  char quotec = cstate->opts.quote[0];
1764  char escapec = cstate->opts.escape[0];
1765  int fieldno;
1766  char *output_ptr;
1767  char *cur_ptr;
1768  char *line_end_ptr;
1769 
1770  /*
1771  * We need a special case for zero-column tables: check that the input
1772  * line is empty, and return.
1773  */
1774  if (cstate->max_fields <= 0)
1775  {
1776  if (cstate->line_buf.len != 0)
1777  ereport(ERROR,
1778  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1779  errmsg("extra data after last expected column")));
1780  return 0;
1781  }
1782 
1783  resetStringInfo(&cstate->attribute_buf);
1784 
1785  /*
1786  * The de-escaped attributes will certainly not be longer than the input
1787  * data line, so we can just force attribute_buf to be large enough and
1788  * then transfer data without any checks for enough space. We need to do
1789  * it this way because enlarging attribute_buf mid-stream would invalidate
1790  * pointers already stored into cstate->raw_fields[].
1791  */
1792  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1793  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1794  output_ptr = cstate->attribute_buf.data;
1795 
1796  /* set pointer variables for loop */
1797  cur_ptr = cstate->line_buf.data;
1798  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1799 
1800  /* Outer loop iterates over fields */
1801  fieldno = 0;
1802  for (;;)
1803  {
1804  bool found_delim = false;
1805  bool saw_quote = false;
1806  char *start_ptr;
1807  char *end_ptr;
1808  int input_len;
1809 
1810  /* Make sure there is enough space for the next value */
1811  if (fieldno >= cstate->max_fields)
1812  {
1813  cstate->max_fields *= 2;
1814  cstate->raw_fields =
1815  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1816  }
1817 
1818  /* Remember start of field on both input and output sides */
1819  start_ptr = cur_ptr;
1820  cstate->raw_fields[fieldno] = output_ptr;
1821 
1822  /*
1823  * Scan data for field,
1824  *
1825  * The loop starts in "not quote" mode and then toggles between that
1826  * and "in quote" mode. The loop exits normally if it is in "not
1827  * quote" mode and a delimiter or line end is seen.
1828  */
1829  for (;;)
1830  {
1831  char c;
1832 
1833  /* Not in quote */
1834  for (;;)
1835  {
1836  end_ptr = cur_ptr;
1837  if (cur_ptr >= line_end_ptr)
1838  goto endfield;
1839  c = *cur_ptr++;
1840  /* unquoted field delimiter */
1841  if (c == delimc)
1842  {
1843  found_delim = true;
1844  goto endfield;
1845  }
1846  /* start of quoted field (or part of field) */
1847  if (c == quotec)
1848  {
1849  saw_quote = true;
1850  break;
1851  }
1852  /* Add c to output string */
1853  *output_ptr++ = c;
1854  }
1855 
1856  /* In quote */
1857  for (;;)
1858  {
1859  end_ptr = cur_ptr;
1860  if (cur_ptr >= line_end_ptr)
1861  ereport(ERROR,
1862  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1863  errmsg("unterminated CSV quoted field")));
1864 
1865  c = *cur_ptr++;
1866 
1867  /* escape within a quoted field */
1868  if (c == escapec)
1869  {
1870  /*
1871  * peek at the next char if available, and escape it if it
1872  * is an escape char or a quote char
1873  */
1874  if (cur_ptr < line_end_ptr)
1875  {
1876  char nextc = *cur_ptr;
1877 
1878  if (nextc == escapec || nextc == quotec)
1879  {
1880  *output_ptr++ = nextc;
1881  cur_ptr++;
1882  continue;
1883  }
1884  }
1885  }
1886 
1887  /*
1888  * end of quoted field. Must do this test after testing for
1889  * escape in case quote char and escape char are the same
1890  * (which is the common case).
1891  */
1892  if (c == quotec)
1893  break;
1894 
1895  /* Add c to output string */
1896  *output_ptr++ = c;
1897  }
1898  }
1899 endfield:
1900 
1901  /* Terminate attribute value in output area */
1902  *output_ptr++ = '\0';
1903 
1904  /* Check whether raw input matched null marker */
1905  input_len = end_ptr - start_ptr;
1906  if (!saw_quote && input_len == cstate->opts.null_print_len &&
1907  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1908  cstate->raw_fields[fieldno] = NULL;
1909  /* Check whether raw input matched default marker */
1910  else if (fieldno < list_length(cstate->attnumlist) &&
1911  cstate->opts.default_print &&
1912  input_len == cstate->opts.default_print_len &&
1913  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1914  {
1915  /* fieldno is 0-index and attnum is 1-index */
1916  int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1917 
1918  if (cstate->defexprs[m] != NULL)
1919  {
1920  /* defaults contain entries for all physical attributes */
1921  cstate->defaults[m] = true;
1922  }
1923  else
1924  {
1925  TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1926  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1927 
1928  ereport(ERROR,
1929  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1930  errmsg("unexpected default marker in COPY data"),
1931  errdetail("Column \"%s\" has no default value.",
1932  NameStr(att->attname))));
1933  }
1934  }
1935 
1936  fieldno++;
1937  /* Done if we hit EOL instead of a delim */
1938  if (!found_delim)
1939  break;
1940  }
1941 
1942  /* Clean up state of attribute_buf */
1943  output_ptr--;
1944  Assert(*output_ptr == '\0');
1945  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1946 
1947  return fieldno;
1948 }
1949 
1950 
1951 /*
1952  * Read a binary attribute
1953  */
1954 static Datum
1956  Oid typioparam, int32 typmod,
1957  bool *isnull)
1958 {
1959  int32 fld_size;
1960  Datum result;
1961 
1962  if (!CopyGetInt32(cstate, &fld_size))
1963  ereport(ERROR,
1964  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1965  errmsg("unexpected EOF in COPY data")));
1966  if (fld_size == -1)
1967  {
1968  *isnull = true;
1969  return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
1970  }
1971  if (fld_size < 0)
1972  ereport(ERROR,
1973  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1974  errmsg("invalid field size")));
1975 
1976  /* reset attribute_buf to empty, and load raw data in it */
1977  resetStringInfo(&cstate->attribute_buf);
1978 
1979  enlargeStringInfo(&cstate->attribute_buf, fld_size);
1980  if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
1981  fld_size) != fld_size)
1982  ereport(ERROR,
1983  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1984  errmsg("unexpected EOF in COPY data")));
1985 
1986  cstate->attribute_buf.len = fld_size;
1987  cstate->attribute_buf.data[fld_size] = '\0';
1988 
1989  /* Call the column type's binary input converter */
1990  result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
1991  typioparam, typmod);
1992 
1993  /* Trouble if it didn't eat the whole buffer */
1994  if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
1995  ereport(ERROR,
1996  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1997  errmsg("incorrect binary data format")));
1998 
1999  *isnull = false;
2000  return result;
2001 }
int16 AttrNumber
Definition: attnum.h:21
void pgstat_progress_update_param(int index, int64 val)
static Datum values[MAXATTR]
Definition: bootstrap.c:156
#define NameStr(name)
Definition: c.h:735
unsigned short uint16
Definition: c.h:494
unsigned int uint32
Definition: c.h:495
#define Min(x, y)
Definition: c.h:993
signed short int16
Definition: c.h:482
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1144
signed int int32
Definition: c.h:483
#define MemSet(start, val, len)
Definition: c.h:1009
#define RAW_BUF_BYTES(cstate)
#define INPUT_BUF_SIZE
@ EOL_CR
@ EOL_CRNL
@ EOL_UNKNOWN
@ EOL_NL
#define INPUT_BUF_BYTES(cstate)
#define RAW_BUF_SIZE
static int CopyReadAttributesCSV(CopyFromState cstate)
static bool CopyGetInt16(CopyFromState cstate, int16 *val)
static void CopyConversionError(CopyFromState cstate)
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
static void CopyLoadRawBuf(CopyFromState cstate)
#define OCTVALUE(c)
Definition: copyfromparse.c:81
#define REFILL_LINEBUF
#define NO_END_OF_COPY_GOTO
static void CopyLoadInputBuf(CopyFromState cstate)
#define ISOCTAL(c)
Definition: copyfromparse.c:80
void ReceiveCopyBinaryHeader(CopyFromState cstate)
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
static bool CopyReadLineText(CopyFromState cstate)
static int GetDecimalFromHex(char hex)
void ReceiveCopyBegin(CopyFromState cstate)
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
static int CopyReadAttributesText(CopyFromState cstate)
static const char BinarySignature[11]
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Definition: copyfromparse.c:99
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
static bool CopyReadLine(CopyFromState cstate)
static void CopyConvertBuf(CopyFromState cstate)
bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
@ COPY_FILE
Definition: copyto.c:52
@ COPY_CALLBACK
Definition: copyto.c:54
@ COPY_FRONTEND
Definition: copyto.c:53
struct cursor * cur
Definition: ecpg.c:28
int errcode_for_file_access(void)
Definition: elog.c:883
int errdetail(const char *fmt,...)
Definition: elog.c:1208
int errhint(const char *fmt,...)
Definition: elog.c:1322
int errcode(int sqlerrcode)
Definition: elog.c:860
int errmsg(const char *fmt,...)
Definition: elog.c:1075
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:332
bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod, fmNodePtr escontext, Datum *result)
Definition: fmgr.c:1585
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1697
@ COPY_HEADER_MATCH
Definition: copy.h:30
long val
Definition: informix.c:664
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
#define pq_flush()
Definition: libpq.h:46
#define PQ_SMALL_MESSAGE_LIMIT
Definition: libpq.h:30
#define PQ_LARGE_MESSAGE_LIMIT
Definition: libpq.h:31
Assert(fmt[strlen(fmt) - 1] !='\n')
int GetDatabaseEncoding(void)
Definition: mbutils.c:1268
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1563
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
Definition: mbutils.c:470
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1705
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1451
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:141
#define RESUME_CANCEL_INTERRUPTS()
Definition: miscadmin.h:143
int namestrcmp(Name name, const char *str)
Definition: name.c:247
int16 attnum
Definition: pg_attribute.h:74
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:209
static char format
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
#define pg_ntoh16(x)
Definition: pg_bswap.h:124
static int list_length(const List *l)
Definition: pg_list.h:152
#define lfirst_int(lc)
Definition: pg_list.h:173
static int list_nth_int(const List *list, int n)
Definition: pg_list.h:310
static char * buf
Definition: pg_test_fsync.c:73
#define MAX_CONVERSION_INPUT_LENGTH
Definition: pg_wchar.h:321
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1218
int pq_getbyte(void)
Definition: pqcomm.c:981
void pq_startmsgread(void)
Definition: pqcomm.c:1156
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:582
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:531
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:299
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:88
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
char * c
char string[11]
Definition: preproc-type.c:52
#define PROGRESS_COPY_BYTES_PROCESSED
Definition: progress.h:139
#define PqMsg_CopyDone
Definition: protocol.h:64
#define PqMsg_CopyData
Definition: protocol.h:65
#define PqMsg_CopyInResponse
Definition: protocol.h:45
#define PqMsg_Sync
Definition: protocol.h:27
#define PqMsg_CopyFail
Definition: protocol.h:29
#define PqMsg_Flush
Definition: protocol.h:24
#define RelationGetDescr(relation)
Definition: rel.h:530
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:78
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:289
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
int default_print_len
Definition: copy.h:61
bool binary
Definition: copy.h:53
int null_print_len
Definition: copy.h:58
char * quote
Definition: copy.h:63
CopyHeaderChoice header_line
Definition: copy.h:56
char * escape
Definition: copy.h:64
char * null_print
Definition: copy.h:57
char * delim
Definition: copy.h:62
bool * force_notnull_flags
Definition: copy.h:70
bool csv_mode
Definition: copy.h:55
bool * force_null_flags
Definition: copy.h:73
char * default_print
Definition: copy.h:60
copy_data_source_cb data_source_cb
StringInfoData line_buf
CopyFormatOptions opts
StringInfoData attribute_buf
const char * cur_attval
const char * cur_attname
ErrorSaveContext * escontext
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:263
Definition: fmgr.h:57
Definition: nodes.h:129
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
Definition: wchar.c:2178
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2189