PostgreSQL Source Code  git master
copyfromparse.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * copyfromparse.c
4  * Parse CSV/text/binary format for COPY FROM.
5  *
6  * This file contains routines to parse the text, CSV and binary input
7  * formats. The main entry point is NextCopyFrom(), which parses the
8  * next input line and returns it as Datums.
9  *
10  * In text/CSV mode, the parsing happens in multiple stages:
11  *
12  * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
13  * 1. 2. 3. 4.
14  *
15  * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
16  * places it into 'raw_buf'.
17  *
18  * 2. CopyConvertBuf() calls the encoding conversion function to convert
19  * the data in 'raw_buf' from client to server encoding, placing the
20  * converted result in 'input_buf'.
21  *
22  * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
23  * It is responsible for finding the next newline marker, taking quote and
24  * escape characters into account according to the COPY options. The line
25  * is copied into 'line_buf', with quotes and escape characters still
26  * intact.
27  *
28  * 4. CopyReadAttributesText/CSV() function takes the input line from
29  * 'line_buf', and splits it into fields, unescaping the data as required.
30  * The fields are stored in 'attribute_buf', and 'raw_fields' array holds
31  * pointers to each field.
32  *
33  * If encoding conversion is not required, a shortcut is taken in step 2 to
34  * avoid copying the data unnecessarily. The 'input_buf' pointer is set to
35  * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
36  * directly into 'input_buf'. CopyConvertBuf() then merely validates that
37  * the data is valid in the current encoding.
38  *
39  * In binary mode, the pipeline is much simpler. Input is loaded into
40  * 'raw_buf', and encoding conversion is done in the datatype-specific
41  * receive functions, if required. 'input_buf' and 'line_buf' are not used,
42  * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
43  * data when it's passed the receive function.
44  *
45  * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE). 'input_buf' is also
46  * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required. 'line_buf'
47  * and 'attribute_buf' are expanded on demand, to hold the longest line
48  * encountered so far.
49  *
50  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
51  * Portions Copyright (c) 1994, Regents of the University of California
52  *
53  *
54  * IDENTIFICATION
55  * src/backend/commands/copyfromparse.c
56  *
57  *-------------------------------------------------------------------------
58  */
59 #include "postgres.h"
60 
61 #include <ctype.h>
62 #include <unistd.h>
63 #include <sys/stat.h>
64 
65 #include "commands/copy.h"
67 #include "commands/progress.h"
68 #include "executor/executor.h"
69 #include "libpq/libpq.h"
70 #include "libpq/pqformat.h"
71 #include "mb/pg_wchar.h"
72 #include "miscadmin.h"
73 #include "nodes/miscnodes.h"
74 #include "pgstat.h"
75 #include "port/pg_bswap.h"
76 #include "utils/builtins.h"
77 #include "utils/rel.h"
78 
/* True if 'c' is one of the characters '0' through '7'. */
#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
/* Numeric value of digit character 'c' (its offset from '0'). */
#define OCTVALUE(c) ((c) - '0')
81 
82 /*
83  * These macros centralize code used to process line_buf and input_buf buffers.
84  * They are macros because they often do continue/break control and to avoid
85  * function call overhead in tight COPY loops.
86  *
87  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
88  * prevent the continue/break processing from working. We end the "if (1)"
89  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
90  * any "else" in the calling code, and to avoid any compiler warnings about
91  * empty statements. See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
92  */
93 
/*
 * If fewer than (extralen + 1) bytes remain past input_buf_ptr and EOF has
 * not been seen, rewind to the start of the current character, request more
 * data, and restart the enclosing loop.
 *
 * This keeps the character read at the top of the loop in the buffer
 * even if there is more than one read-ahead.
 *
 * Expects these names in the expanding scope: input_buf_ptr, copy_buf_len,
 * hit_eof, prev_raw_ptr, need_data.  Must be used inside a loop.
 */
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
if (1) \
{ \
	if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
	{ \
		input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
		need_data = true; \
		continue; \
	} \
} else ((void) 0)

/*
 * If fewer than (extralen + 1) bytes remain past input_buf_ptr and EOF has
 * been seen, consume the remainder of the buffer (when extralen is nonzero),
 * set result = true, and break out of the enclosing loop.
 *
 * Expects these names in the expanding scope: input_buf_ptr, copy_buf_len,
 * hit_eof, result.  Must be used inside a loop or switch.
 */
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
if (1) \
{ \
	if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
	{ \
		if (extralen) \
			input_buf_ptr = copy_buf_len; /* consume the partial character */ \
		/* backslash just before EOF, treat as data char */ \
		result = true; \
		break; \
	} \
} else ((void) 0)

/*
 * Transfer any approved data to line_buf; must do this to be sure
 * there is some room in input_buf.
 *
 * Appends the bytes between cstate->input_buf_index and input_buf_ptr to
 * cstate->line_buf and advances cstate->input_buf_index to input_buf_ptr.
 * Does nothing if input_buf_ptr has not moved past input_buf_index.
 */
#define REFILL_LINEBUF \
if (1) \
{ \
	if (input_buf_ptr > cstate->input_buf_index) \
	{ \
		appendBinaryStringInfo(&cstate->line_buf, \
							   cstate->input_buf + cstate->input_buf_index, \
							   input_buf_ptr - cstate->input_buf_index); \
		cstate->input_buf_index = input_buf_ptr; \
	} \
} else ((void) 0)

/*
 * Undo any read-ahead (leaving input_buf_ptr just past the character at
 * prev_raw_ptr) and jump to the not_end_of_copy label, which the expanding
 * function must define.
 */
#define NO_END_OF_COPY_GOTO \
if (1) \
{ \
	input_buf_ptr = prev_raw_ptr + 1; \
	goto not_end_of_copy; \
} else ((void) 0)
146 
/*
 * 11-byte signature that must begin every binary-format COPY stream.
 * The array is sized to hold exactly the 11 signature bytes (the last of
 * which is a zero byte); it is compared with memcmp(), not as a C string.
 *
 * NOTE: there's a copy of this in copyto.c
 */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
149 
150 
151 /* non-export function prototypes */
152 static bool CopyReadLine(CopyFromState cstate);
153 static bool CopyReadLineText(CopyFromState cstate);
154 static int CopyReadAttributesText(CopyFromState cstate);
155 static int CopyReadAttributesCSV(CopyFromState cstate);
156 static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
157  Oid typioparam, int32 typmod,
158  bool *isnull);
159 
160 
161 /* Low-level communications functions */
162 static int CopyGetData(CopyFromState cstate, void *databuf,
163  int minread, int maxread);
164 static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
165 static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
166 static void CopyLoadInputBuf(CopyFromState cstate);
167 static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
168 
169 void
171 {
173  int natts = list_length(cstate->attnumlist);
174  int16 format = (cstate->opts.binary ? 1 : 0);
175  int i;
176 
178  pq_sendbyte(&buf, format); /* overall format */
179  pq_sendint16(&buf, natts);
180  for (i = 0; i < natts; i++)
181  pq_sendint16(&buf, format); /* per-column formats */
182  pq_endmessage(&buf);
183  cstate->copy_src = COPY_FRONTEND;
184  cstate->fe_msgbuf = makeStringInfo();
185  /* We *must* flush here to ensure FE knows it can send. */
186  pq_flush();
187 }
188 
189 void
191 {
192  char readSig[11];
193  int32 tmp;
194 
195  /* Signature */
196  if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
197  memcmp(readSig, BinarySignature, 11) != 0)
198  ereport(ERROR,
199  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
200  errmsg("COPY file signature not recognized")));
201  /* Flags field */
202  if (!CopyGetInt32(cstate, &tmp))
203  ereport(ERROR,
204  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
205  errmsg("invalid COPY file header (missing flags)")));
206  if ((tmp & (1 << 16)) != 0)
207  ereport(ERROR,
208  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
209  errmsg("invalid COPY file header (WITH OIDS)")));
210  tmp &= ~(1 << 16);
211  if ((tmp >> 16) != 0)
212  ereport(ERROR,
213  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
214  errmsg("unrecognized critical flags in COPY file header")));
215  /* Header extension length */
216  if (!CopyGetInt32(cstate, &tmp) ||
217  tmp < 0)
218  ereport(ERROR,
219  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
220  errmsg("invalid COPY file header (missing length)")));
221  /* Skip extension header, if present */
222  while (tmp-- > 0)
223  {
224  if (CopyReadBinaryData(cstate, readSig, 1) != 1)
225  ereport(ERROR,
226  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
227  errmsg("invalid COPY file header (wrong length)")));
228  }
229 }
230 
231 /*
232  * CopyGetData reads data from the source (file or frontend)
233  *
234  * We attempt to read at least minread, and at most maxread, bytes from
235  * the source. The actual number of bytes read is returned; if this is
236  * less than minread, EOF was detected.
237  *
238  * Note: when copying from the frontend, we expect a proper EOF mark per
239  * protocol; if the frontend simply drops the connection, we raise error.
240  * It seems unwise to allow the COPY IN to complete normally in that case.
241  *
242  * NB: no data conversion is applied here.
243  */
244 static int
245 CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
246 {
247  int bytesread = 0;
248 
249  switch (cstate->copy_src)
250  {
251  case COPY_FILE:
252  bytesread = fread(databuf, 1, maxread, cstate->copy_file);
253  if (ferror(cstate->copy_file))
254  ereport(ERROR,
256  errmsg("could not read from COPY file: %m")));
257  if (bytesread == 0)
258  cstate->raw_reached_eof = true;
259  break;
260  case COPY_FRONTEND:
261  while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
262  {
263  int avail;
264 
265  while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
266  {
267  /* Try to receive another message */
268  int mtype;
269  int maxmsglen;
270 
271  readmessage:
273  pq_startmsgread();
274  mtype = pq_getbyte();
275  if (mtype == EOF)
276  ereport(ERROR,
277  (errcode(ERRCODE_CONNECTION_FAILURE),
278  errmsg("unexpected EOF on client connection with an open transaction")));
279  /* Validate message type and set packet size limit */
280  switch (mtype)
281  {
282  case PqMsg_CopyData:
283  maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
284  break;
285  case PqMsg_CopyDone:
286  case PqMsg_CopyFail:
287  case PqMsg_Flush:
288  case PqMsg_Sync:
289  maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
290  break;
291  default:
292  ereport(ERROR,
293  (errcode(ERRCODE_PROTOCOL_VIOLATION),
294  errmsg("unexpected message type 0x%02X during COPY from stdin",
295  mtype)));
296  maxmsglen = 0; /* keep compiler quiet */
297  break;
298  }
299  /* Now collect the message body */
300  if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
301  ereport(ERROR,
302  (errcode(ERRCODE_CONNECTION_FAILURE),
303  errmsg("unexpected EOF on client connection with an open transaction")));
305  /* ... and process it */
306  switch (mtype)
307  {
308  case PqMsg_CopyData:
309  break;
310  case PqMsg_CopyDone:
311  /* COPY IN correctly terminated by frontend */
312  cstate->raw_reached_eof = true;
313  return bytesread;
314  case PqMsg_CopyFail:
315  ereport(ERROR,
316  (errcode(ERRCODE_QUERY_CANCELED),
317  errmsg("COPY from stdin failed: %s",
318  pq_getmsgstring(cstate->fe_msgbuf))));
319  break;
320  case PqMsg_Flush:
321  case PqMsg_Sync:
322 
323  /*
324  * Ignore Flush/Sync for the convenience of client
325  * libraries (such as libpq) that may send those
326  * without noticing that the command they just
327  * sent was COPY.
328  */
329  goto readmessage;
330  default:
331  Assert(false); /* NOT REACHED */
332  }
333  }
334  avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
335  if (avail > maxread)
336  avail = maxread;
337  pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
338  databuf = (void *) ((char *) databuf + avail);
339  maxread -= avail;
340  bytesread += avail;
341  }
342  break;
343  case COPY_CALLBACK:
344  bytesread = cstate->data_source_cb(databuf, minread, maxread);
345  break;
346  }
347 
348  return bytesread;
349 }
350 
351 
352 /*
353  * These functions do apply some data conversion
354  */
355 
356 /*
357  * CopyGetInt32 reads an int32 that appears in network byte order
358  *
359  * Returns true if OK, false if EOF
360  */
361 static inline bool
363 {
364  uint32 buf;
365 
366  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
367  {
368  *val = 0; /* suppress compiler warning */
369  return false;
370  }
371  *val = (int32) pg_ntoh32(buf);
372  return true;
373 }
374 
375 /*
376  * CopyGetInt16 reads an int16 that appears in network byte order
377  */
378 static inline bool
380 {
381  uint16 buf;
382 
383  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
384  {
385  *val = 0; /* suppress compiler warning */
386  return false;
387  }
388  *val = (int16) pg_ntoh16(buf);
389  return true;
390 }
391 
392 
393 /*
394  * Perform encoding conversion on data in 'raw_buf', writing the converted
395  * data into 'input_buf'.
396  *
397  * On entry, there must be some data to convert in 'raw_buf'.
398  */
399 static void
401 {
402  /*
403  * If the file and server encoding are the same, no encoding conversion is
404  * required. However, we still need to verify that the input is valid for
405  * the encoding.
406  */
407  if (!cstate->need_transcoding)
408  {
409  /*
410  * When conversion is not required, input_buf and raw_buf are the
411  * same. raw_buf_len is the total number of bytes in the buffer, and
412  * input_buf_len tracks how many of those bytes have already been
413  * verified.
414  */
415  int preverifiedlen = cstate->input_buf_len;
416  int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
417  int nverified;
418 
419  if (unverifiedlen == 0)
420  {
421  /*
422  * If no more raw data is coming, report the EOF to the caller.
423  */
424  if (cstate->raw_reached_eof)
425  cstate->input_reached_eof = true;
426  return;
427  }
428 
429  /*
430  * Verify the new data, including any residual unverified bytes from
431  * previous round.
432  */
433  nverified = pg_encoding_verifymbstr(cstate->file_encoding,
434  cstate->raw_buf + preverifiedlen,
435  unverifiedlen);
436  if (nverified == 0)
437  {
438  /*
439  * Could not verify anything.
440  *
441  * If there is no more raw input data coming, it means that there
442  * was an incomplete multi-byte sequence at the end. Also, if
443  * there's "enough" input left, we should be able to verify at
444  * least one character, and a failure to do so means that we've
445  * hit an invalid byte sequence.
446  */
447  if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
448  cstate->input_reached_error = true;
449  return;
450  }
451  cstate->input_buf_len += nverified;
452  }
453  else
454  {
455  /*
456  * Encoding conversion is needed.
457  */
458  int nbytes;
459  unsigned char *src;
460  int srclen;
461  unsigned char *dst;
462  int dstlen;
463  int convertedlen;
464 
465  if (RAW_BUF_BYTES(cstate) == 0)
466  {
467  /*
468  * If no more raw data is coming, report the EOF to the caller.
469  */
470  if (cstate->raw_reached_eof)
471  cstate->input_reached_eof = true;
472  return;
473  }
474 
475  /*
476  * First, copy down any unprocessed data.
477  */
478  nbytes = INPUT_BUF_BYTES(cstate);
479  if (nbytes > 0 && cstate->input_buf_index > 0)
480  memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
481  nbytes);
482  cstate->input_buf_index = 0;
483  cstate->input_buf_len = nbytes;
484  cstate->input_buf[nbytes] = '\0';
485 
486  src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
487  srclen = cstate->raw_buf_len - cstate->raw_buf_index;
488  dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
489  dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
490 
491  /*
492  * Do the conversion. This might stop short, if there is an invalid
493  * byte sequence in the input. We'll convert as much as we can in
494  * that case.
495  *
496  * Note: Even if we hit an invalid byte sequence, we don't report the
497  * error until all the valid bytes have been consumed. The input
498  * might contain an end-of-input marker (\.), and we don't want to
499  * report an error if the invalid byte sequence is after the
500  * end-of-input marker. We might unnecessarily convert some data
501  * after the end-of-input marker as long as it's valid for the
502  * encoding, but that's harmless.
503  */
504  convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
505  cstate->file_encoding,
507  src, srclen,
508  dst, dstlen,
509  true);
510  if (convertedlen == 0)
511  {
512  /*
513  * Could not convert anything. If there is no more raw input data
514  * coming, it means that there was an incomplete multi-byte
515  * sequence at the end. Also, if there is plenty of input left,
516  * we should be able to convert at least one character, so a
517  * failure to do so must mean that we've hit a byte sequence
518  * that's invalid.
519  */
520  if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
521  cstate->input_reached_error = true;
522  return;
523  }
524  cstate->raw_buf_index += convertedlen;
525  cstate->input_buf_len += strlen((char *) dst);
526  }
527 }
528 
529 /*
530  * Report an encoding or conversion error.
531  */
532 static void
534 {
535  Assert(cstate->raw_buf_len > 0);
536  Assert(cstate->input_reached_error);
537 
538  if (!cstate->need_transcoding)
539  {
540  /*
541  * Everything up to input_buf_len was successfully verified, and
542  * input_buf_len points to the invalid or incomplete character.
543  */
545  cstate->raw_buf + cstate->input_buf_len,
546  cstate->raw_buf_len - cstate->input_buf_len);
547  }
548  else
549  {
550  /*
551  * raw_buf_index points to the invalid or untranslatable character. We
552  * let the conversion routine report the error, because it can provide
553  * a more specific error message than we could here. An earlier call
554  * to the conversion routine in CopyConvertBuf() detected that there
555  * is an error, now we call the conversion routine again with
556  * noError=false, to have it throw the error.
557  */
558  unsigned char *src;
559  int srclen;
560  unsigned char *dst;
561  int dstlen;
562 
563  src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
564  srclen = cstate->raw_buf_len - cstate->raw_buf_index;
565  dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
566  dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
567 
569  cstate->file_encoding,
571  src, srclen,
572  dst, dstlen,
573  false);
574 
575  /*
576  * The conversion routine should have reported an error, so this
577  * should not be reached.
578  */
579  elog(ERROR, "encoding conversion failed without error");
580  }
581 }
582 
583 /*
584  * Load more data from data source to raw_buf.
585  *
586  * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
587  * beginning of the buffer, and we load new data after that.
588  */
589 static void
591 {
592  int nbytes;
593  int inbytes;
594 
595  /*
596  * In text mode, if encoding conversion is not required, raw_buf and
597  * input_buf point to the same buffer. Their len/index better agree, too.
598  */
599  if (cstate->raw_buf == cstate->input_buf)
600  {
601  Assert(!cstate->need_transcoding);
602  Assert(cstate->raw_buf_index == cstate->input_buf_index);
603  Assert(cstate->input_buf_len <= cstate->raw_buf_len);
604  }
605 
606  /*
607  * Copy down the unprocessed data if any.
608  */
609  nbytes = RAW_BUF_BYTES(cstate);
610  if (nbytes > 0 && cstate->raw_buf_index > 0)
611  memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
612  nbytes);
613  cstate->raw_buf_len -= cstate->raw_buf_index;
614  cstate->raw_buf_index = 0;
615 
616  /*
617  * If raw_buf and input_buf are in fact the same buffer, adjust the
618  * input_buf variables, too.
619  */
620  if (cstate->raw_buf == cstate->input_buf)
621  {
622  cstate->input_buf_len -= cstate->input_buf_index;
623  cstate->input_buf_index = 0;
624  }
625 
626  /* Load more data */
627  inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
628  1, RAW_BUF_SIZE - cstate->raw_buf_len);
629  nbytes += inbytes;
630  cstate->raw_buf[nbytes] = '\0';
631  cstate->raw_buf_len = nbytes;
632 
633  cstate->bytes_processed += inbytes;
635 
636  if (inbytes == 0)
637  cstate->raw_reached_eof = true;
638 }
639 
640 /*
641  * CopyLoadInputBuf loads some more data into input_buf
642  *
643  * On return, at least one more input character is loaded into
644  * input_buf, or input_reached_eof is set.
645  *
646  * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
647  * of the buffer and then we load more data after that.
648  */
649 static void
651 {
652  int nbytes = INPUT_BUF_BYTES(cstate);
653 
654  /*
655  * The caller has updated input_buf_index to indicate how much of the
656  * input has been consumed and isn't needed anymore. If input_buf is the
657  * same physical area as raw_buf, update raw_buf_index accordingly.
658  */
659  if (cstate->raw_buf == cstate->input_buf)
660  {
661  Assert(!cstate->need_transcoding);
662  Assert(cstate->input_buf_index >= cstate->raw_buf_index);
663  cstate->raw_buf_index = cstate->input_buf_index;
664  }
665 
666  for (;;)
667  {
668  /* If we now have some unconverted data, try to convert it */
669  CopyConvertBuf(cstate);
670 
671  /* If we now have some more input bytes ready, return them */
672  if (INPUT_BUF_BYTES(cstate) > nbytes)
673  return;
674 
675  /*
676  * If we reached an invalid byte sequence, or we're at an incomplete
677  * multi-byte character but there is no more raw input data, report
678  * conversion error.
679  */
680  if (cstate->input_reached_error)
681  CopyConversionError(cstate);
682 
683  /* no more input, and everything has been converted */
684  if (cstate->input_reached_eof)
685  break;
686 
687  /* Try to load more raw data */
688  Assert(!cstate->raw_reached_eof);
689  CopyLoadRawBuf(cstate);
690  }
691 }
692 
693 /*
694  * CopyReadBinaryData
695  *
696  * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
697  * and writes them to 'dest'. Returns the number of bytes read (which
698  * would be less than 'nbytes' only if we reach EOF).
699  */
700 static int
701 CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
702 {
703  int copied_bytes = 0;
704 
705  if (RAW_BUF_BYTES(cstate) >= nbytes)
706  {
707  /* Enough bytes are present in the buffer. */
708  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
709  cstate->raw_buf_index += nbytes;
710  copied_bytes = nbytes;
711  }
712  else
713  {
714  /*
715  * Not enough bytes in the buffer, so must read from the file. Need
716  * to loop since 'nbytes' could be larger than the buffer size.
717  */
718  do
719  {
720  int copy_bytes;
721 
722  /* Load more data if buffer is empty. */
723  if (RAW_BUF_BYTES(cstate) == 0)
724  {
725  CopyLoadRawBuf(cstate);
726  if (cstate->raw_reached_eof)
727  break; /* EOF */
728  }
729 
730  /* Transfer some bytes. */
731  copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
732  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
733  cstate->raw_buf_index += copy_bytes;
734  dest += copy_bytes;
735  copied_bytes += copy_bytes;
736  } while (copied_bytes < nbytes);
737  }
738 
739  return copied_bytes;
740 }
741 
742 /*
743  * Read raw fields in the next line for COPY FROM in text or csv mode.
744  * Return false if no more lines.
745  *
746  * An internal temporary buffer is returned via 'fields'. It is valid until
747  * the next call of the function. Since the function returns all raw fields
748  * in the input file, 'nfields' could be different from the number of columns
749  * in the relation.
750  *
751  * NOTE: force_not_null option are not applied to the returned fields.
752  */
753 bool
754 NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
755 {
756  int fldct;
757  bool done;
758 
759  /* only available for text or csv input */
760  Assert(!cstate->opts.binary);
761 
762  /* on input check that the header line is correct if needed */
763  if (cstate->cur_lineno == 0 && cstate->opts.header_line)
764  {
765  ListCell *cur;
766  TupleDesc tupDesc;
767 
768  tupDesc = RelationGetDescr(cstate->rel);
769 
770  cstate->cur_lineno++;
771  done = CopyReadLine(cstate);
772 
773  if (cstate->opts.header_line == COPY_HEADER_MATCH)
774  {
775  int fldnum;
776 
777  if (cstate->opts.csv_mode)
778  fldct = CopyReadAttributesCSV(cstate);
779  else
780  fldct = CopyReadAttributesText(cstate);
781 
782  if (fldct != list_length(cstate->attnumlist))
783  ereport(ERROR,
784  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
785  errmsg("wrong number of fields in header line: got %d, expected %d",
786  fldct, list_length(cstate->attnumlist))));
787 
788  fldnum = 0;
789  foreach(cur, cstate->attnumlist)
790  {
791  int attnum = lfirst_int(cur);
792  char *colName;
793  Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
794 
795  Assert(fldnum < cstate->max_fields);
796 
797  colName = cstate->raw_fields[fldnum++];
798  if (colName == NULL)
799  ereport(ERROR,
800  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
801  errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
802  fldnum, cstate->opts.null_print, NameStr(attr->attname))));
803 
804  if (namestrcmp(&attr->attname, colName) != 0)
805  {
806  ereport(ERROR,
807  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
808  errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
809  fldnum, colName, NameStr(attr->attname))));
810  }
811  }
812  }
813 
814  if (done)
815  return false;
816  }
817 
818  cstate->cur_lineno++;
819 
820  /* Actually read the line into memory here */
821  done = CopyReadLine(cstate);
822 
823  /*
824  * EOF at start of line means we're done. If we see EOF after some
825  * characters, we act as though it was newline followed by EOF, ie,
826  * process the line and then exit loop on next iteration.
827  */
828  if (done && cstate->line_buf.len == 0)
829  return false;
830 
831  /* Parse the line into de-escaped field values */
832  if (cstate->opts.csv_mode)
833  fldct = CopyReadAttributesCSV(cstate);
834  else
835  fldct = CopyReadAttributesText(cstate);
836 
837  *fields = cstate->raw_fields;
838  *nfields = fldct;
839  return true;
840 }
841 
842 /*
843  * Read next tuple from file for COPY FROM. Return false if no more tuples.
844  *
845  * 'econtext' is used to evaluate default expression for each column that is
846  * either not read from the file or is using the DEFAULT option of COPY FROM.
847  * It can be NULL when no default values are used, i.e. when all columns are
848  * read from the file, and DEFAULT option is unset.
849  *
850  * 'values' and 'nulls' arrays must be the same length as columns of the
851  * relation passed to BeginCopyFrom. This function fills the arrays.
852  */
853 bool
855  Datum *values, bool *nulls)
856 {
857  TupleDesc tupDesc;
858  AttrNumber num_phys_attrs,
859  attr_count,
860  num_defaults = cstate->num_defaults;
861  FmgrInfo *in_functions = cstate->in_functions;
862  Oid *typioparams = cstate->typioparams;
863  int i;
864  int *defmap = cstate->defmap;
865  ExprState **defexprs = cstate->defexprs;
866 
867  tupDesc = RelationGetDescr(cstate->rel);
868  num_phys_attrs = tupDesc->natts;
869  attr_count = list_length(cstate->attnumlist);
870 
871  /* Initialize all values for row to NULL */
872  MemSet(values, 0, num_phys_attrs * sizeof(Datum));
873  MemSet(nulls, true, num_phys_attrs * sizeof(bool));
874  MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
875 
876  if (!cstate->opts.binary)
877  {
878  char **field_strings;
879  ListCell *cur;
880  int fldct;
881  int fieldno;
882  char *string;
883 
884  /* read raw fields in the next line */
885  if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
886  return false;
887 
888  /* check for overflowing fields */
889  if (attr_count > 0 && fldct > attr_count)
890  ereport(ERROR,
891  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
892  errmsg("extra data after last expected column")));
893 
894  fieldno = 0;
895 
896  /* Loop to read the user attributes on the line. */
897  foreach(cur, cstate->attnumlist)
898  {
899  int attnum = lfirst_int(cur);
900  int m = attnum - 1;
901  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
902 
903  if (fieldno >= fldct)
904  ereport(ERROR,
905  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
906  errmsg("missing data for column \"%s\"",
907  NameStr(att->attname))));
908  string = field_strings[fieldno++];
909 
910  if (cstate->convert_select_flags &&
911  !cstate->convert_select_flags[m])
912  {
913  /* ignore input field, leaving column as NULL */
914  continue;
915  }
916 
917  if (cstate->opts.csv_mode)
918  {
919  if (string == NULL &&
920  cstate->opts.force_notnull_flags[m])
921  {
922  /*
923  * FORCE_NOT_NULL option is set and column is NULL -
924  * convert it to the NULL string.
925  */
926  string = cstate->opts.null_print;
927  }
928  else if (string != NULL && cstate->opts.force_null_flags[m]
929  && strcmp(string, cstate->opts.null_print) == 0)
930  {
931  /*
932  * FORCE_NULL option is set and column matches the NULL
933  * string. It must have been quoted, or otherwise the
934  * string would already have been set to NULL. Convert it
935  * to NULL as specified.
936  */
937  string = NULL;
938  }
939  }
940 
941  cstate->cur_attname = NameStr(att->attname);
942  cstate->cur_attval = string;
943 
944  if (string != NULL)
945  nulls[m] = false;
946 
947  if (cstate->defaults[m])
948  {
949  /*
950  * The caller must supply econtext and have switched into the
951  * per-tuple memory context in it.
952  */
953  Assert(econtext != NULL);
955 
956  values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
957  }
958 
959  /*
960  * If ON_ERROR is specified with IGNORE, skip rows with soft
961  * errors
962  */
963  else if (!InputFunctionCallSafe(&in_functions[m],
964  string,
965  typioparams[m],
966  att->atttypmod,
967  (Node *) cstate->escontext,
968  &values[m]))
969  {
971 
972  cstate->num_errors++;
973 
975  {
976  /*
977  * Since we emit line number and column info in the below
978  * notice message, we suppress error context information
979  * other than the relation name.
980  */
981  Assert(!cstate->relname_only);
982  cstate->relname_only = true;
983 
984  if (cstate->cur_attval)
985  {
986  char *attval;
987 
988  attval = CopyLimitPrintoutLength(cstate->cur_attval);
989  ereport(NOTICE,
990  errmsg("skipping row due to data type incompatibility at line %llu for column %s: \"%s\"",
991  (unsigned long long) cstate->cur_lineno,
992  cstate->cur_attname,
993  attval));
994  pfree(attval);
995  }
996  else
997  ereport(NOTICE,
998  errmsg("skipping row due to data type incompatibility at line %llu for column %s: null input",
999  (unsigned long long) cstate->cur_lineno,
1000  cstate->cur_attname));
1001 
1002  /* reset relname_only */
1003  cstate->relname_only = false;
1004  }
1005 
1006  return true;
1007  }
1008 
1009  cstate->cur_attname = NULL;
1010  cstate->cur_attval = NULL;
1011  }
1012 
1013  Assert(fieldno == attr_count);
1014  }
1015  else
1016  {
1017  /* binary */
1018  int16 fld_count;
1019  ListCell *cur;
1020 
1021  cstate->cur_lineno++;
1022 
1023  if (!CopyGetInt16(cstate, &fld_count))
1024  {
1025  /* EOF detected (end of file, or protocol-level EOF) */
1026  return false;
1027  }
1028 
1029  if (fld_count == -1)
1030  {
1031  /*
1032  * Received EOF marker. Wait for the protocol-level EOF, and
1033  * complain if it doesn't come immediately. In COPY FROM STDIN,
1034  * this ensures that we correctly handle CopyFail, if client
1035  * chooses to send that now. When copying from file, we could
1036  * ignore the rest of the file like in text mode, but we choose to
1037  * be consistent with the COPY FROM STDIN case.
1038  */
1039  char dummy;
1040 
1041  if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
1042  ereport(ERROR,
1043  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1044  errmsg("received copy data after EOF marker")));
1045  return false;
1046  }
1047 
1048  if (fld_count != attr_count)
1049  ereport(ERROR,
1050  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1051  errmsg("row field count is %d, expected %d",
1052  (int) fld_count, attr_count)));
1053 
1054  foreach(cur, cstate->attnumlist)
1055  {
1056  int attnum = lfirst_int(cur);
1057  int m = attnum - 1;
1058  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1059 
1060  cstate->cur_attname = NameStr(att->attname);
1061  values[m] = CopyReadBinaryAttribute(cstate,
1062  &in_functions[m],
1063  typioparams[m],
1064  att->atttypmod,
1065  &nulls[m]);
1066  cstate->cur_attname = NULL;
1067  }
1068  }
1069 
1070  /*
1071  * Now compute and insert any defaults available for the columns not
1072  * provided by the input data. Anything not processed here or above will
1073  * remain NULL.
1074  */
1075  for (i = 0; i < num_defaults; i++)
1076  {
1077  /*
1078  * The caller must supply econtext and have switched into the
1079  * per-tuple memory context in it.
1080  */
1081  Assert(econtext != NULL);
1083 
1084  values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
1085  &nulls[defmap[i]]);
1086  }
1087 
1088  return true;
1089 }
1090 
1091 /*
1092  * Read the next input line and stash it in line_buf.
1093  *
1094  * Result is true if read was terminated by EOF, false if terminated
1095  * by newline. The terminating newline or EOF marker is not included
1096  * in the final value of line_buf.
1097  */
1098 static bool
1100 {
1101  bool result;
1102 
1103  resetStringInfo(&cstate->line_buf);
1104  cstate->line_buf_valid = false;
1105 
1106  /* Parse data and transfer into line_buf */
1107  result = CopyReadLineText(cstate);
1108 
1109  if (result)
1110  {
1111  /*
1112  * Reached EOF. In protocol version 3, we should ignore anything
1113  * after \. up to the protocol end of copy data. (XXX maybe better
1114  * not to treat \. as special?)
1115  */
1116  if (cstate->copy_src == COPY_FRONTEND)
1117  {
1118  int inbytes;
1119 
1120  do
1121  {
1122  inbytes = CopyGetData(cstate, cstate->input_buf,
1123  1, INPUT_BUF_SIZE);
1124  } while (inbytes > 0);
1125  cstate->input_buf_index = 0;
1126  cstate->input_buf_len = 0;
1127  cstate->raw_buf_index = 0;
1128  cstate->raw_buf_len = 0;
1129  }
1130  }
1131  else
1132  {
1133  /*
1134  * If we didn't hit EOF, then we must have transferred the EOL marker
1135  * to line_buf along with the data. Get rid of it.
1136  */
1137  switch (cstate->eol_type)
1138  {
1139  case EOL_NL:
1140  Assert(cstate->line_buf.len >= 1);
1141  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1142  cstate->line_buf.len--;
1143  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1144  break;
1145  case EOL_CR:
1146  Assert(cstate->line_buf.len >= 1);
1147  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
1148  cstate->line_buf.len--;
1149  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1150  break;
1151  case EOL_CRNL:
1152  Assert(cstate->line_buf.len >= 2);
1153  Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
1154  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1155  cstate->line_buf.len -= 2;
1156  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1157  break;
1158  case EOL_UNKNOWN:
1159  /* shouldn't get here */
1160  Assert(false);
1161  break;
1162  }
1163  }
1164 
1165  /* Now it's safe to use the buffer in error messages */
1166  cstate->line_buf_valid = true;
1167 
1168  return result;
1169 }
1170 
1171 /*
1172  * CopyReadLineText - inner loop of CopyReadLine for text mode
1173  */
/*
 * Scans input_buf for the end of the next line, honoring CSV quoting when
 * opts.csv_mode is set.  Returns true if the scan ended at EOF (input
 * exhausted, or an end-of-copy marker \. was recognized), false if it ended
 * at a line terminator.  As a side effect, eol_type is set the first time a
 * terminator is seen, and cur_lineno is advanced for newlines embedded in
 * quoted CSV fields.
 *
 * NOTE(review): this listing has gaps (its embedded line numbers skip
 * 1175, 1244, 1278, 1319, 1387-1388, 1410, 1421, 1430, 1435, 1446, 1463 and
 * 1500).  The missing lines are presumably the function declarator, the
 * buffer-handling macros named in this file's index (REFILL_LINEBUF,
 * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE, IF_NEED_REFILL_AND_EOF_BREAK,
 * NO_END_OF_COPY_GOTO) and the opening of one call -- confirm against the
 * original file before relying on this text.
 */
1174 static bool
/* NOTE(review): declarator line 1175 missing; the file index lists "static bool CopyReadLineText(CopyFromState cstate)" */
1176 {
1177  char *copy_input_buf;
1178  int input_buf_ptr;
1179  int copy_buf_len;
1180  bool need_data = false;
1181  bool hit_eof = false;
1182  bool result = false;
1183 
1184  /* CSV variables */
1185  bool first_char_in_line = true;
1186  bool in_quote = false,
1187  last_was_esc = false;
1188  char quotec = '\0';
1189  char escapec = '\0';
1190 
1191  if (cstate->opts.csv_mode)
1192  {
1193  quotec = cstate->opts.quote[0];
1194  escapec = cstate->opts.escape[0];
1195  /* ignore special escape processing if it's the same as quotec */
1196  if (quotec == escapec)
1197  escapec = '\0';
1198  }
1199 
1200  /*
1201  * The objective of this loop is to transfer the entire next input line
1202  * into line_buf. Hence, we only care for detecting newlines (\r and/or
1203  * \n) and the end-of-copy marker (\.).
1204  *
1205  * In CSV mode, \r and \n inside a quoted field are just part of the data
1206  * value and are put in line_buf. We keep just enough state to know if we
1207  * are currently in a quoted field or not.
1208  *
1209  * The input has already been converted to the database encoding. All
1210  * supported server encodings have the property that all bytes in a
1211  * multi-byte sequence have the high bit set, so a multibyte character
1212  * cannot contain any newline or escape characters embedded in the
1213  * multibyte sequence. Therefore, we can process the input byte-by-byte,
1214  * regardless of the encoding.
1215  *
1216  * For speed, we try to move data from input_buf to line_buf in chunks
1217  * rather than one character at a time. input_buf_ptr points to the next
1218  * character to examine; any characters from input_buf_index to
1219  * input_buf_ptr have been determined to be part of the line, but not yet
1220  * transferred to line_buf.
1221  *
1222  * For a little extra speed within the loop, we copy input_buf and
1223  * input_buf_len into local variables.
1224  */
1225  copy_input_buf = cstate->input_buf;
1226  input_buf_ptr = cstate->input_buf_index;
1227  copy_buf_len = cstate->input_buf_len;
1228 
1229  for (;;)
1230  {
1231  int prev_raw_ptr;
1232  char c;
1233 
1234  /*
1235  * Load more data if needed.
1236  *
1237  * TODO: We could just force four bytes of read-ahead and avoid the
1238  * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was
1239  * unsafe with the old v2 COPY protocol, but we don't support that
1240  * anymore.
1241  */
1242  if (input_buf_ptr >= copy_buf_len || need_data)
1243  {
/* NOTE(review): line 1244 missing here; presumably REFILL_LINEBUF, flushing the scanned bytes before the reload -- confirm */
1245 
1246  CopyLoadInputBuf(cstate);
1247  /* update our local variables */
1248  hit_eof = cstate->input_reached_eof;
1249  input_buf_ptr = cstate->input_buf_index;
1250  copy_buf_len = cstate->input_buf_len;
1251 
1252  /*
1253  * If we are completely out of data, break out of the loop,
1254  * reporting EOF.
1255  */
1256  if (INPUT_BUF_BYTES(cstate) <= 0)
1257  {
1258  result = true;
1259  break;
1260  }
1261  need_data = false;
1262  }
1263 
1264  /* OK to fetch a character */
1265  prev_raw_ptr = input_buf_ptr;
1266  c = copy_input_buf[input_buf_ptr++];
1267 
1268  if (cstate->opts.csv_mode)
1269  {
1270  /*
1271  * If character is '\\' or '\r', we may need to look ahead below.
1272  * Force fetch of the next character if we don't already have it.
1273  * We need to do this before changing CSV state, in case one of
1274  * these characters is also the quote or escape character.
1275  */
1276  if (c == '\\' || c == '\r')
1277  {
/* NOTE(review): line 1278 missing here; presumably an IF_NEED_REFILL_AND_NOT_EOF_CONTINUE invocation -- confirm */
1279  }
1280 
1281  /*
1282  * Dealing with quotes and escapes here is mildly tricky. If the
1283  * quote char is also the escape char, there's no problem - we
1284  * just use the char as a toggle. If they are different, we need
1285  * to ensure that we only take account of an escape inside a
1286  * quoted field and immediately preceding a quote char, and not
1287  * the second in an escape-escape sequence.
1288  */
1289  if (in_quote && c == escapec)
1290  last_was_esc = !last_was_esc;
1291  if (c == quotec && !last_was_esc)
1292  in_quote = !in_quote;
1293  if (c != escapec)
1294  last_was_esc = false;
1295 
1296  /*
1297  * Updating the line count for embedded CR and/or LF chars is
1298  * necessarily a little fragile - this test is probably about the
1299  * best we can do. (XXX it's arguable whether we should do this
1300  * at all --- is cur_lineno a physical or logical count?)
1301  */
1302  if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
1303  cstate->cur_lineno++;
1304  }
1305 
1306  /* Process \r */
1307  if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
1308  {
1309  /* Check for \r\n on first line, _and_ handle \r\n. */
1310  if (cstate->eol_type == EOL_UNKNOWN ||
1311  cstate->eol_type == EOL_CRNL)
1312  {
1313  /*
1314  * If need more data, go back to loop top to load it.
1315  *
1316  * Note that if we are at EOF, c will wind up as '\0' because
1317  * of the guaranteed pad of input_buf.
1318  */
/* NOTE(review): line 1319 missing here; presumably an IF_NEED_REFILL_AND_NOT_EOF_CONTINUE invocation -- confirm */
1320 
1321  /* get next char */
1322  c = copy_input_buf[input_buf_ptr];
1323 
1324  if (c == '\n')
1325  {
1326  input_buf_ptr++; /* eat newline */
1327  cstate->eol_type = EOL_CRNL; /* in case not set yet */
1328  }
1329  else
1330  {
1331  /* found \r, but no \n */
1332  if (cstate->eol_type == EOL_CRNL)
1333  ereport(ERROR,
1334  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1335  !cstate->opts.csv_mode ?
1336  errmsg("literal carriage return found in data") :
1337  errmsg("unquoted carriage return found in data"),
1338  !cstate->opts.csv_mode ?
1339  errhint("Use \"\\r\" to represent carriage return.") :
1340  errhint("Use quoted CSV field to represent carriage return.")));
1341 
1342  /*
1343  * if we got here, it is the first line and we didn't find
1344  * \n, so don't consume the peeked character
1345  */
1346  cstate->eol_type = EOL_CR;
1347  }
1348  }
1349  else if (cstate->eol_type == EOL_NL)
1350  ereport(ERROR,
1351  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1352  !cstate->opts.csv_mode ?
1353  errmsg("literal carriage return found in data") :
1354  errmsg("unquoted carriage return found in data"),
1355  !cstate->opts.csv_mode ?
1356  errhint("Use \"\\r\" to represent carriage return.") :
1357  errhint("Use quoted CSV field to represent carriage return.")));
1358  /* If reach here, we have found the line terminator */
1359  break;
1360  }
1361 
1362  /* Process \n */
1363  if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
1364  {
1365  if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
1366  ereport(ERROR,
1367  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1368  !cstate->opts.csv_mode ?
1369  errmsg("literal newline found in data") :
1370  errmsg("unquoted newline found in data"),
1371  !cstate->opts.csv_mode ?
1372  errhint("Use \"\\n\" to represent newline.") :
1373  errhint("Use quoted CSV field to represent newline.")));
1374  cstate->eol_type = EOL_NL; /* in case not set yet */
1375  /* If reach here, we have found the line terminator */
1376  break;
1377  }
1378 
1379  /*
1380  * In CSV mode, we only recognize \. alone on a line. This is because
1381  * \. is a valid CSV data value.
1382  */
1383  if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
1384  {
1385  char c2;
1386 
/* NOTE(review): lines 1387-1388 missing here; presumably the two IF_NEED_REFILL_* look-ahead checks -- confirm */
1389 
1390  /* -----
1391  * get next character
1392  * Note: we do not change c so if it isn't \., we can fall
1393  * through and continue processing.
1394  * -----
1395  */
1396  c2 = copy_input_buf[input_buf_ptr];
1397 
1398  if (c2 == '.')
1399  {
1400  input_buf_ptr++; /* consume the '.' */
1401 
1402  /*
1403  * Note: if we loop back for more data here, it does not
1404  * matter that the CSV state change checks are re-executed; we
1405  * will come back here with no important state changed.
1406  */
1407  if (cstate->eol_type == EOL_CRNL)
1408  {
1409  /* Get the next character */
/* NOTE(review): line 1410 missing here; presumably an IF_NEED_REFILL_AND_NOT_EOF_CONTINUE invocation -- confirm */
1411  /* if hit_eof, c2 will become '\0' */
1412  c2 = copy_input_buf[input_buf_ptr++];
1413 
1414  if (c2 == '\n')
1415  {
1416  if (!cstate->opts.csv_mode)
1417  ereport(ERROR,
1418  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1419  errmsg("end-of-copy marker does not match previous newline style")));
1420  else
/* NOTE(review): line 1421 (the else-branch statement) missing; presumably NO_END_OF_COPY_GOTO -- confirm */
1422  }
1423  else if (c2 != '\r')
1424  {
1425  if (!cstate->opts.csv_mode)
1426  ereport(ERROR,
1427  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1428  errmsg("end-of-copy marker corrupt")));
1429  else
/* NOTE(review): line 1430 (the else-branch statement) missing; presumably NO_END_OF_COPY_GOTO -- confirm */
1431  }
1432  }
1433 
1434  /* Get the next character */
/* NOTE(review): line 1435 missing here; presumably an IF_NEED_REFILL_AND_NOT_EOF_CONTINUE invocation -- confirm */
1436  /* if hit_eof, c2 will become '\0' */
1437  c2 = copy_input_buf[input_buf_ptr++];
1438 
1439  if (c2 != '\r' && c2 != '\n')
1440  {
1441  if (!cstate->opts.csv_mode)
1442  ereport(ERROR,
1443  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1444  errmsg("end-of-copy marker corrupt")));
1445  else
/* NOTE(review): line 1446 (the else-branch statement) missing; presumably NO_END_OF_COPY_GOTO -- confirm */
1447  }
1448 
1449  if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
1450  (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
1451  (cstate->eol_type == EOL_CR && c2 != '\r'))
1452  {
1453  ereport(ERROR,
1454  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1455  errmsg("end-of-copy marker does not match previous newline style")));
1456  }
1457 
1458  /*
1459  * Transfer only the data before the \. into line_buf, then
1460  * discard the data and the \. sequence.
1461  */
1462  if (prev_raw_ptr > cstate->input_buf_index)
/* NOTE(review): line 1463 missing; presumably the opening of a call appending the bytes below to line_buf -- confirm */
1464  cstate->input_buf + cstate->input_buf_index,
1465  prev_raw_ptr - cstate->input_buf_index);
1466  cstate->input_buf_index = input_buf_ptr;
1467  result = true; /* report EOF */
1468  break;
1469  }
1470  else if (!cstate->opts.csv_mode)
1471  {
1472  /*
1473  * If we are here, it means we found a backslash followed by
1474  * something other than a period. In non-CSV mode, anything
1475  * after a backslash is special, so we skip over that second
1476  * character too. If we didn't do that, \\. would be
1477  * considered an end-of-copy marker, while in non-CSV mode it is a
1478  * literal backslash followed by a period. In CSV mode,
1479  * backslashes are not special, so we want to process the
1480  * character after the backslash just like a normal character,
1481  * so we don't increment in those cases.
1482  */
1483  input_buf_ptr++;
1484  }
1485  }
1486 
1487  /*
1488  * This label is for CSV cases where \. appears at the start of a
1489  * line, but there is more text after it, meaning it was a data value.
1490  * We are more strict for \. in CSV mode because \. could be a data
1491  * value, while in non-CSV mode, \. cannot be a data value.
1492  */
1493 not_end_of_copy:
1494  first_char_in_line = false;
1495  } /* end of outer loop */
1496 
1497  /*
1498  * Transfer any still-uncopied data to line_buf.
1499  */
/* NOTE(review): line 1500 missing here; presumably REFILL_LINEBUF -- confirm */
1501 
1502  return result;
1503 }
1504 
/*
 * Return decimal value for a hexadecimal digit.
 *
 * 'hex' must be one of 0-9, a-f or A-F.  No validation is done here:
 * callers are expected to have checked isxdigit() first, and the result
 * for any other character is meaningless.
 */
static int
GetDecimalFromHex(char hex)
{
	if (isdigit((unsigned char) hex))
		return hex - '0';
	else
		return tolower((unsigned char) hex) - 'a' + 10;
}
1516 
1517 /*
1518  * Parse the current line into separate attributes (fields),
1519  * performing de-escaping as needed.
1520  *
1521  * The input is in line_buf. We use attribute_buf to hold the result
1522  * strings. cstate->raw_fields[k] is set to point to the k'th attribute
1523  * string, or NULL when the input matches the null marker string.
1524  * This array is expanded as necessary.
1525  *
1526  * (Note that the caller cannot check for nulls since the returned
1527  * string would be the post-de-escaping equivalent, which may look
1528  * the same as some valid data string.)
1529  *
1530  * delim is the column delimiter string (must be just one byte for now).
1531  * null_print is the null marker string. Note that this is compared to
1532  * the pre-de-escaped input string.
1533  *
1534  * The return value is the number of fields actually read.
1535  */
1536 static int
1538 {
1539  char delimc = cstate->opts.delim[0];
1540  int fieldno;
1541  char *output_ptr;
1542  char *cur_ptr;
1543  char *line_end_ptr;
1544 
1545  /*
1546  * We need a special case for zero-column tables: check that the input
1547  * line is empty, and return.
1548  */
1549  if (cstate->max_fields <= 0)
1550  {
1551  if (cstate->line_buf.len != 0)
1552  ereport(ERROR,
1553  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1554  errmsg("extra data after last expected column")));
1555  return 0;
1556  }
1557 
1558  resetStringInfo(&cstate->attribute_buf);
1559 
1560  /*
1561  * The de-escaped attributes will certainly not be longer than the input
1562  * data line, so we can just force attribute_buf to be large enough and
1563  * then transfer data without any checks for enough space. We need to do
1564  * it this way because enlarging attribute_buf mid-stream would invalidate
1565  * pointers already stored into cstate->raw_fields[].
1566  */
1567  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1568  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1569  output_ptr = cstate->attribute_buf.data;
1570 
1571  /* set pointer variables for loop */
1572  cur_ptr = cstate->line_buf.data;
1573  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1574 
1575  /* Outer loop iterates over fields */
1576  fieldno = 0;
1577  for (;;)
1578  {
1579  bool found_delim = false;
1580  char *start_ptr;
1581  char *end_ptr;
1582  int input_len;
1583  bool saw_non_ascii = false;
1584 
1585  /* Make sure there is enough space for the next value */
1586  if (fieldno >= cstate->max_fields)
1587  {
1588  cstate->max_fields *= 2;
1589  cstate->raw_fields =
1590  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1591  }
1592 
1593  /* Remember start of field on both input and output sides */
1594  start_ptr = cur_ptr;
1595  cstate->raw_fields[fieldno] = output_ptr;
1596 
1597  /*
1598  * Scan data for field.
1599  *
1600  * Note that in this loop, we are scanning to locate the end of field
1601  * and also speculatively performing de-escaping. Once we find the
1602  * end-of-field, we can match the raw field contents against the null
1603  * marker string. Only after that comparison fails do we know that
1604  * de-escaping is actually the right thing to do; therefore we *must
1605  * not* throw any syntax errors before we've done the null-marker
1606  * check.
1607  */
1608  for (;;)
1609  {
1610  char c;
1611 
1612  end_ptr = cur_ptr;
1613  if (cur_ptr >= line_end_ptr)
1614  break;
1615  c = *cur_ptr++;
1616  if (c == delimc)
1617  {
1618  found_delim = true;
1619  break;
1620  }
1621  if (c == '\\')
1622  {
1623  if (cur_ptr >= line_end_ptr)
1624  break;
1625  c = *cur_ptr++;
1626  switch (c)
1627  {
1628  case '0':
1629  case '1':
1630  case '2':
1631  case '3':
1632  case '4':
1633  case '5':
1634  case '6':
1635  case '7':
1636  {
1637  /* handle \013 */
1638  int val;
1639 
1640  val = OCTVALUE(c);
1641  if (cur_ptr < line_end_ptr)
1642  {
1643  c = *cur_ptr;
1644  if (ISOCTAL(c))
1645  {
1646  cur_ptr++;
1647  val = (val << 3) + OCTVALUE(c);
1648  if (cur_ptr < line_end_ptr)
1649  {
1650  c = *cur_ptr;
1651  if (ISOCTAL(c))
1652  {
1653  cur_ptr++;
1654  val = (val << 3) + OCTVALUE(c);
1655  }
1656  }
1657  }
1658  }
1659  c = val & 0377;
1660  if (c == '\0' || IS_HIGHBIT_SET(c))
1661  saw_non_ascii = true;
1662  }
1663  break;
1664  case 'x':
1665  /* Handle \x3F */
1666  if (cur_ptr < line_end_ptr)
1667  {
1668  char hexchar = *cur_ptr;
1669 
1670  if (isxdigit((unsigned char) hexchar))
1671  {
1672  int val = GetDecimalFromHex(hexchar);
1673 
1674  cur_ptr++;
1675  if (cur_ptr < line_end_ptr)
1676  {
1677  hexchar = *cur_ptr;
1678  if (isxdigit((unsigned char) hexchar))
1679  {
1680  cur_ptr++;
1681  val = (val << 4) + GetDecimalFromHex(hexchar);
1682  }
1683  }
1684  c = val & 0xff;
1685  if (c == '\0' || IS_HIGHBIT_SET(c))
1686  saw_non_ascii = true;
1687  }
1688  }
1689  break;
1690  case 'b':
1691  c = '\b';
1692  break;
1693  case 'f':
1694  c = '\f';
1695  break;
1696  case 'n':
1697  c = '\n';
1698  break;
1699  case 'r':
1700  c = '\r';
1701  break;
1702  case 't':
1703  c = '\t';
1704  break;
1705  case 'v':
1706  c = '\v';
1707  break;
1708 
1709  /*
1710  * in all other cases, take the char after '\'
1711  * literally
1712  */
1713  }
1714  }
1715 
1716  /* Add c to output string */
1717  *output_ptr++ = c;
1718  }
1719 
1720  /* Check whether raw input matched null marker */
1721  input_len = end_ptr - start_ptr;
1722  if (input_len == cstate->opts.null_print_len &&
1723  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1724  cstate->raw_fields[fieldno] = NULL;
1725  /* Check whether raw input matched default marker */
1726  else if (fieldno < list_length(cstate->attnumlist) &&
1727  cstate->opts.default_print &&
1728  input_len == cstate->opts.default_print_len &&
1729  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1730  {
1731  /* fieldno is 0-indexed and attnum is 1-indexed */
1732  int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1733 
1734  if (cstate->defexprs[m] != NULL)
1735  {
1736  /* defaults contain entries for all physical attributes */
1737  cstate->defaults[m] = true;
1738  }
1739  else
1740  {
1741  TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1742  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1743 
1744  ereport(ERROR,
1745  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1746  errmsg("unexpected default marker in COPY data"),
1747  errdetail("Column \"%s\" has no default value.",
1748  NameStr(att->attname))));
1749  }
1750  }
1751  else
1752  {
1753  /*
1754  * At this point we know the field is supposed to contain data.
1755  *
1756  * If we de-escaped any non-7-bit-ASCII chars, make sure the
1757  * resulting string is valid data for the db encoding.
1758  */
1759  if (saw_non_ascii)
1760  {
1761  char *fld = cstate->raw_fields[fieldno];
1762 
1763  pg_verifymbstr(fld, output_ptr - fld, false);
1764  }
1765  }
1766 
1767  /* Terminate attribute value in output area */
1768  *output_ptr++ = '\0';
1769 
1770  fieldno++;
1771  /* Done if we hit EOL instead of a delim */
1772  if (!found_delim)
1773  break;
1774  }
1775 
1776  /* Clean up state of attribute_buf */
1777  output_ptr--;
1778  Assert(*output_ptr == '\0');
1779  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1780 
1781  return fieldno;
1782 }
1783 
1784 /*
1785  * Parse the current line into separate attributes (fields),
1786  * performing de-escaping as needed. This has exactly the same API as
1787  * CopyReadAttributesText, except we parse the fields according to
1788  * "standard" (i.e. common) CSV usage.
1789  */
1790 static int
1792 {
1793  char delimc = cstate->opts.delim[0];
1794  char quotec = cstate->opts.quote[0];
1795  char escapec = cstate->opts.escape[0];
1796  int fieldno;
1797  char *output_ptr;
1798  char *cur_ptr;
1799  char *line_end_ptr;
1800 
1801  /*
1802  * We need a special case for zero-column tables: check that the input
1803  * line is empty, and return.
1804  */
1805  if (cstate->max_fields <= 0)
1806  {
1807  if (cstate->line_buf.len != 0)
1808  ereport(ERROR,
1809  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1810  errmsg("extra data after last expected column")));
1811  return 0;
1812  }
1813 
1814  resetStringInfo(&cstate->attribute_buf);
1815 
1816  /*
1817  * The de-escaped attributes will certainly not be longer than the input
1818  * data line, so we can just force attribute_buf to be large enough and
1819  * then transfer data without any checks for enough space. We need to do
1820  * it this way because enlarging attribute_buf mid-stream would invalidate
1821  * pointers already stored into cstate->raw_fields[].
1822  */
1823  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1824  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1825  output_ptr = cstate->attribute_buf.data;
1826 
1827  /* set pointer variables for loop */
1828  cur_ptr = cstate->line_buf.data;
1829  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1830 
1831  /* Outer loop iterates over fields */
1832  fieldno = 0;
1833  for (;;)
1834  {
1835  bool found_delim = false;
1836  bool saw_quote = false;
1837  char *start_ptr;
1838  char *end_ptr;
1839  int input_len;
1840 
1841  /* Make sure there is enough space for the next value */
1842  if (fieldno >= cstate->max_fields)
1843  {
1844  cstate->max_fields *= 2;
1845  cstate->raw_fields =
1846  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1847  }
1848 
1849  /* Remember start of field on both input and output sides */
1850  start_ptr = cur_ptr;
1851  cstate->raw_fields[fieldno] = output_ptr;
1852 
1853  /*
1854  * Scan data for field,
1855  *
1856  * The loop starts in "not quote" mode and then toggles between that
1857  * and "in quote" mode. The loop exits normally if it is in "not
1858  * quote" mode and a delimiter or line end is seen.
1859  */
1860  for (;;)
1861  {
1862  char c;
1863 
1864  /* Not in quote */
1865  for (;;)
1866  {
1867  end_ptr = cur_ptr;
1868  if (cur_ptr >= line_end_ptr)
1869  goto endfield;
1870  c = *cur_ptr++;
1871  /* unquoted field delimiter */
1872  if (c == delimc)
1873  {
1874  found_delim = true;
1875  goto endfield;
1876  }
1877  /* start of quoted field (or part of field) */
1878  if (c == quotec)
1879  {
1880  saw_quote = true;
1881  break;
1882  }
1883  /* Add c to output string */
1884  *output_ptr++ = c;
1885  }
1886 
1887  /* In quote */
1888  for (;;)
1889  {
1890  end_ptr = cur_ptr;
1891  if (cur_ptr >= line_end_ptr)
1892  ereport(ERROR,
1893  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1894  errmsg("unterminated CSV quoted field")));
1895 
1896  c = *cur_ptr++;
1897 
1898  /* escape within a quoted field */
1899  if (c == escapec)
1900  {
1901  /*
1902  * peek at the next char if available, and escape it if it
1903  * is an escape char or a quote char
1904  */
1905  if (cur_ptr < line_end_ptr)
1906  {
1907  char nextc = *cur_ptr;
1908 
1909  if (nextc == escapec || nextc == quotec)
1910  {
1911  *output_ptr++ = nextc;
1912  cur_ptr++;
1913  continue;
1914  }
1915  }
1916  }
1917 
1918  /*
1919  * end of quoted field. Must do this test after testing for
1920  * escape in case quote char and escape char are the same
1921  * (which is the common case).
1922  */
1923  if (c == quotec)
1924  break;
1925 
1926  /* Add c to output string */
1927  *output_ptr++ = c;
1928  }
1929  }
1930 endfield:
1931 
1932  /* Terminate attribute value in output area */
1933  *output_ptr++ = '\0';
1934 
1935  /* Check whether raw input matched null marker */
1936  input_len = end_ptr - start_ptr;
1937  if (!saw_quote && input_len == cstate->opts.null_print_len &&
1938  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1939  cstate->raw_fields[fieldno] = NULL;
1940  /* Check whether raw input matched default marker */
1941  else if (fieldno < list_length(cstate->attnumlist) &&
1942  cstate->opts.default_print &&
1943  input_len == cstate->opts.default_print_len &&
1944  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1945  {
1946  /* fieldno is 0-index and attnum is 1-index */
1947  int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1948 
1949  if (cstate->defexprs[m] != NULL)
1950  {
1951  /* defaults contain entries for all physical attributes */
1952  cstate->defaults[m] = true;
1953  }
1954  else
1955  {
1956  TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1957  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1958 
1959  ereport(ERROR,
1960  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1961  errmsg("unexpected default marker in COPY data"),
1962  errdetail("Column \"%s\" has no default value.",
1963  NameStr(att->attname))));
1964  }
1965  }
1966 
1967  fieldno++;
1968  /* Done if we hit EOL instead of a delim */
1969  if (!found_delim)
1970  break;
1971  }
1972 
1973  /* Clean up state of attribute_buf */
1974  output_ptr--;
1975  Assert(*output_ptr == '\0');
1976  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1977 
1978  return fieldno;
1979 }
1980 
1981 
1982 /*
1983  * Read a binary attribute
1984  */
1985 static Datum
1987  Oid typioparam, int32 typmod,
1988  bool *isnull)
1989 {
1990  int32 fld_size;
1991  Datum result;
1992 
1993  if (!CopyGetInt32(cstate, &fld_size))
1994  ereport(ERROR,
1995  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1996  errmsg("unexpected EOF in COPY data")));
1997  if (fld_size == -1)
1998  {
1999  *isnull = true;
2000  return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
2001  }
2002  if (fld_size < 0)
2003  ereport(ERROR,
2004  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2005  errmsg("invalid field size")));
2006 
2007  /* reset attribute_buf to empty, and load raw data in it */
2008  resetStringInfo(&cstate->attribute_buf);
2009 
2010  enlargeStringInfo(&cstate->attribute_buf, fld_size);
2011  if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
2012  fld_size) != fld_size)
2013  ereport(ERROR,
2014  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2015  errmsg("unexpected EOF in COPY data")));
2016 
2017  cstate->attribute_buf.len = fld_size;
2018  cstate->attribute_buf.data[fld_size] = '\0';
2019 
2020  /* Call the column type's binary input converter */
2021  result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
2022  typioparam, typmod);
2023 
2024  /* Trouble if it didn't eat the whole buffer */
2025  if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
2026  ereport(ERROR,
2027  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
2028  errmsg("incorrect binary data format")));
2029 
2030  *isnull = false;
2031  return result;
2032 }
int16 AttrNumber
Definition: attnum.h:21
void pgstat_progress_update_param(int index, int64 val)
static Datum values[MAXATTR]
Definition: bootstrap.c:152
#define NameStr(name)
Definition: c.h:746
unsigned short uint16
Definition: c.h:505
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:1004
signed short int16
Definition: c.h:493
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1155
signed int int32
Definition: c.h:494
#define Assert(condition)
Definition: c.h:858
#define MemSet(start, val, len)
Definition: c.h:1020
char * CopyLimitPrintoutLength(const char *str)
Definition: copyfrom.c:191
#define RAW_BUF_BYTES(cstate)
#define INPUT_BUF_SIZE
@ EOL_CR
@ EOL_CRNL
@ EOL_UNKNOWN
@ EOL_NL
#define INPUT_BUF_BYTES(cstate)
#define RAW_BUF_SIZE
static int CopyReadAttributesCSV(CopyFromState cstate)
static bool CopyGetInt16(CopyFromState cstate, int16 *val)
static void CopyConversionError(CopyFromState cstate)
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
static void CopyLoadRawBuf(CopyFromState cstate)
#define OCTVALUE(c)
Definition: copyfromparse.c:80
#define REFILL_LINEBUF
#define NO_END_OF_COPY_GOTO
static void CopyLoadInputBuf(CopyFromState cstate)
#define ISOCTAL(c)
Definition: copyfromparse.c:79
void ReceiveCopyBinaryHeader(CopyFromState cstate)
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
static bool CopyReadLineText(CopyFromState cstate)
static int GetDecimalFromHex(char hex)
void ReceiveCopyBegin(CopyFromState cstate)
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
static int CopyReadAttributesText(CopyFromState cstate)
static const char BinarySignature[11]
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Definition: copyfromparse.c:98
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
static bool CopyReadLine(CopyFromState cstate)
static void CopyConvertBuf(CopyFromState cstate)
bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
@ COPY_FILE
Definition: copyto.c:45
@ COPY_CALLBACK
Definition: copyto.c:47
@ COPY_FRONTEND
Definition: copyto.c:46
struct cursor * cur
Definition: ecpg.c:28
int errcode_for_file_access(void)
Definition: elog.c:882
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define NOTICE
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:149
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:333
bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod, fmNodePtr escontext, Datum *result)
Definition: fmgr.c:1585
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1697
@ COPY_ON_ERROR_STOP
Definition: copy.h:39
@ COPY_LOG_VERBOSITY_VERBOSE
Definition: copy.h:49
@ COPY_HEADER_MATCH
Definition: copy.h:30
long val
Definition: informix.c:670
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
#define pq_flush()
Definition: libpq.h:46
#define PQ_SMALL_MESSAGE_LIMIT
Definition: libpq.h:30
#define PQ_LARGE_MESSAGE_LIMIT
Definition: libpq.h:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
Definition: mbutils.c:469
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
void pfree(void *pointer)
Definition: mcxt.c:1520
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:141
#define RESUME_CANCEL_INTERRUPTS()
Definition: miscadmin.h:143
int namestrcmp(Name name, const char *str)
Definition: name.c:247
int16 attnum
Definition: pg_attribute.h:74
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:209
static char format
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
#define pg_ntoh16(x)
Definition: pg_bswap.h:124
static int list_length(const List *l)
Definition: pg_list.h:152
#define lfirst_int(lc)
Definition: pg_list.h:173
static int list_nth_int(const List *list, int n)
Definition: pg_list.h:310
static char * buf
Definition: pg_test_fsync.c:73
#define MAX_CONVERSION_INPUT_LENGTH
Definition: pg_wchar.h:320
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1202
int pq_getbyte(void)
Definition: pqcomm.c:963
void pq_startmsgread(void)
Definition: pqcomm.c:1140
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:579
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:528
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:296
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:88
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:160
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:136
char * c
char string[11]
Definition: preproc-type.c:52
#define PROGRESS_COPY_BYTES_PROCESSED
Definition: progress.h:139
#define PqMsg_CopyDone
Definition: protocol.h:64
#define PqMsg_CopyData
Definition: protocol.h:65
#define PqMsg_CopyInResponse
Definition: protocol.h:45
#define PqMsg_Sync
Definition: protocol.h:27
#define PqMsg_CopyFail
Definition: protocol.h:29
#define PqMsg_Flush
Definition: protocol.h:24
#define RelationGetDescr(relation)
Definition: rel.h:531
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:78
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:289
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
int default_print_len
Definition: copy.h:70
bool binary
Definition: copy.h:62
int null_print_len
Definition: copy.h:67
CopyLogVerbosityChoice log_verbosity
Definition: copy.h:85
char * quote
Definition: copy.h:72
CopyOnErrorChoice on_error
Definition: copy.h:84
CopyHeaderChoice header_line
Definition: copy.h:65
char * escape
Definition: copy.h:73
char * null_print
Definition: copy.h:66
char * delim
Definition: copy.h:71
bool * force_notnull_flags
Definition: copy.h:79
bool csv_mode
Definition: copy.h:64
bool * force_null_flags
Definition: copy.h:82
char * default_print
Definition: copy.h:69
copy_data_source_cb data_source_cb
StringInfoData line_buf
CopyFormatOptions opts
StringInfoData attribute_buf
const char * cur_attval
const char * cur_attname
ErrorSaveContext * escontext
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:263
Definition: fmgr.h:57
Definition: nodes.h:129
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
Definition: wchar.c:2116
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2127