PostgreSQL Source Code git master
copyfromparse.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * copyfromparse.c
 *      Parse CSV/text/binary format for COPY FROM.
 *
 * This file contains routines to parse the text, CSV and binary input
 * formats.  The main entry point is NextCopyFrom(), which parses the
 * next input line and returns it as Datums.
 *
 * In text/CSV mode, the parsing happens in multiple stages:
 *
 * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
 *               1.          2.            3.           4.
 *
 * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
 *    places it into 'raw_buf'.
 *
 * 2. CopyConvertBuf() calls the encoding conversion function to convert
 *    the data in 'raw_buf' from client to server encoding, placing the
 *    converted result in 'input_buf'.
 *
 * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
 *    It is responsible for finding the next newline marker, taking quote and
 *    escape characters into account according to the COPY options.  The line
 *    is copied into 'line_buf', with quotes and escape characters still
 *    intact.
 *
 * 4. CopyReadAttributesText/CSV() function takes the input line from
 *    'line_buf', and splits it into fields, unescaping the data as required.
 *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
 *    pointers to each field.
 *
 * If encoding conversion is not required, a shortcut is taken in step 2 to
 * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
 * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
 * directly into 'input_buf'.  CopyConvertBuf() then merely validates that
 * the data is valid in the current encoding.
 *
 * In binary mode, the pipeline is much simpler.  Input is loaded into
 * 'raw_buf', and encoding conversion is done in the datatype-specific
 * receive functions, if required.  'input_buf' and 'line_buf' are not used,
 * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
 * data when it's passed to the receive function.
 *
 * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE).  'input_buf' is also
 * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required.  'line_buf'
 * and 'attribute_buf' are expanded on demand, to hold the longest line
 * encountered so far.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/commands/copyfromparse.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>

#include "commands/copy.h"
#include "commands/copyfrom_internal.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
#include "utils/rel.h"

/* True if 'c' is an octal digit character ('0'..'7'). */
#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
/* Numeric value of the digit character 'c'. */
#define OCTVALUE(c) ((c) - '0')

/*
 * These macros centralize code used to process line_buf and input_buf buffers.
 * They are macros because they often do continue/break control and to avoid
 * function call overhead in tight COPY loops.
 *
 * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
 * prevent the continue/break processing from working.  We end the "if (1)"
 * with "else ((void) 0)" to ensure the "if" does not unintentionally match
 * any "else" in the calling code, and to avoid any compiler warnings about
 * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
 */

/*
 * This keeps the character read at the top of the loop in the buffer
 * even if there is more than one read-ahead.
 *
 * Expects the enclosing loop to have input_buf_ptr, copy_buf_len, hit_eof,
 * prev_raw_ptr and need_data in scope.  When fewer than 'extralen' + 1 bytes
 * remain and EOF has not been hit, it rewinds input_buf_ptr to prev_raw_ptr,
 * sets need_data, and continues the loop.
 */
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
if (1) \
{ \
    if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
    { \
        input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
        need_data = true; \
        continue; \
    } \
} else ((void) 0)

/*
 * This consumes the remainder of the buffer and breaks.
 *
 * Expects input_buf_ptr, copy_buf_len, hit_eof and result in scope.  When
 * fewer than 'extralen' + 1 bytes remain and EOF has been hit, it sets
 * result to true and breaks out of the enclosing loop.
 */
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
if (1) \
{ \
    if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
    { \
        if (extralen) \
            input_buf_ptr = copy_buf_len; /* consume the partial character */ \
        /* backslash just before EOF, treat as data char */ \
        result = true; \
        break; \
    } \
} else ((void) 0)

/*
 * Transfer any approved data to line_buf; must do this to be sure
 * there is some room in input_buf.
 *
 * Appends the bytes between cstate->input_buf_index and input_buf_ptr to
 * cstate->line_buf, then advances input_buf_index to input_buf_ptr.
 */
#define REFILL_LINEBUF \
if (1) \
{ \
    if (input_buf_ptr > cstate->input_buf_index) \
    { \
        appendBinaryStringInfo(&cstate->line_buf, \
                               cstate->input_buf + cstate->input_buf_index, \
                               input_buf_ptr - cstate->input_buf_index); \
        cstate->input_buf_index = input_buf_ptr; \
    } \
} else ((void) 0)

/*
 * 11-byte signature expected at the start of binary COPY input.
 *
 * NOTE: there's a copy of this in copyto.c
 */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";


142/* non-export function prototypes */
143static bool CopyReadLine(CopyFromState cstate);
144static bool CopyReadLineText(CopyFromState cstate);
145static int CopyReadAttributesText(CopyFromState cstate);
146static int CopyReadAttributesCSV(CopyFromState cstate);
148 Oid typioparam, int32 typmod,
149 bool *isnull);
150
151
152/* Low-level communications functions */
153static int CopyGetData(CopyFromState cstate, void *databuf,
154 int minread, int maxread);
155static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
156static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
157static void CopyLoadInputBuf(CopyFromState cstate);
158static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
159
160void
162{
164 int natts = list_length(cstate->attnumlist);
165 int16 format = (cstate->opts.binary ? 1 : 0);
166 int i;
167
169 pq_sendbyte(&buf, format); /* overall format */
170 pq_sendint16(&buf, natts);
171 for (i = 0; i < natts; i++)
172 pq_sendint16(&buf, format); /* per-column formats */
174 cstate->copy_src = COPY_FRONTEND;
175 cstate->fe_msgbuf = makeStringInfo();
176 /* We *must* flush here to ensure FE knows it can send. */
177 pq_flush();
178}
179
180void
182{
183 char readSig[11];
184 int32 tmp;
185
186 /* Signature */
187 if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
188 memcmp(readSig, BinarySignature, 11) != 0)
190 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
191 errmsg("COPY file signature not recognized")));
192 /* Flags field */
193 if (!CopyGetInt32(cstate, &tmp))
195 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
196 errmsg("invalid COPY file header (missing flags)")));
197 if ((tmp & (1 << 16)) != 0)
199 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
200 errmsg("invalid COPY file header (WITH OIDS)")));
201 tmp &= ~(1 << 16);
202 if ((tmp >> 16) != 0)
204 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
205 errmsg("unrecognized critical flags in COPY file header")));
206 /* Header extension length */
207 if (!CopyGetInt32(cstate, &tmp) ||
208 tmp < 0)
210 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
211 errmsg("invalid COPY file header (missing length)")));
212 /* Skip extension header, if present */
213 while (tmp-- > 0)
214 {
215 if (CopyReadBinaryData(cstate, readSig, 1) != 1)
217 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
218 errmsg("invalid COPY file header (wrong length)")));
219 }
220}
221
222/*
223 * CopyGetData reads data from the source (file or frontend)
224 *
225 * We attempt to read at least minread, and at most maxread, bytes from
226 * the source. The actual number of bytes read is returned; if this is
227 * less than minread, EOF was detected.
228 *
229 * Note: when copying from the frontend, we expect a proper EOF mark per
230 * protocol; if the frontend simply drops the connection, we raise error.
231 * It seems unwise to allow the COPY IN to complete normally in that case.
232 *
233 * NB: no data conversion is applied here.
234 */
235static int
236CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
237{
238 int bytesread = 0;
239
240 switch (cstate->copy_src)
241 {
242 case COPY_FILE:
243 bytesread = fread(databuf, 1, maxread, cstate->copy_file);
244 if (ferror(cstate->copy_file))
247 errmsg("could not read from COPY file: %m")));
248 if (bytesread == 0)
249 cstate->raw_reached_eof = true;
250 break;
251 case COPY_FRONTEND:
252 while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
253 {
254 int avail;
255
256 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
257 {
258 /* Try to receive another message */
259 int mtype;
260 int maxmsglen;
261
262 readmessage:
265 mtype = pq_getbyte();
266 if (mtype == EOF)
268 (errcode(ERRCODE_CONNECTION_FAILURE),
269 errmsg("unexpected EOF on client connection with an open transaction")));
270 /* Validate message type and set packet size limit */
271 switch (mtype)
272 {
273 case PqMsg_CopyData:
274 maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
275 break;
276 case PqMsg_CopyDone:
277 case PqMsg_CopyFail:
278 case PqMsg_Flush:
279 case PqMsg_Sync:
280 maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
281 break;
282 default:
284 (errcode(ERRCODE_PROTOCOL_VIOLATION),
285 errmsg("unexpected message type 0x%02X during COPY from stdin",
286 mtype)));
287 maxmsglen = 0; /* keep compiler quiet */
288 break;
289 }
290 /* Now collect the message body */
291 if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
293 (errcode(ERRCODE_CONNECTION_FAILURE),
294 errmsg("unexpected EOF on client connection with an open transaction")));
296 /* ... and process it */
297 switch (mtype)
298 {
299 case PqMsg_CopyData:
300 break;
301 case PqMsg_CopyDone:
302 /* COPY IN correctly terminated by frontend */
303 cstate->raw_reached_eof = true;
304 return bytesread;
305 case PqMsg_CopyFail:
307 (errcode(ERRCODE_QUERY_CANCELED),
308 errmsg("COPY from stdin failed: %s",
309 pq_getmsgstring(cstate->fe_msgbuf))));
310 break;
311 case PqMsg_Flush:
312 case PqMsg_Sync:
313
314 /*
315 * Ignore Flush/Sync for the convenience of client
316 * libraries (such as libpq) that may send those
317 * without noticing that the command they just
318 * sent was COPY.
319 */
320 goto readmessage;
321 default:
322 Assert(false); /* NOT REACHED */
323 }
324 }
325 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
326 if (avail > maxread)
327 avail = maxread;
328 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
329 databuf = (void *) ((char *) databuf + avail);
330 maxread -= avail;
331 bytesread += avail;
332 }
333 break;
334 case COPY_CALLBACK:
335 bytesread = cstate->data_source_cb(databuf, minread, maxread);
336 break;
337 }
338
339 return bytesread;
340}
341
342
343/*
344 * These functions do apply some data conversion
345 */
346
347/*
348 * CopyGetInt32 reads an int32 that appears in network byte order
349 *
350 * Returns true if OK, false if EOF
351 */
352static inline bool
354{
355 uint32 buf;
356
357 if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
358 {
359 *val = 0; /* suppress compiler warning */
360 return false;
361 }
362 *val = (int32) pg_ntoh32(buf);
363 return true;
364}
365
366/*
367 * CopyGetInt16 reads an int16 that appears in network byte order
368 */
369static inline bool
371{
372 uint16 buf;
373
374 if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
375 {
376 *val = 0; /* suppress compiler warning */
377 return false;
378 }
379 *val = (int16) pg_ntoh16(buf);
380 return true;
381}
382
383
384/*
385 * Perform encoding conversion on data in 'raw_buf', writing the converted
386 * data into 'input_buf'.
387 *
388 * On entry, there must be some data to convert in 'raw_buf'.
389 */
390static void
392{
393 /*
394 * If the file and server encoding are the same, no encoding conversion is
395 * required. However, we still need to verify that the input is valid for
396 * the encoding.
397 */
398 if (!cstate->need_transcoding)
399 {
400 /*
401 * When conversion is not required, input_buf and raw_buf are the
402 * same. raw_buf_len is the total number of bytes in the buffer, and
403 * input_buf_len tracks how many of those bytes have already been
404 * verified.
405 */
406 int preverifiedlen = cstate->input_buf_len;
407 int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
408 int nverified;
409
410 if (unverifiedlen == 0)
411 {
412 /*
413 * If no more raw data is coming, report the EOF to the caller.
414 */
415 if (cstate->raw_reached_eof)
416 cstate->input_reached_eof = true;
417 return;
418 }
419
420 /*
421 * Verify the new data, including any residual unverified bytes from
422 * previous round.
423 */
424 nverified = pg_encoding_verifymbstr(cstate->file_encoding,
425 cstate->raw_buf + preverifiedlen,
426 unverifiedlen);
427 if (nverified == 0)
428 {
429 /*
430 * Could not verify anything.
431 *
432 * If there is no more raw input data coming, it means that there
433 * was an incomplete multi-byte sequence at the end. Also, if
434 * there's "enough" input left, we should be able to verify at
435 * least one character, and a failure to do so means that we've
436 * hit an invalid byte sequence.
437 */
438 if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
439 cstate->input_reached_error = true;
440 return;
441 }
442 cstate->input_buf_len += nverified;
443 }
444 else
445 {
446 /*
447 * Encoding conversion is needed.
448 */
449 int nbytes;
450 unsigned char *src;
451 int srclen;
452 unsigned char *dst;
453 int dstlen;
454 int convertedlen;
455
456 if (RAW_BUF_BYTES(cstate) == 0)
457 {
458 /*
459 * If no more raw data is coming, report the EOF to the caller.
460 */
461 if (cstate->raw_reached_eof)
462 cstate->input_reached_eof = true;
463 return;
464 }
465
466 /*
467 * First, copy down any unprocessed data.
468 */
469 nbytes = INPUT_BUF_BYTES(cstate);
470 if (nbytes > 0 && cstate->input_buf_index > 0)
471 memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
472 nbytes);
473 cstate->input_buf_index = 0;
474 cstate->input_buf_len = nbytes;
475 cstate->input_buf[nbytes] = '\0';
476
477 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
478 srclen = cstate->raw_buf_len - cstate->raw_buf_index;
479 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
480 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
481
482 /*
483 * Do the conversion. This might stop short, if there is an invalid
484 * byte sequence in the input. We'll convert as much as we can in
485 * that case.
486 *
487 * Note: Even if we hit an invalid byte sequence, we don't report the
488 * error until all the valid bytes have been consumed. The input
489 * might contain an end-of-input marker (\.), and we don't want to
490 * report an error if the invalid byte sequence is after the
491 * end-of-input marker. We might unnecessarily convert some data
492 * after the end-of-input marker as long as it's valid for the
493 * encoding, but that's harmless.
494 */
495 convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
496 cstate->file_encoding,
498 src, srclen,
499 dst, dstlen,
500 true);
501 if (convertedlen == 0)
502 {
503 /*
504 * Could not convert anything. If there is no more raw input data
505 * coming, it means that there was an incomplete multi-byte
506 * sequence at the end. Also, if there is plenty of input left,
507 * we should be able to convert at least one character, so a
508 * failure to do so must mean that we've hit a byte sequence
509 * that's invalid.
510 */
511 if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
512 cstate->input_reached_error = true;
513 return;
514 }
515 cstate->raw_buf_index += convertedlen;
516 cstate->input_buf_len += strlen((char *) dst);
517 }
518}
519
520/*
521 * Report an encoding or conversion error.
522 */
523static void
525{
526 Assert(cstate->raw_buf_len > 0);
528
529 if (!cstate->need_transcoding)
530 {
531 /*
532 * Everything up to input_buf_len was successfully verified, and
533 * input_buf_len points to the invalid or incomplete character.
534 */
536 cstate->raw_buf + cstate->input_buf_len,
537 cstate->raw_buf_len - cstate->input_buf_len);
538 }
539 else
540 {
541 /*
542 * raw_buf_index points to the invalid or untranslatable character. We
543 * let the conversion routine report the error, because it can provide
544 * a more specific error message than we could here. An earlier call
545 * to the conversion routine in CopyConvertBuf() detected that there
546 * is an error, now we call the conversion routine again with
547 * noError=false, to have it throw the error.
548 */
549 unsigned char *src;
550 int srclen;
551 unsigned char *dst;
552 int dstlen;
553
554 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
555 srclen = cstate->raw_buf_len - cstate->raw_buf_index;
556 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
557 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
558
560 cstate->file_encoding,
562 src, srclen,
563 dst, dstlen,
564 false);
565
566 /*
567 * The conversion routine should have reported an error, so this
568 * should not be reached.
569 */
570 elog(ERROR, "encoding conversion failed without error");
571 }
572}
573
574/*
575 * Load more data from data source to raw_buf.
576 *
577 * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
578 * beginning of the buffer, and we load new data after that.
579 */
580static void
582{
583 int nbytes;
584 int inbytes;
585
586 /*
587 * In text mode, if encoding conversion is not required, raw_buf and
588 * input_buf point to the same buffer. Their len/index better agree, too.
589 */
590 if (cstate->raw_buf == cstate->input_buf)
591 {
592 Assert(!cstate->need_transcoding);
593 Assert(cstate->raw_buf_index == cstate->input_buf_index);
594 Assert(cstate->input_buf_len <= cstate->raw_buf_len);
595 }
596
597 /*
598 * Copy down the unprocessed data if any.
599 */
600 nbytes = RAW_BUF_BYTES(cstate);
601 if (nbytes > 0 && cstate->raw_buf_index > 0)
602 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
603 nbytes);
604 cstate->raw_buf_len -= cstate->raw_buf_index;
605 cstate->raw_buf_index = 0;
606
607 /*
608 * If raw_buf and input_buf are in fact the same buffer, adjust the
609 * input_buf variables, too.
610 */
611 if (cstate->raw_buf == cstate->input_buf)
612 {
613 cstate->input_buf_len -= cstate->input_buf_index;
614 cstate->input_buf_index = 0;
615 }
616
617 /* Load more data */
618 inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
619 1, RAW_BUF_SIZE - cstate->raw_buf_len);
620 nbytes += inbytes;
621 cstate->raw_buf[nbytes] = '\0';
622 cstate->raw_buf_len = nbytes;
623
624 cstate->bytes_processed += inbytes;
626
627 if (inbytes == 0)
628 cstate->raw_reached_eof = true;
629}
630
631/*
632 * CopyLoadInputBuf loads some more data into input_buf
633 *
634 * On return, at least one more input character is loaded into
635 * input_buf, or input_reached_eof is set.
636 *
637 * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
638 * of the buffer and then we load more data after that.
639 */
640static void
642{
643 int nbytes = INPUT_BUF_BYTES(cstate);
644
645 /*
646 * The caller has updated input_buf_index to indicate how much of the
647 * input has been consumed and isn't needed anymore. If input_buf is the
648 * same physical area as raw_buf, update raw_buf_index accordingly.
649 */
650 if (cstate->raw_buf == cstate->input_buf)
651 {
652 Assert(!cstate->need_transcoding);
653 Assert(cstate->input_buf_index >= cstate->raw_buf_index);
654 cstate->raw_buf_index = cstate->input_buf_index;
655 }
656
657 for (;;)
658 {
659 /* If we now have some unconverted data, try to convert it */
660 CopyConvertBuf(cstate);
661
662 /* If we now have some more input bytes ready, return them */
663 if (INPUT_BUF_BYTES(cstate) > nbytes)
664 return;
665
666 /*
667 * If we reached an invalid byte sequence, or we're at an incomplete
668 * multi-byte character but there is no more raw input data, report
669 * conversion error.
670 */
671 if (cstate->input_reached_error)
672 CopyConversionError(cstate);
673
674 /* no more input, and everything has been converted */
675 if (cstate->input_reached_eof)
676 break;
677
678 /* Try to load more raw data */
679 Assert(!cstate->raw_reached_eof);
680 CopyLoadRawBuf(cstate);
681 }
682}
683
684/*
685 * CopyReadBinaryData
686 *
687 * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
688 * and writes them to 'dest'. Returns the number of bytes read (which
689 * would be less than 'nbytes' only if we reach EOF).
690 */
691static int
692CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
693{
694 int copied_bytes = 0;
695
696 if (RAW_BUF_BYTES(cstate) >= nbytes)
697 {
698 /* Enough bytes are present in the buffer. */
699 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
700 cstate->raw_buf_index += nbytes;
701 copied_bytes = nbytes;
702 }
703 else
704 {
705 /*
706 * Not enough bytes in the buffer, so must read from the file. Need
707 * to loop since 'nbytes' could be larger than the buffer size.
708 */
709 do
710 {
711 int copy_bytes;
712
713 /* Load more data if buffer is empty. */
714 if (RAW_BUF_BYTES(cstate) == 0)
715 {
716 CopyLoadRawBuf(cstate);
717 if (cstate->raw_reached_eof)
718 break; /* EOF */
719 }
720
721 /* Transfer some bytes. */
722 copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
723 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
724 cstate->raw_buf_index += copy_bytes;
725 dest += copy_bytes;
726 copied_bytes += copy_bytes;
727 } while (copied_bytes < nbytes);
728 }
729
730 return copied_bytes;
731}
732
733/*
734 * Read raw fields in the next line for COPY FROM in text or csv mode.
735 * Return false if no more lines.
736 *
737 * An internal temporary buffer is returned via 'fields'. It is valid until
738 * the next call of the function. Since the function returns all raw fields
739 * in the input file, 'nfields' could be different from the number of columns
740 * in the relation.
741 *
742 * NOTE: force_not_null option are not applied to the returned fields.
743 */
744bool
745NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
746{
747 int fldct;
748 bool done;
749
750 /* only available for text or csv input */
751 Assert(!cstate->opts.binary);
752
753 /* on input check that the header line is correct if needed */
754 if (cstate->cur_lineno == 0 && cstate->opts.header_line)
755 {
756 ListCell *cur;
757 TupleDesc tupDesc;
758
759 tupDesc = RelationGetDescr(cstate->rel);
760
761 cstate->cur_lineno++;
762 done = CopyReadLine(cstate);
763
764 if (cstate->opts.header_line == COPY_HEADER_MATCH)
765 {
766 int fldnum;
767
768 if (cstate->opts.csv_mode)
769 fldct = CopyReadAttributesCSV(cstate);
770 else
771 fldct = CopyReadAttributesText(cstate);
772
773 if (fldct != list_length(cstate->attnumlist))
775 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
776 errmsg("wrong number of fields in header line: got %d, expected %d",
777 fldct, list_length(cstate->attnumlist))));
778
779 fldnum = 0;
780 foreach(cur, cstate->attnumlist)
781 {
782 int attnum = lfirst_int(cur);
783 char *colName;
784 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
785
786 Assert(fldnum < cstate->max_fields);
787
788 colName = cstate->raw_fields[fldnum++];
789 if (colName == NULL)
791 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
792 errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
793 fldnum, cstate->opts.null_print, NameStr(attr->attname))));
794
795 if (namestrcmp(&attr->attname, colName) != 0)
796 {
798 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
799 errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
800 fldnum, colName, NameStr(attr->attname))));
801 }
802 }
803 }
804
805 if (done)
806 return false;
807 }
808
809 cstate->cur_lineno++;
810
811 /* Actually read the line into memory here */
812 done = CopyReadLine(cstate);
813
814 /*
815 * EOF at start of line means we're done. If we see EOF after some
816 * characters, we act as though it was newline followed by EOF, ie,
817 * process the line and then exit loop on next iteration.
818 */
819 if (done && cstate->line_buf.len == 0)
820 return false;
821
822 /* Parse the line into de-escaped field values */
823 if (cstate->opts.csv_mode)
824 fldct = CopyReadAttributesCSV(cstate);
825 else
826 fldct = CopyReadAttributesText(cstate);
827
828 *fields = cstate->raw_fields;
829 *nfields = fldct;
830 return true;
831}
832
833/*
834 * Read next tuple from file for COPY FROM. Return false if no more tuples.
835 *
836 * 'econtext' is used to evaluate default expression for each column that is
837 * either not read from the file or is using the DEFAULT option of COPY FROM.
838 * It can be NULL when no default values are used, i.e. when all columns are
839 * read from the file, and DEFAULT option is unset.
840 *
841 * 'values' and 'nulls' arrays must be the same length as columns of the
842 * relation passed to BeginCopyFrom. This function fills the arrays.
843 */
844bool
846 Datum *values, bool *nulls)
847{
848 TupleDesc tupDesc;
849 AttrNumber num_phys_attrs,
850 attr_count,
851 num_defaults = cstate->num_defaults;
852 FmgrInfo *in_functions = cstate->in_functions;
853 Oid *typioparams = cstate->typioparams;
854 int i;
855 int *defmap = cstate->defmap;
856 ExprState **defexprs = cstate->defexprs;
857
858 tupDesc = RelationGetDescr(cstate->rel);
859 num_phys_attrs = tupDesc->natts;
860 attr_count = list_length(cstate->attnumlist);
861
862 /* Initialize all values for row to NULL */
863 MemSet(values, 0, num_phys_attrs * sizeof(Datum));
864 MemSet(nulls, true, num_phys_attrs * sizeof(bool));
865 MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
866
867 if (!cstate->opts.binary)
868 {
869 char **field_strings;
870 ListCell *cur;
871 int fldct;
872 int fieldno;
873 char *string;
874
875 /* read raw fields in the next line */
876 if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
877 return false;
878
879 /* check for overflowing fields */
880 if (attr_count > 0 && fldct > attr_count)
882 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
883 errmsg("extra data after last expected column")));
884
885 fieldno = 0;
886
887 /* Loop to read the user attributes on the line. */
888 foreach(cur, cstate->attnumlist)
889 {
890 int attnum = lfirst_int(cur);
891 int m = attnum - 1;
892 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
893
894 if (fieldno >= fldct)
896 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
897 errmsg("missing data for column \"%s\"",
898 NameStr(att->attname))));
899 string = field_strings[fieldno++];
900
901 if (cstate->convert_select_flags &&
902 !cstate->convert_select_flags[m])
903 {
904 /* ignore input field, leaving column as NULL */
905 continue;
906 }
907
908 if (cstate->opts.csv_mode)
909 {
910 if (string == NULL &&
911 cstate->opts.force_notnull_flags[m])
912 {
913 /*
914 * FORCE_NOT_NULL option is set and column is NULL -
915 * convert it to the NULL string.
916 */
917 string = cstate->opts.null_print;
918 }
919 else if (string != NULL && cstate->opts.force_null_flags[m]
920 && strcmp(string, cstate->opts.null_print) == 0)
921 {
922 /*
923 * FORCE_NULL option is set and column matches the NULL
924 * string. It must have been quoted, or otherwise the
925 * string would already have been set to NULL. Convert it
926 * to NULL as specified.
927 */
928 string = NULL;
929 }
930 }
931
932 cstate->cur_attname = NameStr(att->attname);
933 cstate->cur_attval = string;
934
935 if (string != NULL)
936 nulls[m] = false;
937
938 if (cstate->defaults[m])
939 {
940 /*
941 * The caller must supply econtext and have switched into the
942 * per-tuple memory context in it.
943 */
944 Assert(econtext != NULL);
946
947 values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
948 }
949
950 /*
951 * If ON_ERROR is specified with IGNORE, skip rows with soft
952 * errors
953 */
954 else if (!InputFunctionCallSafe(&in_functions[m],
955 string,
956 typioparams[m],
957 att->atttypmod,
958 (Node *) cstate->escontext,
959 &values[m]))
960 {
962
963 cstate->num_errors++;
964
966 {
967 /*
968 * Since we emit line number and column info in the below
969 * notice message, we suppress error context information
970 * other than the relation name.
971 */
972 Assert(!cstate->relname_only);
973 cstate->relname_only = true;
974
975 if (cstate->cur_attval)
976 {
977 char *attval;
978
979 attval = CopyLimitPrintoutLength(cstate->cur_attval);
981 errmsg("skipping row due to data type incompatibility at line %llu for column \"%s\": \"%s\"",
982 (unsigned long long) cstate->cur_lineno,
983 cstate->cur_attname,
984 attval));
985 pfree(attval);
986 }
987 else
989 errmsg("skipping row due to data type incompatibility at line %llu for column \"%s\": null input",
990 (unsigned long long) cstate->cur_lineno,
991 cstate->cur_attname));
992
993 /* reset relname_only */
994 cstate->relname_only = false;
995 }
996
997 return true;
998 }
999
1000 cstate->cur_attname = NULL;
1001 cstate->cur_attval = NULL;
1002 }
1003
1004 Assert(fieldno == attr_count);
1005 }
1006 else
1007 {
1008 /* binary */
1009 int16 fld_count;
1010 ListCell *cur;
1011
1012 cstate->cur_lineno++;
1013
1014 if (!CopyGetInt16(cstate, &fld_count))
1015 {
1016 /* EOF detected (end of file, or protocol-level EOF) */
1017 return false;
1018 }
1019
1020 if (fld_count == -1)
1021 {
1022 /*
1023 * Received EOF marker. Wait for the protocol-level EOF, and
1024 * complain if it doesn't come immediately. In COPY FROM STDIN,
1025 * this ensures that we correctly handle CopyFail, if client
1026 * chooses to send that now. When copying from file, we could
1027 * ignore the rest of the file like in text mode, but we choose to
1028 * be consistent with the COPY FROM STDIN case.
1029 */
1030 char dummy;
1031
1032 if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
1033 ereport(ERROR,
1034 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1035 errmsg("received copy data after EOF marker")));
1036 return false;
1037 }
1038
1039 if (fld_count != attr_count)
1040 ereport(ERROR,
1041 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1042 errmsg("row field count is %d, expected %d",
1043 (int) fld_count, attr_count)));
1044
1045 foreach(cur, cstate->attnumlist)
1046 {
1047 int attnum = lfirst_int(cur);
1048 int m = attnum - 1;
1049 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1050
1051 cstate->cur_attname = NameStr(att->attname);
1052 values[m] = CopyReadBinaryAttribute(cstate,
1053 &in_functions[m],
1054 typioparams[m],
1055 att->atttypmod,
1056 &nulls[m]);
1057 cstate->cur_attname = NULL;
1058 }
1059 }
1060
1061 /*
1062 * Now compute and insert any defaults available for the columns not
1063 * provided by the input data. Anything not processed here or above will
1064 * remain NULL.
1065 */
1066 for (i = 0; i < num_defaults; i++)
1067 {
1068 /*
1069 * The caller must supply econtext and have switched into the
1070 * per-tuple memory context in it.
1071 */
1072 Assert(econtext != NULL);
1074
1075 values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
1076 &nulls[defmap[i]]);
1077 }
1078
1079 return true;
1080}
1081
1082/*
1083 * Read the next input line and stash it in line_buf.
1084 *
1085 * Result is true if read was terminated by EOF, false if terminated
1086 * by newline. The terminating newline or EOF marker is not included
1087 * in the final value of line_buf.
1088 */
1089static bool
1091{
1092 bool result;
1093
1094 resetStringInfo(&cstate->line_buf);
1095 cstate->line_buf_valid = false;
1096
1097 /* Parse data and transfer into line_buf */
1098 result = CopyReadLineText(cstate);
1099
1100 if (result)
1101 {
1102 /*
1103 * Reached EOF. In protocol version 3, we should ignore anything
1104 * after \. up to the protocol end of copy data. (XXX maybe better
1105 * not to treat \. as special?)
1106 */
1107 if (cstate->copy_src == COPY_FRONTEND)
1108 {
1109 int inbytes;
1110
1111 do
1112 {
1113 inbytes = CopyGetData(cstate, cstate->input_buf,
1114 1, INPUT_BUF_SIZE);
1115 } while (inbytes > 0);
1116 cstate->input_buf_index = 0;
1117 cstate->input_buf_len = 0;
1118 cstate->raw_buf_index = 0;
1119 cstate->raw_buf_len = 0;
1120 }
1121 }
1122 else
1123 {
1124 /*
1125 * If we didn't hit EOF, then we must have transferred the EOL marker
1126 * to line_buf along with the data. Get rid of it.
1127 */
1128 switch (cstate->eol_type)
1129 {
1130 case EOL_NL:
1131 Assert(cstate->line_buf.len >= 1);
1132 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1133 cstate->line_buf.len--;
1134 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1135 break;
1136 case EOL_CR:
1137 Assert(cstate->line_buf.len >= 1);
1138 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
1139 cstate->line_buf.len--;
1140 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1141 break;
1142 case EOL_CRNL:
1143 Assert(cstate->line_buf.len >= 2);
1144 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
1145 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1146 cstate->line_buf.len -= 2;
1147 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1148 break;
1149 case EOL_UNKNOWN:
1150 /* shouldn't get here */
1151 Assert(false);
1152 break;
1153 }
1154 }
1155
1156 /* Now it's safe to use the buffer in error messages */
1157 cstate->line_buf_valid = true;
1158
1159 return result;
1160}
1161
1162/*
1163 * CopyReadLineText - inner loop of CopyReadLine for text mode
1164 */
1165static bool
1167{
1168 char *copy_input_buf;
1169 int input_buf_ptr;
1170 int copy_buf_len;
1171 bool need_data = false;
1172 bool hit_eof = false;
1173 bool result = false;
1174
1175 /* CSV variables */
1176 bool in_quote = false,
1177 last_was_esc = false;
1178 char quotec = '\0';
1179 char escapec = '\0';
1180
1181 if (cstate->opts.csv_mode)
1182 {
1183 quotec = cstate->opts.quote[0];
1184 escapec = cstate->opts.escape[0];
1185 /* ignore special escape processing if it's the same as quotec */
1186 if (quotec == escapec)
1187 escapec = '\0';
1188 }
1189
1190 /*
1191 * The objective of this loop is to transfer the entire next input line
1192 * into line_buf. Hence, we only care for detecting newlines (\r and/or
1193 * \n) and the end-of-copy marker (\.).
1194 *
1195 * In CSV mode, \r and \n inside a quoted field are just part of the data
1196 * value and are put in line_buf. We keep just enough state to know if we
1197 * are currently in a quoted field or not.
1198 *
1199 * The input has already been converted to the database encoding. All
1200 * supported server encodings have the property that all bytes in a
1201 * multi-byte sequence have the high bit set, so a multibyte character
1202 * cannot contain any newline or escape characters embedded in the
1203 * multibyte sequence. Therefore, we can process the input byte-by-byte,
1204 * regardless of the encoding.
1205 *
1206 * For speed, we try to move data from input_buf to line_buf in chunks
1207 * rather than one character at a time. input_buf_ptr points to the next
1208 * character to examine; any characters from input_buf_index to
1209 * input_buf_ptr have been determined to be part of the line, but not yet
1210 * transferred to line_buf.
1211 *
1212 * For a little extra speed within the loop, we copy input_buf and
1213 * input_buf_len into local variables.
1214 */
1215 copy_input_buf = cstate->input_buf;
1216 input_buf_ptr = cstate->input_buf_index;
1217 copy_buf_len = cstate->input_buf_len;
1218
1219 for (;;)
1220 {
1221 int prev_raw_ptr;
1222 char c;
1223
1224 /*
1225 * Load more data if needed.
1226 *
1227 * TODO: We could just force four bytes of read-ahead and avoid the
1228 * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was
1229 * unsafe with the old v2 COPY protocol, but we don't support that
1230 * anymore.
1231 */
1232 if (input_buf_ptr >= copy_buf_len || need_data)
1233 {
1235
1236 CopyLoadInputBuf(cstate);
1237 /* update our local variables */
1238 hit_eof = cstate->input_reached_eof;
1239 input_buf_ptr = cstate->input_buf_index;
1240 copy_buf_len = cstate->input_buf_len;
1241
1242 /*
1243 * If we are completely out of data, break out of the loop,
1244 * reporting EOF.
1245 */
1246 if (INPUT_BUF_BYTES(cstate) <= 0)
1247 {
1248 result = true;
1249 break;
1250 }
1251 need_data = false;
1252 }
1253
1254 /* OK to fetch a character */
1255 prev_raw_ptr = input_buf_ptr;
1256 c = copy_input_buf[input_buf_ptr++];
1257
1258 if (cstate->opts.csv_mode)
1259 {
1260 /*
1261 * If character is '\r', we may need to look ahead below. Force
1262 * fetch of the next character if we don't already have it. We
1263 * need to do this before changing CSV state, in case '\r' is also
1264 * the quote or escape character.
1265 */
1266 if (c == '\r')
1267 {
1269 }
1270
1271 /*
1272 * Dealing with quotes and escapes here is mildly tricky. If the
1273 * quote char is also the escape char, there's no problem - we
1274 * just use the char as a toggle. If they are different, we need
1275 * to ensure that we only take account of an escape inside a
1276 * quoted field and immediately preceding a quote char, and not
1277 * the second in an escape-escape sequence.
1278 */
1279 if (in_quote && c == escapec)
1280 last_was_esc = !last_was_esc;
1281 if (c == quotec && !last_was_esc)
1282 in_quote = !in_quote;
1283 if (c != escapec)
1284 last_was_esc = false;
1285
1286 /*
1287 * Updating the line count for embedded CR and/or LF chars is
1288 * necessarily a little fragile - this test is probably about the
1289 * best we can do. (XXX it's arguable whether we should do this
1290 * at all --- is cur_lineno a physical or logical count?)
1291 */
1292 if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
1293 cstate->cur_lineno++;
1294 }
1295
1296 /* Process \r */
1297 if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
1298 {
1299 /* Check for \r\n on first line, _and_ handle \r\n. */
1300 if (cstate->eol_type == EOL_UNKNOWN ||
1301 cstate->eol_type == EOL_CRNL)
1302 {
1303 /*
1304 * If need more data, go back to loop top to load it.
1305 *
1306 * Note that if we are at EOF, c will wind up as '\0' because
1307 * of the guaranteed pad of input_buf.
1308 */
1310
1311 /* get next char */
1312 c = copy_input_buf[input_buf_ptr];
1313
1314 if (c == '\n')
1315 {
1316 input_buf_ptr++; /* eat newline */
1317 cstate->eol_type = EOL_CRNL; /* in case not set yet */
1318 }
1319 else
1320 {
1321 /* found \r, but no \n */
1322 if (cstate->eol_type == EOL_CRNL)
1323 ereport(ERROR,
1324 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1325 !cstate->opts.csv_mode ?
1326 errmsg("literal carriage return found in data") :
1327 errmsg("unquoted carriage return found in data"),
1328 !cstate->opts.csv_mode ?
1329 errhint("Use \"\\r\" to represent carriage return.") :
1330 errhint("Use quoted CSV field to represent carriage return.")));
1331
1332 /*
1333 * if we got here, it is the first line and we didn't find
1334 * \n, so don't consume the peeked character
1335 */
1336 cstate->eol_type = EOL_CR;
1337 }
1338 }
1339 else if (cstate->eol_type == EOL_NL)
1340 ereport(ERROR,
1341 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1342 !cstate->opts.csv_mode ?
1343 errmsg("literal carriage return found in data") :
1344 errmsg("unquoted carriage return found in data"),
1345 !cstate->opts.csv_mode ?
1346 errhint("Use \"\\r\" to represent carriage return.") :
1347 errhint("Use quoted CSV field to represent carriage return.")));
1348 /* If reach here, we have found the line terminator */
1349 break;
1350 }
1351
1352 /* Process \n */
1353 if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
1354 {
1355 if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
1356 ereport(ERROR,
1357 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1358 !cstate->opts.csv_mode ?
1359 errmsg("literal newline found in data") :
1360 errmsg("unquoted newline found in data"),
1361 !cstate->opts.csv_mode ?
1362 errhint("Use \"\\n\" to represent newline.") :
1363 errhint("Use quoted CSV field to represent newline.")));
1364 cstate->eol_type = EOL_NL; /* in case not set yet */
1365 /* If reach here, we have found the line terminator */
1366 break;
1367 }
1368
1369 /*
1370 * Process backslash, except in CSV mode where backslash is a normal
1371 * character.
1372 */
1373 if (c == '\\' && !cstate->opts.csv_mode)
1374 {
1375 char c2;
1376
1379
1380 /* -----
1381 * get next character
1382 * Note: we do not change c so if it isn't \., we can fall
1383 * through and continue processing.
1384 * -----
1385 */
1386 c2 = copy_input_buf[input_buf_ptr];
1387
1388 if (c2 == '.')
1389 {
1390 input_buf_ptr++; /* consume the '.' */
1391 if (cstate->eol_type == EOL_CRNL)
1392 {
1393 /* Get the next character */
1395 /* if hit_eof, c2 will become '\0' */
1396 c2 = copy_input_buf[input_buf_ptr++];
1397
1398 if (c2 == '\n')
1399 ereport(ERROR,
1400 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1401 errmsg("end-of-copy marker does not match previous newline style")));
1402 else if (c2 != '\r')
1403 ereport(ERROR,
1404 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1405 errmsg("end-of-copy marker is not alone on its line")));
1406 }
1407
1408 /* Get the next character */
1410 /* if hit_eof, c2 will become '\0' */
1411 c2 = copy_input_buf[input_buf_ptr++];
1412
1413 if (c2 != '\r' && c2 != '\n')
1414 ereport(ERROR,
1415 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1416 errmsg("end-of-copy marker is not alone on its line")));
1417
1418 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
1419 (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
1420 (cstate->eol_type == EOL_CR && c2 != '\r'))
1421 ereport(ERROR,
1422 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1423 errmsg("end-of-copy marker does not match previous newline style")));
1424
1425 /*
1426 * If there is any data on this line before the \., complain.
1427 */
1428 if (cstate->line_buf.len > 0 ||
1429 prev_raw_ptr > cstate->input_buf_index)
1430 ereport(ERROR,
1431 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1432 errmsg("end-of-copy marker is not alone on its line")));
1433
1434 /*
1435 * Discard the \. and newline, then report EOF.
1436 */
1437 cstate->input_buf_index = input_buf_ptr;
1438 result = true; /* report EOF */
1439 break;
1440 }
1441 else
1442 {
1443 /*
1444 * If we are here, it means we found a backslash followed by
1445 * something other than a period. In non-CSV mode, anything
1446 * after a backslash is special, so we skip over that second
1447 * character too. If we didn't do that \\. would be
1448 * considered an eof-of copy, while in non-CSV mode it is a
1449 * literal backslash followed by a period.
1450 */
1451 input_buf_ptr++;
1452 }
1453 }
1454 } /* end of outer loop */
1455
1456 /*
1457 * Transfer any still-uncopied data to line_buf.
1458 */
1460
1461 return result;
1462}
1463
/*
 * Return decimal value for a hexadecimal digit
 *
 * hex must be one of 0-9, a-f or A-F; no validation is done here, so the
 * result for any other character is meaningless.  The callers in this file
 * test isxdigit() before calling.
 */
static int
GetDecimalFromHex(char hex)
{
	/* cast to unsigned char: <ctype.h> functions are undefined for negative values */
	if (isdigit((unsigned char) hex))
		return hex - '0';
	else
		return tolower((unsigned char) hex) - 'a' + 10;
}
1475
1476/*
1477 * Parse the current line into separate attributes (fields),
1478 * performing de-escaping as needed.
1479 *
1480 * The input is in line_buf. We use attribute_buf to hold the result
1481 * strings. cstate->raw_fields[k] is set to point to the k'th attribute
1482 * string, or NULL when the input matches the null marker string.
1483 * This array is expanded as necessary.
1484 *
1485 * (Note that the caller cannot check for nulls since the returned
1486 * string would be the post-de-escaping equivalent, which may look
1487 * the same as some valid data string.)
1488 *
1489 * delim is the column delimiter string (must be just one byte for now).
1490 * null_print is the null marker string. Note that this is compared to
1491 * the pre-de-escaped input string.
1492 *
1493 * The return value is the number of fields actually read.
1494 */
1495static int
1497{
1498 char delimc = cstate->opts.delim[0];
1499 int fieldno;
1500 char *output_ptr;
1501 char *cur_ptr;
1502 char *line_end_ptr;
1503
1504 /*
1505 * We need a special case for zero-column tables: check that the input
1506 * line is empty, and return.
1507 */
1508 if (cstate->max_fields <= 0)
1509 {
1510 if (cstate->line_buf.len != 0)
1511 ereport(ERROR,
1512 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1513 errmsg("extra data after last expected column")));
1514 return 0;
1515 }
1516
1518
1519 /*
1520 * The de-escaped attributes will certainly not be longer than the input
1521 * data line, so we can just force attribute_buf to be large enough and
1522 * then transfer data without any checks for enough space. We need to do
1523 * it this way because enlarging attribute_buf mid-stream would invalidate
1524 * pointers already stored into cstate->raw_fields[].
1525 */
1526 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1527 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1528 output_ptr = cstate->attribute_buf.data;
1529
1530 /* set pointer variables for loop */
1531 cur_ptr = cstate->line_buf.data;
1532 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1533
1534 /* Outer loop iterates over fields */
1535 fieldno = 0;
1536 for (;;)
1537 {
1538 bool found_delim = false;
1539 char *start_ptr;
1540 char *end_ptr;
1541 int input_len;
1542 bool saw_non_ascii = false;
1543
1544 /* Make sure there is enough space for the next value */
1545 if (fieldno >= cstate->max_fields)
1546 {
1547 cstate->max_fields *= 2;
1548 cstate->raw_fields =
1549 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1550 }
1551
1552 /* Remember start of field on both input and output sides */
1553 start_ptr = cur_ptr;
1554 cstate->raw_fields[fieldno] = output_ptr;
1555
1556 /*
1557 * Scan data for field.
1558 *
1559 * Note that in this loop, we are scanning to locate the end of field
1560 * and also speculatively performing de-escaping. Once we find the
1561 * end-of-field, we can match the raw field contents against the null
1562 * marker string. Only after that comparison fails do we know that
1563 * de-escaping is actually the right thing to do; therefore we *must
1564 * not* throw any syntax errors before we've done the null-marker
1565 * check.
1566 */
1567 for (;;)
1568 {
1569 char c;
1570
1571 end_ptr = cur_ptr;
1572 if (cur_ptr >= line_end_ptr)
1573 break;
1574 c = *cur_ptr++;
1575 if (c == delimc)
1576 {
1577 found_delim = true;
1578 break;
1579 }
1580 if (c == '\\')
1581 {
1582 if (cur_ptr >= line_end_ptr)
1583 break;
1584 c = *cur_ptr++;
1585 switch (c)
1586 {
1587 case '0':
1588 case '1':
1589 case '2':
1590 case '3':
1591 case '4':
1592 case '5':
1593 case '6':
1594 case '7':
1595 {
1596 /* handle \013 */
1597 int val;
1598
1599 val = OCTVALUE(c);
1600 if (cur_ptr < line_end_ptr)
1601 {
1602 c = *cur_ptr;
1603 if (ISOCTAL(c))
1604 {
1605 cur_ptr++;
1606 val = (val << 3) + OCTVALUE(c);
1607 if (cur_ptr < line_end_ptr)
1608 {
1609 c = *cur_ptr;
1610 if (ISOCTAL(c))
1611 {
1612 cur_ptr++;
1613 val = (val << 3) + OCTVALUE(c);
1614 }
1615 }
1616 }
1617 }
1618 c = val & 0377;
1619 if (c == '\0' || IS_HIGHBIT_SET(c))
1620 saw_non_ascii = true;
1621 }
1622 break;
1623 case 'x':
1624 /* Handle \x3F */
1625 if (cur_ptr < line_end_ptr)
1626 {
1627 char hexchar = *cur_ptr;
1628
1629 if (isxdigit((unsigned char) hexchar))
1630 {
1631 int val = GetDecimalFromHex(hexchar);
1632
1633 cur_ptr++;
1634 if (cur_ptr < line_end_ptr)
1635 {
1636 hexchar = *cur_ptr;
1637 if (isxdigit((unsigned char) hexchar))
1638 {
1639 cur_ptr++;
1640 val = (val << 4) + GetDecimalFromHex(hexchar);
1641 }
1642 }
1643 c = val & 0xff;
1644 if (c == '\0' || IS_HIGHBIT_SET(c))
1645 saw_non_ascii = true;
1646 }
1647 }
1648 break;
1649 case 'b':
1650 c = '\b';
1651 break;
1652 case 'f':
1653 c = '\f';
1654 break;
1655 case 'n':
1656 c = '\n';
1657 break;
1658 case 'r':
1659 c = '\r';
1660 break;
1661 case 't':
1662 c = '\t';
1663 break;
1664 case 'v':
1665 c = '\v';
1666 break;
1667
1668 /*
1669 * in all other cases, take the char after '\'
1670 * literally
1671 */
1672 }
1673 }
1674
1675 /* Add c to output string */
1676 *output_ptr++ = c;
1677 }
1678
1679 /* Check whether raw input matched null marker */
1680 input_len = end_ptr - start_ptr;
1681 if (input_len == cstate->opts.null_print_len &&
1682 strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1683 cstate->raw_fields[fieldno] = NULL;
1684 /* Check whether raw input matched default marker */
1685 else if (fieldno < list_length(cstate->attnumlist) &&
1686 cstate->opts.default_print &&
1687 input_len == cstate->opts.default_print_len &&
1688 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1689 {
1690 /* fieldno is 0-indexed and attnum is 1-indexed */
1691 int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1692
1693 if (cstate->defexprs[m] != NULL)
1694 {
1695 /* defaults contain entries for all physical attributes */
1696 cstate->defaults[m] = true;
1697 }
1698 else
1699 {
1700 TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1701 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1702
1703 ereport(ERROR,
1704 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1705 errmsg("unexpected default marker in COPY data"),
1706 errdetail("Column \"%s\" has no default value.",
1707 NameStr(att->attname))));
1708 }
1709 }
1710 else
1711 {
1712 /*
1713 * At this point we know the field is supposed to contain data.
1714 *
1715 * If we de-escaped any non-7-bit-ASCII chars, make sure the
1716 * resulting string is valid data for the db encoding.
1717 */
1718 if (saw_non_ascii)
1719 {
1720 char *fld = cstate->raw_fields[fieldno];
1721
1722 pg_verifymbstr(fld, output_ptr - fld, false);
1723 }
1724 }
1725
1726 /* Terminate attribute value in output area */
1727 *output_ptr++ = '\0';
1728
1729 fieldno++;
1730 /* Done if we hit EOL instead of a delim */
1731 if (!found_delim)
1732 break;
1733 }
1734
1735 /* Clean up state of attribute_buf */
1736 output_ptr--;
1737 Assert(*output_ptr == '\0');
1738 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1739
1740 return fieldno;
1741}
1742
1743/*
1744 * Parse the current line into separate attributes (fields),
1745 * performing de-escaping as needed. This has exactly the same API as
1746 * CopyReadAttributesText, except we parse the fields according to
1747 * "standard" (i.e. common) CSV usage.
1748 */
1749static int
1751{
1752 char delimc = cstate->opts.delim[0];
1753 char quotec = cstate->opts.quote[0];
1754 char escapec = cstate->opts.escape[0];
1755 int fieldno;
1756 char *output_ptr;
1757 char *cur_ptr;
1758 char *line_end_ptr;
1759
1760 /*
1761 * We need a special case for zero-column tables: check that the input
1762 * line is empty, and return.
1763 */
1764 if (cstate->max_fields <= 0)
1765 {
1766 if (cstate->line_buf.len != 0)
1767 ereport(ERROR,
1768 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1769 errmsg("extra data after last expected column")));
1770 return 0;
1771 }
1772
1774
1775 /*
1776 * The de-escaped attributes will certainly not be longer than the input
1777 * data line, so we can just force attribute_buf to be large enough and
1778 * then transfer data without any checks for enough space. We need to do
1779 * it this way because enlarging attribute_buf mid-stream would invalidate
1780 * pointers already stored into cstate->raw_fields[].
1781 */
1782 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1783 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1784 output_ptr = cstate->attribute_buf.data;
1785
1786 /* set pointer variables for loop */
1787 cur_ptr = cstate->line_buf.data;
1788 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1789
1790 /* Outer loop iterates over fields */
1791 fieldno = 0;
1792 for (;;)
1793 {
1794 bool found_delim = false;
1795 bool saw_quote = false;
1796 char *start_ptr;
1797 char *end_ptr;
1798 int input_len;
1799
1800 /* Make sure there is enough space for the next value */
1801 if (fieldno >= cstate->max_fields)
1802 {
1803 cstate->max_fields *= 2;
1804 cstate->raw_fields =
1805 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1806 }
1807
1808 /* Remember start of field on both input and output sides */
1809 start_ptr = cur_ptr;
1810 cstate->raw_fields[fieldno] = output_ptr;
1811
1812 /*
1813 * Scan data for field,
1814 *
1815 * The loop starts in "not quote" mode and then toggles between that
1816 * and "in quote" mode. The loop exits normally if it is in "not
1817 * quote" mode and a delimiter or line end is seen.
1818 */
1819 for (;;)
1820 {
1821 char c;
1822
1823 /* Not in quote */
1824 for (;;)
1825 {
1826 end_ptr = cur_ptr;
1827 if (cur_ptr >= line_end_ptr)
1828 goto endfield;
1829 c = *cur_ptr++;
1830 /* unquoted field delimiter */
1831 if (c == delimc)
1832 {
1833 found_delim = true;
1834 goto endfield;
1835 }
1836 /* start of quoted field (or part of field) */
1837 if (c == quotec)
1838 {
1839 saw_quote = true;
1840 break;
1841 }
1842 /* Add c to output string */
1843 *output_ptr++ = c;
1844 }
1845
1846 /* In quote */
1847 for (;;)
1848 {
1849 end_ptr = cur_ptr;
1850 if (cur_ptr >= line_end_ptr)
1851 ereport(ERROR,
1852 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1853 errmsg("unterminated CSV quoted field")));
1854
1855 c = *cur_ptr++;
1856
1857 /* escape within a quoted field */
1858 if (c == escapec)
1859 {
1860 /*
1861 * peek at the next char if available, and escape it if it
1862 * is an escape char or a quote char
1863 */
1864 if (cur_ptr < line_end_ptr)
1865 {
1866 char nextc = *cur_ptr;
1867
1868 if (nextc == escapec || nextc == quotec)
1869 {
1870 *output_ptr++ = nextc;
1871 cur_ptr++;
1872 continue;
1873 }
1874 }
1875 }
1876
1877 /*
1878 * end of quoted field. Must do this test after testing for
1879 * escape in case quote char and escape char are the same
1880 * (which is the common case).
1881 */
1882 if (c == quotec)
1883 break;
1884
1885 /* Add c to output string */
1886 *output_ptr++ = c;
1887 }
1888 }
1889endfield:
1890
1891 /* Terminate attribute value in output area */
1892 *output_ptr++ = '\0';
1893
1894 /* Check whether raw input matched null marker */
1895 input_len = end_ptr - start_ptr;
1896 if (!saw_quote && input_len == cstate->opts.null_print_len &&
1897 strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1898 cstate->raw_fields[fieldno] = NULL;
1899 /* Check whether raw input matched default marker */
1900 else if (fieldno < list_length(cstate->attnumlist) &&
1901 cstate->opts.default_print &&
1902 input_len == cstate->opts.default_print_len &&
1903 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1904 {
1905 /* fieldno is 0-index and attnum is 1-index */
1906 int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1907
1908 if (cstate->defexprs[m] != NULL)
1909 {
1910 /* defaults contain entries for all physical attributes */
1911 cstate->defaults[m] = true;
1912 }
1913 else
1914 {
1915 TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1916 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1917
1918 ereport(ERROR,
1919 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1920 errmsg("unexpected default marker in COPY data"),
1921 errdetail("Column \"%s\" has no default value.",
1922 NameStr(att->attname))));
1923 }
1924 }
1925
1926 fieldno++;
1927 /* Done if we hit EOL instead of a delim */
1928 if (!found_delim)
1929 break;
1930 }
1931
1932 /* Clean up state of attribute_buf */
1933 output_ptr--;
1934 Assert(*output_ptr == '\0');
1935 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1936
1937 return fieldno;
1938}
1939
1940
1941/*
1942 * Read a binary attribute
1943 */
1944static Datum
1946 Oid typioparam, int32 typmod,
1947 bool *isnull)
1948{
1949 int32 fld_size;
1950 Datum result;
1951
1952 if (!CopyGetInt32(cstate, &fld_size))
1953 ereport(ERROR,
1954 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1955 errmsg("unexpected EOF in COPY data")));
1956 if (fld_size == -1)
1957 {
1958 *isnull = true;
1959 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
1960 }
1961 if (fld_size < 0)
1962 ereport(ERROR,
1963 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1964 errmsg("invalid field size")));
1965
1966 /* reset attribute_buf to empty, and load raw data in it */
1968
1969 enlargeStringInfo(&cstate->attribute_buf, fld_size);
1970 if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
1971 fld_size) != fld_size)
1972 ereport(ERROR,
1973 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1974 errmsg("unexpected EOF in COPY data")));
1975
1976 cstate->attribute_buf.len = fld_size;
1977 cstate->attribute_buf.data[fld_size] = '\0';
1978
1979 /* Call the column type's binary input converter */
1980 result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
1981 typioparam, typmod);
1982
1983 /* Trouble if it didn't eat the whole buffer */
1984 if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
1985 ereport(ERROR,
1986 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1987 errmsg("incorrect binary data format")));
1988
1989 *isnull = false;
1990 return result;
1991}
int16 AttrNumber
Definition: attnum.h:21
void pgstat_progress_update_param(int index, int64 val)
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define NameStr(name)
Definition: c.h:703
#define Min(x, y)
Definition: c.h:961
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1112
#define Assert(condition)
Definition: c.h:815
int16_t int16
Definition: c.h:483
int32_t int32
Definition: c.h:484
uint16_t uint16
Definition: c.h:487
uint32_t uint32
Definition: c.h:488
#define MemSet(start, val, len)
Definition: c.h:977
char * CopyLimitPrintoutLength(const char *str)
Definition: copyfrom.c:194
#define RAW_BUF_BYTES(cstate)
#define INPUT_BUF_SIZE
@ EOL_CR
@ EOL_CRNL
@ EOL_UNKNOWN
@ EOL_NL
#define INPUT_BUF_BYTES(cstate)
#define RAW_BUF_SIZE
static int CopyReadAttributesCSV(CopyFromState cstate)
static bool CopyGetInt16(CopyFromState cstate, int16 *val)
static void CopyConversionError(CopyFromState cstate)
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
static void CopyLoadRawBuf(CopyFromState cstate)
#define OCTVALUE(c)
Definition: copyfromparse.c:79
#define REFILL_LINEBUF
static void CopyLoadInputBuf(CopyFromState cstate)
#define ISOCTAL(c)
Definition: copyfromparse.c:78
void ReceiveCopyBinaryHeader(CopyFromState cstate)
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
static bool CopyReadLineText(CopyFromState cstate)
static int GetDecimalFromHex(char hex)
void ReceiveCopyBegin(CopyFromState cstate)
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
static int CopyReadAttributesText(CopyFromState cstate)
static const char BinarySignature[11]
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Definition: copyfromparse.c:97
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
static bool CopyReadLine(CopyFromState cstate)
static void CopyConvertBuf(CopyFromState cstate)
bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
@ COPY_FILE
Definition: copyto.c:45
@ COPY_CALLBACK
Definition: copyto.c:47
@ COPY_FRONTEND
Definition: copyto.c:46
struct cursor * cur
Definition: ecpg.c:29
int errcode_for_file_access(void)
Definition: elog.c:876
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define NOTICE
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:149
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:346
bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod, fmNodePtr escontext, Datum *result)
Definition: fmgr.c:1585
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1697
@ COPY_ON_ERROR_STOP
Definition: copy.h:39
@ COPY_LOG_VERBOSITY_VERBOSE
Definition: copy.h:51
@ COPY_HEADER_MATCH
Definition: copy.h:30
long val
Definition: informix.c:689
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
#define pq_flush()
Definition: libpq.h:46
#define PQ_SMALL_MESSAGE_LIMIT
Definition: libpq.h:30
#define PQ_LARGE_MESSAGE_LIMIT
Definition: libpq.h:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
Definition: mbutils.c:469
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void pfree(void *pointer)
Definition: mcxt.c:1521
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:141
#define RESUME_CANCEL_INTERRUPTS()
Definition: miscadmin.h:143
int namestrcmp(Name name, const char *str)
Definition: name.c:247
int16 attnum
Definition: pg_attribute.h:74
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:200
static char format
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
#define pg_ntoh16(x)
Definition: pg_bswap.h:124
static int list_length(const List *l)
Definition: pg_list.h:152
#define lfirst_int(lc)
Definition: pg_list.h:173
static int list_nth_int(const List *list, int n)
Definition: pg_list.h:310
static char * buf
Definition: pg_test_fsync.c:72
#define MAX_CONVERSION_INPUT_LENGTH
Definition: pg_wchar.h:320
uintptr_t Datum
Definition: postgres.h:69
unsigned int Oid
Definition: postgres_ext.h:32
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1203
int pq_getbyte(void)
Definition: pqcomm.c:964
void pq_startmsgread(void)
Definition: pqcomm.c:1141
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:528
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:579
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:296
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:88
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:160
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:136
char * c
char string[11]
Definition: preproc-type.c:52
#define PROGRESS_COPY_BYTES_PROCESSED
Definition: progress.h:142
#define PqMsg_CopyDone
Definition: protocol.h:64
#define PqMsg_CopyData
Definition: protocol.h:65
#define PqMsg_CopyInResponse
Definition: protocol.h:45
#define PqMsg_Sync
Definition: protocol.h:27
#define PqMsg_CopyFail
Definition: protocol.h:29
#define PqMsg_Flush
Definition: protocol.h:24
#define RelationGetDescr(relation)
Definition: rel.h:538
StringInfo makeStringInfo(void)
Definition: stringinfo.c:72
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:126
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:337
int default_print_len
Definition: copy.h:72
bool binary
Definition: copy.h:64
int null_print_len
Definition: copy.h:69
CopyLogVerbosityChoice log_verbosity
Definition: copy.h:87
char * quote
Definition: copy.h:74
CopyOnErrorChoice on_error
Definition: copy.h:86
CopyHeaderChoice header_line
Definition: copy.h:67
char * escape
Definition: copy.h:75
char * null_print
Definition: copy.h:68
char * delim
Definition: copy.h:73
bool * force_notnull_flags
Definition: copy.h:81
bool csv_mode
Definition: copy.h:66
bool * force_null_flags
Definition: copy.h:84
char * default_print
Definition: copy.h:71
copy_data_source_cb data_source_cb
StringInfoData line_buf
CopyFormatOptions opts
StringInfoData attribute_buf
const char * cur_attval
const char * cur_attname
ErrorSaveContext * escontext
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:275
Definition: fmgr.h:57
Definition: nodes.h:129
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition: tupdesc.h:154
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
Definition: wchar.c:2163
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2174