PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
copyfromparse.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include "commands/copy.h"
#include "commands/copyfrom_internal.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
#include "utils/rel.h"
Include dependency graph for copyfromparse.c:

Go to the source code of this file.

Macros

#define ISOCTAL(c)   (((c) >= '0') && ((c) <= '7'))
 
#define OCTVALUE(c)   ((c) - '0')
 
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
 
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
 
#define REFILL_LINEBUF
 

Functions

static bool CopyReadLine (CopyFromState cstate)
 
static bool CopyReadLineText (CopyFromState cstate)
 
static int CopyReadAttributesText (CopyFromState cstate)
 
static int CopyReadAttributesCSV (CopyFromState cstate)
 
static Datum CopyReadBinaryAttribute (CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
 
static int CopyGetData (CopyFromState cstate, void *databuf, int minread, int maxread)
 
static bool CopyGetInt32 (CopyFromState cstate, int32 *val)
 
static bool CopyGetInt16 (CopyFromState cstate, int16 *val)
 
static void CopyLoadInputBuf (CopyFromState cstate)
 
static int CopyReadBinaryData (CopyFromState cstate, char *dest, int nbytes)
 
void ReceiveCopyBegin (CopyFromState cstate)
 
void ReceiveCopyBinaryHeader (CopyFromState cstate)
 
static void CopyConvertBuf (CopyFromState cstate)
 
static void CopyConversionError (CopyFromState cstate)
 
static void CopyLoadRawBuf (CopyFromState cstate)
 
bool NextCopyFromRawFields (CopyFromState cstate, char ***fields, int *nfields)
 
bool NextCopyFrom (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
 
static int GetDecimalFromHex (char hex)
 

Variables

static const char BinarySignature [11] = "PGCOPY\n\377\r\n\0"
 

Macro Definition Documentation

◆ IF_NEED_REFILL_AND_EOF_BREAK

#define IF_NEED_REFILL_AND_EOF_BREAK (   extralen)
Value:
if (1) \
{ \
if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
{ \
if (extralen) \
input_buf_ptr = copy_buf_len; /* consume the partial character */ \
/* backslash just before EOF, treat as data char */ \
result = true; \
break; \
} \
} else ((void) 0)

Definition at line 109 of file copyfromparse.c.

◆ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE

#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE (   extralen)
Value:
if (1) \
{ \
if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
{ \
input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
need_data = true; \
continue; \
} \
} else ((void) 0)

Definition at line 97 of file copyfromparse.c.

◆ ISOCTAL

#define ISOCTAL (   c)    (((c) >= '0') && ((c) <= '7'))

Definition at line 78 of file copyfromparse.c.

◆ OCTVALUE

#define OCTVALUE (   c)    ((c) - '0')

Definition at line 79 of file copyfromparse.c.

◆ REFILL_LINEBUF

#define REFILL_LINEBUF
Value:
if (1) \
{ \
if (input_buf_ptr > cstate->input_buf_index) \
{ \
appendBinaryStringInfo(&cstate->line_buf, \
cstate->input_buf + cstate->input_buf_index, \
input_buf_ptr - cstate->input_buf_index); \
cstate->input_buf_index = input_buf_ptr; \
} \
} else ((void) 0)

Definition at line 126 of file copyfromparse.c.

Function Documentation

◆ CopyConversionError()

static void CopyConversionError ( CopyFromState  cstate)
static

Definition at line 524 of file copyfromparse.c.

525{
526 Assert(cstate->raw_buf_len > 0);
527 Assert(cstate->input_reached_error);
528
529 if (!cstate->need_transcoding)
530 {
531 /*
532 * Everything up to input_buf_len was successfully verified, and
533 * input_buf_len points to the invalid or incomplete character.
534 */
535 report_invalid_encoding(cstate->file_encoding,
536 cstate->raw_buf + cstate->input_buf_len,
537 cstate->raw_buf_len - cstate->input_buf_len);
538 }
539 else
540 {
541 /*
542 * raw_buf_index points to the invalid or untranslatable character. We
543 * let the conversion routine report the error, because it can provide
544 * a more specific error message than we could here. An earlier call
545 * to the conversion routine in CopyConvertBuf() detected that there
546 * is an error, now we call the conversion routine again with
547 * noError=false, to have it throw the error.
548 */
549 unsigned char *src;
550 int srclen;
551 unsigned char *dst;
552 int dstlen;
553
554 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
555 srclen = cstate->raw_buf_len - cstate->raw_buf_index;
556 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
557 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
558
559 (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
560 cstate->file_encoding,
561 GetDatabaseEncoding(),
562 src, srclen,
563 dst, dstlen,
564 false);
565
566 /*
567 * The conversion routine should have reported an error, so this
568 * should not be reached.
569 */
570 elog(ERROR, "encoding conversion failed without error");
571 }
572}
#define Assert(condition)
Definition: c.h:812
#define INPUT_BUF_SIZE
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
Definition: mbutils.c:469
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698

References Assert, CopyFromStateData::conversion_proc, elog, ERROR, CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, pg_do_encoding_conversion_buf(), CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and report_invalid_encoding().

Referenced by CopyLoadInputBuf().

◆ CopyConvertBuf()

static void CopyConvertBuf ( CopyFromState  cstate)
static

Definition at line 391 of file copyfromparse.c.

392{
393 /*
394 * If the file and server encoding are the same, no encoding conversion is
395 * required. However, we still need to verify that the input is valid for
396 * the encoding.
397 */
398 if (!cstate->need_transcoding)
399 {
400 /*
401 * When conversion is not required, input_buf and raw_buf are the
402 * same. raw_buf_len is the total number of bytes in the buffer, and
403 * input_buf_len tracks how many of those bytes have already been
404 * verified.
405 */
406 int preverifiedlen = cstate->input_buf_len;
407 int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
408 int nverified;
409
410 if (unverifiedlen == 0)
411 {
412 /*
413 * If no more raw data is coming, report the EOF to the caller.
414 */
415 if (cstate->raw_reached_eof)
416 cstate->input_reached_eof = true;
417 return;
418 }
419
420 /*
421 * Verify the new data, including any residual unverified bytes from
422 * previous round.
423 */
424 nverified = pg_encoding_verifymbstr(cstate->file_encoding,
425 cstate->raw_buf + preverifiedlen,
426 unverifiedlen);
427 if (nverified == 0)
428 {
429 /*
430 * Could not verify anything.
431 *
432 * If there is no more raw input data coming, it means that there
433 * was an incomplete multi-byte sequence at the end. Also, if
434 * there's "enough" input left, we should be able to verify at
435 * least one character, and a failure to do so means that we've
436 * hit an invalid byte sequence.
437 */
438 if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
439 cstate->input_reached_error = true;
440 return;
441 }
442 cstate->input_buf_len += nverified;
443 }
444 else
445 {
446 /*
447 * Encoding conversion is needed.
448 */
449 int nbytes;
450 unsigned char *src;
451 int srclen;
452 unsigned char *dst;
453 int dstlen;
454 int convertedlen;
455
456 if (RAW_BUF_BYTES(cstate) == 0)
457 {
458 /*
459 * If no more raw data is coming, report the EOF to the caller.
460 */
461 if (cstate->raw_reached_eof)
462 cstate->input_reached_eof = true;
463 return;
464 }
465
466 /*
467 * First, copy down any unprocessed data.
468 */
469 nbytes = INPUT_BUF_BYTES(cstate);
470 if (nbytes > 0 && cstate->input_buf_index > 0)
471 memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
472 nbytes);
473 cstate->input_buf_index = 0;
474 cstate->input_buf_len = nbytes;
475 cstate->input_buf[nbytes] = '\0';
476
477 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
478 srclen = cstate->raw_buf_len - cstate->raw_buf_index;
479 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
480 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
481
482 /*
483 * Do the conversion. This might stop short, if there is an invalid
484 * byte sequence in the input. We'll convert as much as we can in
485 * that case.
486 *
487 * Note: Even if we hit an invalid byte sequence, we don't report the
488 * error until all the valid bytes have been consumed. The input
489 * might contain an end-of-input marker (\.), and we don't want to
490 * report an error if the invalid byte sequence is after the
491 * end-of-input marker. We might unnecessarily convert some data
492 * after the end-of-input marker as long as it's valid for the
493 * encoding, but that's harmless.
494 */
495 convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
496 cstate->file_encoding,
497 GetDatabaseEncoding(),
498 src, srclen,
499 dst, dstlen,
500 true);
501 if (convertedlen == 0)
502 {
503 /*
504 * Could not convert anything. If there is no more raw input data
505 * coming, it means that there was an incomplete multi-byte
506 * sequence at the end. Also, if there is plenty of input left,
507 * we should be able to convert at least one character, so a
508 * failure to do so must mean that we've hit a byte sequence
509 * that's invalid.
510 */
511 if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
512 cstate->input_reached_error = true;
513 return;
514 }
515 cstate->raw_buf_index += convertedlen;
516 cstate->input_buf_len += strlen((char *) dst);
517 }
518}
#define RAW_BUF_BYTES(cstate)
#define INPUT_BUF_BYTES(cstate)
#define MAX_CONVERSION_INPUT_LENGTH
Definition: pg_wchar.h:320
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
Definition: wchar.c:2116
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2127

References CopyFromStateData::conversion_proc, CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, MAX_CONVERSION_INPUT_LENGTH, CopyFromStateData::need_transcoding, pg_do_encoding_conversion_buf(), pg_encoding_max_length(), pg_encoding_verifymbstr(), CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf().

◆ CopyGetData()

static int CopyGetData ( CopyFromState  cstate,
void *  databuf,
int  minread,
int  maxread 
)
static

Definition at line 236 of file copyfromparse.c.

237{
238 int bytesread = 0;
239
240 switch (cstate->copy_src)
241 {
242 case COPY_FILE:
243 bytesread = fread(databuf, 1, maxread, cstate->copy_file);
244 if (ferror(cstate->copy_file))
245 ereport(ERROR,
246 (errcode_for_file_access(),
247 errmsg("could not read from COPY file: %m")));
248 if (bytesread == 0)
249 cstate->raw_reached_eof = true;
250 break;
251 case COPY_FRONTEND:
252 while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
253 {
254 int avail;
255
256 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
257 {
258 /* Try to receive another message */
259 int mtype;
260 int maxmsglen;
261
262 readmessage:
263 HOLD_CANCEL_INTERRUPTS();
264 pq_startmsgread();
265 mtype = pq_getbyte();
266 if (mtype == EOF)
267 ereport(ERROR,
268 (errcode(ERRCODE_CONNECTION_FAILURE),
269 errmsg("unexpected EOF on client connection with an open transaction")));
270 /* Validate message type and set packet size limit */
271 switch (mtype)
272 {
273 case PqMsg_CopyData:
274 maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
275 break;
276 case PqMsg_CopyDone:
277 case PqMsg_CopyFail:
278 case PqMsg_Flush:
279 case PqMsg_Sync:
280 maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
281 break;
282 default:
283 ereport(ERROR,
284 (errcode(ERRCODE_PROTOCOL_VIOLATION),
285 errmsg("unexpected message type 0x%02X during COPY from stdin",
286 mtype)));
287 maxmsglen = 0; /* keep compiler quiet */
288 break;
289 }
290 /* Now collect the message body */
291 if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
292 ereport(ERROR,
293 (errcode(ERRCODE_CONNECTION_FAILURE),
294 errmsg("unexpected EOF on client connection with an open transaction")));
295 RESUME_CANCEL_INTERRUPTS();
296 /* ... and process it */
297 switch (mtype)
298 {
299 case PqMsg_CopyData:
300 break;
301 case PqMsg_CopyDone:
302 /* COPY IN correctly terminated by frontend */
303 cstate->raw_reached_eof = true;
304 return bytesread;
305 case PqMsg_CopyFail:
306 ereport(ERROR,
307 (errcode(ERRCODE_QUERY_CANCELED),
308 errmsg("COPY from stdin failed: %s",
309 pq_getmsgstring(cstate->fe_msgbuf))));
310 break;
311 case PqMsg_Flush:
312 case PqMsg_Sync:
313
314 /*
315 * Ignore Flush/Sync for the convenience of client
316 * libraries (such as libpq) that may send those
317 * without noticing that the command they just
318 * sent was COPY.
319 */
320 goto readmessage;
321 default:
322 Assert(false); /* NOT REACHED */
323 }
324 }
325 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
326 if (avail > maxread)
327 avail = maxread;
328 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
329 databuf = (void *) ((char *) databuf + avail);
330 maxread -= avail;
331 bytesread += avail;
332 }
333 break;
334 case COPY_CALLBACK:
335 bytesread = cstate->data_source_cb(databuf, minread, maxread);
336 break;
337 }
338
339 return bytesread;
340}
@ COPY_FILE
Definition: copyto.c:45
@ COPY_CALLBACK
Definition: copyto.c:47
@ COPY_FRONTEND
Definition: copyto.c:46
int errcode_for_file_access(void)
Definition: elog.c:876
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ereport(elevel,...)
Definition: elog.h:149
#define PQ_SMALL_MESSAGE_LIMIT
Definition: libpq.h:30
#define PQ_LARGE_MESSAGE_LIMIT
Definition: libpq.h:31
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:141
#define RESUME_CANCEL_INTERRUPTS()
Definition: miscadmin.h:143
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1203
int pq_getbyte(void)
Definition: pqcomm.c:964
void pq_startmsgread(void)
Definition: pqcomm.c:1141
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:528
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:579
#define PqMsg_CopyDone
Definition: protocol.h:64
#define PqMsg_CopyData
Definition: protocol.h:65
#define PqMsg_Sync
Definition: protocol.h:27
#define PqMsg_CopyFail
Definition: protocol.h:29
#define PqMsg_Flush
Definition: protocol.h:24
copy_data_source_cb data_source_cb

References Assert, COPY_CALLBACK, COPY_FILE, CopyFromStateData::copy_file, COPY_FRONTEND, CopyFromStateData::copy_src, StringInfoData::cursor, CopyFromStateData::data_source_cb, ereport, errcode(), errcode_for_file_access(), errmsg(), ERROR, CopyFromStateData::fe_msgbuf, HOLD_CANCEL_INTERRUPTS, StringInfoData::len, pq_copymsgbytes(), pq_getbyte(), pq_getmessage(), pq_getmsgstring(), PQ_LARGE_MESSAGE_LIMIT, PQ_SMALL_MESSAGE_LIMIT, pq_startmsgread(), PqMsg_CopyData, PqMsg_CopyDone, PqMsg_CopyFail, PqMsg_Flush, PqMsg_Sync, CopyFromStateData::raw_reached_eof, and RESUME_CANCEL_INTERRUPTS.

Referenced by CopyLoadRawBuf(), and CopyReadLine().

◆ CopyGetInt16()

static bool CopyGetInt16 ( CopyFromState  cstate,
int16 * val
)
inline static

Definition at line 370 of file copyfromparse.c.

371{
372 uint16 buf;
373
374 if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
375 {
376 *val = 0; /* suppress compiler warning */
377 return false;
378 }
379 *val = (int16) pg_ntoh16(buf);
380 return true;
381}
int16_t int16
Definition: c.h:480
uint16_t uint16
Definition: c.h:484
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
long val
Definition: informix.c:689
#define pg_ntoh16(x)
Definition: pg_bswap.h:124
static char * buf
Definition: pg_test_fsync.c:72

References buf, CopyReadBinaryData(), pg_ntoh16, and val.

Referenced by NextCopyFrom().

◆ CopyGetInt32()

static bool CopyGetInt32 ( CopyFromState  cstate,
int32 * val
)
inline static

Definition at line 353 of file copyfromparse.c.

354{
355 uint32 buf;
356
357 if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
358 {
359 *val = 0; /* suppress compiler warning */
360 return false;
361 }
362 *val = (int32) pg_ntoh32(buf);
363 return true;
364}
int32_t int32
Definition: c.h:481
uint32_t uint32
Definition: c.h:485
#define pg_ntoh32(x)
Definition: pg_bswap.h:125

References buf, CopyReadBinaryData(), pg_ntoh32, and val.

Referenced by CopyReadBinaryAttribute(), and ReceiveCopyBinaryHeader().

◆ CopyLoadInputBuf()

static void CopyLoadInputBuf ( CopyFromState  cstate)
static

Definition at line 641 of file copyfromparse.c.

642{
643 int nbytes = INPUT_BUF_BYTES(cstate);
644
645 /*
646 * The caller has updated input_buf_index to indicate how much of the
647 * input has been consumed and isn't needed anymore. If input_buf is the
648 * same physical area as raw_buf, update raw_buf_index accordingly.
649 */
650 if (cstate->raw_buf == cstate->input_buf)
651 {
652 Assert(!cstate->need_transcoding);
653 Assert(cstate->input_buf_index >= cstate->raw_buf_index);
654 cstate->raw_buf_index = cstate->input_buf_index;
655 }
656
657 for (;;)
658 {
659 /* If we now have some unconverted data, try to convert it */
660 CopyConvertBuf(cstate);
661
662 /* If we now have some more input bytes ready, return them */
663 if (INPUT_BUF_BYTES(cstate) > nbytes)
664 return;
665
666 /*
667 * If we reached an invalid byte sequence, or we're at an incomplete
668 * multi-byte character but there is no more raw input data, report
669 * conversion error.
670 */
671 if (cstate->input_reached_error)
672 CopyConversionError(cstate);
673
674 /* no more input, and everything has been converted */
675 if (cstate->input_reached_eof)
676 break;
677
678 /* Try to load more raw data */
679 Assert(!cstate->raw_reached_eof);
680 CopyLoadRawBuf(cstate);
681 }
682}
static void CopyConversionError(CopyFromState cstate)
static void CopyLoadRawBuf(CopyFromState cstate)
static void CopyConvertBuf(CopyFromState cstate)

References Assert, CopyConversionError(), CopyConvertBuf(), CopyLoadRawBuf(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyReadLineText().

◆ CopyLoadRawBuf()

static void CopyLoadRawBuf ( CopyFromState  cstate)
static

Definition at line 581 of file copyfromparse.c.

582{
583 int nbytes;
584 int inbytes;
585
586 /*
587 * In text mode, if encoding conversion is not required, raw_buf and
588 * input_buf point to the same buffer. Their len/index better agree, too.
589 */
590 if (cstate->raw_buf == cstate->input_buf)
591 {
592 Assert(!cstate->need_transcoding);
593 Assert(cstate->raw_buf_index == cstate->input_buf_index);
594 Assert(cstate->input_buf_len <= cstate->raw_buf_len);
595 }
596
597 /*
598 * Copy down the unprocessed data if any.
599 */
600 nbytes = RAW_BUF_BYTES(cstate);
601 if (nbytes > 0 && cstate->raw_buf_index > 0)
602 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
603 nbytes);
604 cstate->raw_buf_len -= cstate->raw_buf_index;
605 cstate->raw_buf_index = 0;
606
607 /*
608 * If raw_buf and input_buf are in fact the same buffer, adjust the
609 * input_buf variables, too.
610 */
611 if (cstate->raw_buf == cstate->input_buf)
612 {
613 cstate->input_buf_len -= cstate->input_buf_index;
614 cstate->input_buf_index = 0;
615 }
616
617 /* Load more data */
618 inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
619 1, RAW_BUF_SIZE - cstate->raw_buf_len);
620 nbytes += inbytes;
621 cstate->raw_buf[nbytes] = '\0';
622 cstate->raw_buf_len = nbytes;
623
624 cstate->bytes_processed += inbytes;
625 pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
626
627 if (inbytes == 0)
628 cstate->raw_reached_eof = true;
629}
void pgstat_progress_update_param(int index, int64 val)
#define RAW_BUF_SIZE
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
#define PROGRESS_COPY_BYTES_PROCESSED
Definition: progress.h:140

References Assert, CopyFromStateData::bytes_processed, CopyGetData(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::need_transcoding, pgstat_progress_update_param(), PROGRESS_COPY_BYTES_PROCESSED, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, RAW_BUF_SIZE, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf(), and CopyReadBinaryData().

◆ CopyReadAttributesCSV()

static int CopyReadAttributesCSV ( CopyFromState  cstate)
static

Definition at line 1750 of file copyfromparse.c.

1751{
1752 char delimc = cstate->opts.delim[0];
1753 char quotec = cstate->opts.quote[0];
1754 char escapec = cstate->opts.escape[0];
1755 int fieldno;
1756 char *output_ptr;
1757 char *cur_ptr;
1758 char *line_end_ptr;
1759
1760 /*
1761 * We need a special case for zero-column tables: check that the input
1762 * line is empty, and return.
1763 */
1764 if (cstate->max_fields <= 0)
1765 {
1766 if (cstate->line_buf.len != 0)
1767 ereport(ERROR,
1768 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1769 errmsg("extra data after last expected column")));
1770 return 0;
1771 }
1772
1773 resetStringInfo(&cstate->attribute_buf);
1774
1775 /*
1776 * The de-escaped attributes will certainly not be longer than the input
1777 * data line, so we can just force attribute_buf to be large enough and
1778 * then transfer data without any checks for enough space. We need to do
1779 * it this way because enlarging attribute_buf mid-stream would invalidate
1780 * pointers already stored into cstate->raw_fields[].
1781 */
1782 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1783 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1784 output_ptr = cstate->attribute_buf.data;
1785
1786 /* set pointer variables for loop */
1787 cur_ptr = cstate->line_buf.data;
1788 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1789
1790 /* Outer loop iterates over fields */
1791 fieldno = 0;
1792 for (;;)
1793 {
1794 bool found_delim = false;
1795 bool saw_quote = false;
1796 char *start_ptr;
1797 char *end_ptr;
1798 int input_len;
1799
1800 /* Make sure there is enough space for the next value */
1801 if (fieldno >= cstate->max_fields)
1802 {
1803 cstate->max_fields *= 2;
1804 cstate->raw_fields =
1805 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1806 }
1807
1808 /* Remember start of field on both input and output sides */
1809 start_ptr = cur_ptr;
1810 cstate->raw_fields[fieldno] = output_ptr;
1811
1812 /*
1813 * Scan data for field,
1814 *
1815 * The loop starts in "not quote" mode and then toggles between that
1816 * and "in quote" mode. The loop exits normally if it is in "not
1817 * quote" mode and a delimiter or line end is seen.
1818 */
1819 for (;;)
1820 {
1821 char c;
1822
1823 /* Not in quote */
1824 for (;;)
1825 {
1826 end_ptr = cur_ptr;
1827 if (cur_ptr >= line_end_ptr)
1828 goto endfield;
1829 c = *cur_ptr++;
1830 /* unquoted field delimiter */
1831 if (c == delimc)
1832 {
1833 found_delim = true;
1834 goto endfield;
1835 }
1836 /* start of quoted field (or part of field) */
1837 if (c == quotec)
1838 {
1839 saw_quote = true;
1840 break;
1841 }
1842 /* Add c to output string */
1843 *output_ptr++ = c;
1844 }
1845
1846 /* In quote */
1847 for (;;)
1848 {
1849 end_ptr = cur_ptr;
1850 if (cur_ptr >= line_end_ptr)
1851 ereport(ERROR,
1852 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1853 errmsg("unterminated CSV quoted field")));
1854
1855 c = *cur_ptr++;
1856
1857 /* escape within a quoted field */
1858 if (c == escapec)
1859 {
1860 /*
1861 * peek at the next char if available, and escape it if it
1862 * is an escape char or a quote char
1863 */
1864 if (cur_ptr < line_end_ptr)
1865 {
1866 char nextc = *cur_ptr;
1867
1868 if (nextc == escapec || nextc == quotec)
1869 {
1870 *output_ptr++ = nextc;
1871 cur_ptr++;
1872 continue;
1873 }
1874 }
1875 }
1876
1877 /*
1878 * end of quoted field. Must do this test after testing for
1879 * escape in case quote char and escape char are the same
1880 * (which is the common case).
1881 */
1882 if (c == quotec)
1883 break;
1884
1885 /* Add c to output string */
1886 *output_ptr++ = c;
1887 }
1888 }
1889endfield:
1890
1891 /* Terminate attribute value in output area */
1892 *output_ptr++ = '\0';
1893
1894 /* Check whether raw input matched null marker */
1895 input_len = end_ptr - start_ptr;
1896 if (!saw_quote && input_len == cstate->opts.null_print_len &&
1897 strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1898 cstate->raw_fields[fieldno] = NULL;
1899 /* Check whether raw input matched default marker */
1900 else if (fieldno < list_length(cstate->attnumlist) &&
1901 cstate->opts.default_print &&
1902 input_len == cstate->opts.default_print_len &&
1903 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1904 {
1905 /* fieldno is 0-index and attnum is 1-index */
1906 int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1907
1908 if (cstate->defexprs[m] != NULL)
1909 {
1910 /* defaults contain entries for all physical attributes */
1911 cstate->defaults[m] = true;
1912 }
1913 else
1914 {
1915 TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1916 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1917
1918 ereport(ERROR,
1919 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1920 errmsg("unexpected default marker in COPY data"),
1921 errdetail("Column \"%s\" has no default value.",
1922 NameStr(att->attname))));
1923 }
1924 }
1925
1926 fieldno++;
1927 /* Done if we hit EOL instead of a delim */
1928 if (!found_delim)
1929 break;
1930 }
1931
1932 /* Clean up state of attribute_buf */
1933 output_ptr--;
1934 Assert(*output_ptr == '\0');
1935 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1936
1937 return fieldno;
1938}
#define NameStr(name)
Definition: c.h:700
int errdetail(const char *fmt,...)
Definition: elog.c:1203
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:200
static int list_length(const List *l)
Definition: pg_list.h:152
static int list_nth_int(const List *list, int n)
Definition: pg_list.h:310
char * c
#define RelationGetDescr(relation)
Definition: rel.h:531
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:286
int default_print_len
Definition: copy.h:72
int null_print_len
Definition: copy.h:69
char * quote
Definition: copy.h:74
char * escape
Definition: copy.h:75
char * null_print
Definition: copy.h:68
char * delim
Definition: copy.h:73
char * default_print
Definition: copy.h:71
StringInfoData line_buf
CopyFormatOptions opts
StringInfoData attribute_buf
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition: tupdesc.h:152

References Assert, CopyFromStateData::attnumlist, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::default_print, CopyFormatOptions::default_print_len, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errdetail(), errmsg(), ERROR, CopyFormatOptions::escape, StringInfoData::len, CopyFromStateData::line_buf, list_length(), list_nth_int(), CopyFromStateData::max_fields, StringInfoData::maxlen, NameStr, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, CopyFromStateData::opts, CopyFormatOptions::quote, CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, repalloc(), resetStringInfo(), and TupleDescAttr().

Referenced by NextCopyFromRawFields().

◆ CopyReadAttributesText()

static int CopyReadAttributesText ( CopyFromState  cstate)
static

Definition at line 1496 of file copyfromparse.c.

1497{
1498 char delimc = cstate->opts.delim[0];
1499 int fieldno;
1500 char *output_ptr;
1501 char *cur_ptr;
1502 char *line_end_ptr;
1503
1504 /*
1505 * We need a special case for zero-column tables: check that the input
1506 * line is empty, and return.
1507 */
1508 if (cstate->max_fields <= 0)
1509 {
1510 if (cstate->line_buf.len != 0)
1511 ereport(ERROR,
1512 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1513 errmsg("extra data after last expected column")));
1514 return 0;
1515 }
1516
1517 resetStringInfo(&cstate->attribute_buf);
1518
1519 /*
1520 * The de-escaped attributes will certainly not be longer than the input
1521 * data line, so we can just force attribute_buf to be large enough and
1522 * then transfer data without any checks for enough space. We need to do
1523 * it this way because enlarging attribute_buf mid-stream would invalidate
1524 * pointers already stored into cstate->raw_fields[].
1525 */
1526 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1527 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1528 output_ptr = cstate->attribute_buf.data;
1529
1530 /* set pointer variables for loop */
1531 cur_ptr = cstate->line_buf.data;
1532 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1533
1534 /* Outer loop iterates over fields */
1535 fieldno = 0;
1536 for (;;)
1537 {
1538 bool found_delim = false;
1539 char *start_ptr;
1540 char *end_ptr;
1541 int input_len;
1542 bool saw_non_ascii = false;
1543
1544 /* Make sure there is enough space for the next value */
1545 if (fieldno >= cstate->max_fields)
1546 {
1547 cstate->max_fields *= 2;
1548 cstate->raw_fields =
1549 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1550 }
1551
1552 /* Remember start of field on both input and output sides */
1553 start_ptr = cur_ptr;
1554 cstate->raw_fields[fieldno] = output_ptr;
1555
1556 /*
1557 * Scan data for field.
1558 *
1559 * Note that in this loop, we are scanning to locate the end of field
1560 * and also speculatively performing de-escaping. Once we find the
1561 * end-of-field, we can match the raw field contents against the null
1562 * marker string. Only after that comparison fails do we know that
1563 * de-escaping is actually the right thing to do; therefore we *must
1564 * not* throw any syntax errors before we've done the null-marker
1565 * check.
1566 */
1567 for (;;)
1568 {
1569 char c;
1570
1571 end_ptr = cur_ptr;
1572 if (cur_ptr >= line_end_ptr)
1573 break;
1574 c = *cur_ptr++;
1575 if (c == delimc)
1576 {
1577 found_delim = true;
1578 break;
1579 }
1580 if (c == '\\')
1581 {
1582 if (cur_ptr >= line_end_ptr)
1583 break;
1584 c = *cur_ptr++;
1585 switch (c)
1586 {
1587 case '0':
1588 case '1':
1589 case '2':
1590 case '3':
1591 case '4':
1592 case '5':
1593 case '6':
1594 case '7':
1595 {
1596 /* handle \013 */
1597 int val;
1598
1599 val = OCTVALUE(c);
1600 if (cur_ptr < line_end_ptr)
1601 {
1602 c = *cur_ptr;
1603 if (ISOCTAL(c))
1604 {
1605 cur_ptr++;
1606 val = (val << 3) + OCTVALUE(c);
1607 if (cur_ptr < line_end_ptr)
1608 {
1609 c = *cur_ptr;
1610 if (ISOCTAL(c))
1611 {
1612 cur_ptr++;
1613 val = (val << 3) + OCTVALUE(c);
1614 }
1615 }
1616 }
1617 }
1618 c = val & 0377;
1619 if (c == '\0' || IS_HIGHBIT_SET(c))
1620 saw_non_ascii = true;
1621 }
1622 break;
1623 case 'x':
1624 /* Handle \x3F */
1625 if (cur_ptr < line_end_ptr)
1626 {
1627 char hexchar = *cur_ptr;
1628
1629 if (isxdigit((unsigned char) hexchar))
1630 {
1631 int val = GetDecimalFromHex(hexchar);
1632
1633 cur_ptr++;
1634 if (cur_ptr < line_end_ptr)
1635 {
1636 hexchar = *cur_ptr;
1637 if (isxdigit((unsigned char) hexchar))
1638 {
1639 cur_ptr++;
1640 val = (val << 4) + GetDecimalFromHex(hexchar);
1641 }
1642 }
1643 c = val & 0xff;
1644 if (c == '\0' || IS_HIGHBIT_SET(c))
1645 saw_non_ascii = true;
1646 }
1647 }
1648 break;
1649 case 'b':
1650 c = '\b';
1651 break;
1652 case 'f':
1653 c = '\f';
1654 break;
1655 case 'n':
1656 c = '\n';
1657 break;
1658 case 'r':
1659 c = '\r';
1660 break;
1661 case 't':
1662 c = '\t';
1663 break;
1664 case 'v':
1665 c = '\v';
1666 break;
1667
1668 /*
1669 * in all other cases, take the char after '\'
1670 * literally
1671 */
1672 }
1673 }
1674
1675 /* Add c to output string */
1676 *output_ptr++ = c;
1677 }
1678
1679 /* Check whether raw input matched null marker */
1680 input_len = end_ptr - start_ptr;
1681 if (input_len == cstate->opts.null_print_len &&
1682 strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1683 cstate->raw_fields[fieldno] = NULL;
1684 /* Check whether raw input matched default marker */
1685 else if (fieldno < list_length(cstate->attnumlist) &&
1686 cstate->opts.default_print &&
1687 input_len == cstate->opts.default_print_len &&
1688 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1689 {
1690 /* fieldno is 0-indexed and attnum is 1-indexed */
1691 int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1692
1693 if (cstate->defexprs[m] != NULL)
1694 {
1695 /* defaults contain entries for all physical attributes */
1696 cstate->defaults[m] = true;
1697 }
1698 else
1699 {
1700 TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1701 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1702
1703 ereport(ERROR,
1704 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1705 errmsg("unexpected default marker in COPY data"),
1706 errdetail("Column \"%s\" has no default value.",
1707 NameStr(att->attname))));
1708 }
1709 }
1710 else
1711 {
1712 /*
1713 * At this point we know the field is supposed to contain data.
1714 *
1715 * If we de-escaped any non-7-bit-ASCII chars, make sure the
1716 * resulting string is valid data for the db encoding.
1717 */
1718 if (saw_non_ascii)
1719 {
1720 char *fld = cstate->raw_fields[fieldno];
1721
1722 pg_verifymbstr(fld, output_ptr - fld, false);
1723 }
1724 }
1725
1726 /* Terminate attribute value in output area */
1727 *output_ptr++ = '\0';
1728
1729 fieldno++;
1730 /* Done if we hit EOL instead of a delim */
1731 if (!found_delim)
1732 break;
1733 }
1734
1735 /* Clean up state of attribute_buf */
1736 output_ptr--;
1737 Assert(*output_ptr == '\0');
1738 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1739
1740 return fieldno;
1741}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1109
#define OCTVALUE(c)
Definition: copyfromparse.c:79
#define ISOCTAL(c)
Definition: copyfromparse.c:78
static int GetDecimalFromHex(char hex)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556

References Assert, CopyFromStateData::attnumlist, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::default_print, CopyFormatOptions::default_print_len, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errdetail(), errmsg(), ERROR, GetDecimalFromHex(), if(), IS_HIGHBIT_SET, ISOCTAL, StringInfoData::len, CopyFromStateData::line_buf, list_length(), list_nth_int(), CopyFromStateData::max_fields, StringInfoData::maxlen, NameStr, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, OCTVALUE, CopyFromStateData::opts, pg_verifymbstr(), CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, repalloc(), resetStringInfo(), TupleDescAttr(), and val.

Referenced by NextCopyFromRawFields().

◆ CopyReadBinaryAttribute()

static Datum CopyReadBinaryAttribute ( CopyFromState  cstate,
FmgrInfo flinfo,
Oid  typioparam,
int32  typmod,
bool *  isnull 
)
static

Definition at line 1945 of file copyfromparse.c.

1948{
1949 int32 fld_size;
1950 Datum result;
1951
1952 if (!CopyGetInt32(cstate, &fld_size))
1953 ereport(ERROR,
1954 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1955 errmsg("unexpected EOF in COPY data")));
1956 if (fld_size == -1)
1957 {
1958 *isnull = true;
1959 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
1960 }
1961 if (fld_size < 0)
1962 ereport(ERROR,
1963 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1964 errmsg("invalid field size")));
1965
1966 /* reset attribute_buf to empty, and load raw data in it */
1968
1969 enlargeStringInfo(&cstate->attribute_buf, fld_size);
1970 if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
1971 fld_size) != fld_size)
1972 ereport(ERROR,
1973 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1974 errmsg("unexpected EOF in COPY data")));
1975
1976 cstate->attribute_buf.len = fld_size;
1977 cstate->attribute_buf.data[fld_size] = '\0';
1978
1979 /* Call the column type's binary input converter */
1980 result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
1981 typioparam, typmod);
1982
1983 /* Trouble if it didn't eat the whole buffer */
1984 if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
1985 ereport(ERROR,
1986 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1987 errmsg("incorrect binary data format")));
1988
1989 *isnull = false;
1990 return result;
1991}
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1697
uintptr_t Datum
Definition: postgres.h:64

References CopyFromStateData::attribute_buf, CopyGetInt32(), CopyReadBinaryData(), StringInfoData::cursor, StringInfoData::data, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, StringInfoData::len, ReceiveFunctionCall(), and resetStringInfo().

Referenced by NextCopyFrom().

◆ CopyReadBinaryData()

static int CopyReadBinaryData ( CopyFromState  cstate,
char *  dest,
int  nbytes 
)
static

Definition at line 692 of file copyfromparse.c.

693{
694 int copied_bytes = 0;
695
696 if (RAW_BUF_BYTES(cstate) >= nbytes)
697 {
698 /* Enough bytes are present in the buffer. */
699 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
700 cstate->raw_buf_index += nbytes;
701 copied_bytes = nbytes;
702 }
703 else
704 {
705 /*
706 * Not enough bytes in the buffer, so must read from the file. Need
707 * to loop since 'nbytes' could be larger than the buffer size.
708 */
709 do
710 {
711 int copy_bytes;
712
713 /* Load more data if buffer is empty. */
714 if (RAW_BUF_BYTES(cstate) == 0)
715 {
716 CopyLoadRawBuf(cstate);
717 if (cstate->raw_reached_eof)
718 break; /* EOF */
719 }
720
721 /* Transfer some bytes. */
722 copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
723 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
724 cstate->raw_buf_index += copy_bytes;
725 dest += copy_bytes;
726 copied_bytes += copy_bytes;
727 } while (copied_bytes < nbytes);
728 }
729
730 return copied_bytes;
731}
#define Min(x, y)
Definition: c.h:958

References CopyLoadRawBuf(), generate_unaccent_rules::dest, Min, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyGetInt16(), CopyGetInt32(), CopyReadBinaryAttribute(), NextCopyFrom(), and ReceiveCopyBinaryHeader().

◆ CopyReadLine()

static bool CopyReadLine ( CopyFromState  cstate)
static

Definition at line 1090 of file copyfromparse.c.

1091{
1092 bool result;
1093
1094 resetStringInfo(&cstate->line_buf);
1095 cstate->line_buf_valid = false;
1096
1097 /* Parse data and transfer into line_buf */
1098 result = CopyReadLineText(cstate);
1099
1100 if (result)
1101 {
1102 /*
1103 * Reached EOF. In protocol version 3, we should ignore anything
1104 * after \. up to the protocol end of copy data. (XXX maybe better
1105 * not to treat \. as special?)
1106 */
1107 if (cstate->copy_src == COPY_FRONTEND)
1108 {
1109 int inbytes;
1110
1111 do
1112 {
1113 inbytes = CopyGetData(cstate, cstate->input_buf,
1114 1, INPUT_BUF_SIZE);
1115 } while (inbytes > 0);
1116 cstate->input_buf_index = 0;
1117 cstate->input_buf_len = 0;
1118 cstate->raw_buf_index = 0;
1119 cstate->raw_buf_len = 0;
1120 }
1121 }
1122 else
1123 {
1124 /*
1125 * If we didn't hit EOF, then we must have transferred the EOL marker
1126 * to line_buf along with the data. Get rid of it.
1127 */
1128 switch (cstate->eol_type)
1129 {
1130 case EOL_NL:
1131 Assert(cstate->line_buf.len >= 1);
1132 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1133 cstate->line_buf.len--;
1134 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1135 break;
1136 case EOL_CR:
1137 Assert(cstate->line_buf.len >= 1);
1138 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
1139 cstate->line_buf.len--;
1140 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1141 break;
1142 case EOL_CRNL:
1143 Assert(cstate->line_buf.len >= 2);
1144 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
1145 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1146 cstate->line_buf.len -= 2;
1147 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1148 break;
1149 case EOL_UNKNOWN:
1150 /* shouldn't get here */
1151 Assert(false);
1152 break;
1153 }
1154 }
1155
1156 /* Now it's safe to use the buffer in error messages */
1157 cstate->line_buf_valid = true;
1158
1159 return result;
1160}
@ EOL_CR
@ EOL_CRNL
@ EOL_UNKNOWN
@ EOL_NL
static bool CopyReadLineText(CopyFromState cstate)

References Assert, COPY_FRONTEND, CopyFromStateData::copy_src, CopyGetData(), CopyReadLineText(), StringInfoData::data, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::line_buf_valid, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and resetStringInfo().

Referenced by NextCopyFromRawFields().

◆ CopyReadLineText()

static bool CopyReadLineText ( CopyFromState  cstate)
static

Definition at line 1166 of file copyfromparse.c.

1167{
1168 char *copy_input_buf;
1169 int input_buf_ptr;
1170 int copy_buf_len;
1171 bool need_data = false;
1172 bool hit_eof = false;
1173 bool result = false;
1174
1175 /* CSV variables */
1176 bool in_quote = false,
1177 last_was_esc = false;
1178 char quotec = '\0';
1179 char escapec = '\0';
1180
1181 if (cstate->opts.csv_mode)
1182 {
1183 quotec = cstate->opts.quote[0];
1184 escapec = cstate->opts.escape[0];
1185 /* ignore special escape processing if it's the same as quotec */
1186 if (quotec == escapec)
1187 escapec = '\0';
1188 }
1189
1190 /*
1191 * The objective of this loop is to transfer the entire next input line
1192 * into line_buf. Hence, we only care for detecting newlines (\r and/or
1193 * \n) and the end-of-copy marker (\.).
1194 *
1195 * In CSV mode, \r and \n inside a quoted field are just part of the data
1196 * value and are put in line_buf. We keep just enough state to know if we
1197 * are currently in a quoted field or not.
1198 *
1199 * The input has already been converted to the database encoding. All
1200 * supported server encodings have the property that all bytes in a
1201 * multi-byte sequence have the high bit set, so a multibyte character
1202 * cannot contain any newline or escape characters embedded in the
1203 * multibyte sequence. Therefore, we can process the input byte-by-byte,
1204 * regardless of the encoding.
1205 *
1206 * For speed, we try to move data from input_buf to line_buf in chunks
1207 * rather than one character at a time. input_buf_ptr points to the next
1208 * character to examine; any characters from input_buf_index to
1209 * input_buf_ptr have been determined to be part of the line, but not yet
1210 * transferred to line_buf.
1211 *
1212 * For a little extra speed within the loop, we copy input_buf and
1213 * input_buf_len into local variables.
1214 */
1215 copy_input_buf = cstate->input_buf;
1216 input_buf_ptr = cstate->input_buf_index;
1217 copy_buf_len = cstate->input_buf_len;
1218
1219 for (;;)
1220 {
1221 int prev_raw_ptr;
1222 char c;
1223
1224 /*
1225 * Load more data if needed.
1226 *
1227 * TODO: We could just force four bytes of read-ahead and avoid the
1228 * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was
1229 * unsafe with the old v2 COPY protocol, but we don't support that
1230 * anymore.
1231 */
1232 if (input_buf_ptr >= copy_buf_len || need_data)
1233 {
1235
1236 CopyLoadInputBuf(cstate);
1237 /* update our local variables */
1238 hit_eof = cstate->input_reached_eof;
1239 input_buf_ptr = cstate->input_buf_index;
1240 copy_buf_len = cstate->input_buf_len;
1241
1242 /*
1243 * If we are completely out of data, break out of the loop,
1244 * reporting EOF.
1245 */
1246 if (INPUT_BUF_BYTES(cstate) <= 0)
1247 {
1248 result = true;
1249 break;
1250 }
1251 need_data = false;
1252 }
1253
1254 /* OK to fetch a character */
1255 prev_raw_ptr = input_buf_ptr;
1256 c = copy_input_buf[input_buf_ptr++];
1257
1258 if (cstate->opts.csv_mode)
1259 {
1260 /*
1261 * If character is '\r', we may need to look ahead below. Force
1262 * fetch of the next character if we don't already have it. We
1263 * need to do this before changing CSV state, in case '\r' is also
1264 * the quote or escape character.
1265 */
1266 if (c == '\r')
1267 {
1269 }
1270
1271 /*
1272 * Dealing with quotes and escapes here is mildly tricky. If the
1273 * quote char is also the escape char, there's no problem - we
1274 * just use the char as a toggle. If they are different, we need
1275 * to ensure that we only take account of an escape inside a
1276 * quoted field and immediately preceding a quote char, and not
1277 * the second in an escape-escape sequence.
1278 */
1279 if (in_quote && c == escapec)
1280 last_was_esc = !last_was_esc;
1281 if (c == quotec && !last_was_esc)
1282 in_quote = !in_quote;
1283 if (c != escapec)
1284 last_was_esc = false;
1285
1286 /*
1287 * Updating the line count for embedded CR and/or LF chars is
1288 * necessarily a little fragile - this test is probably about the
1289 * best we can do. (XXX it's arguable whether we should do this
1290 * at all --- is cur_lineno a physical or logical count?)
1291 */
1292 if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
1293 cstate->cur_lineno++;
1294 }
1295
1296 /* Process \r */
1297 if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
1298 {
1299 /* Check for \r\n on first line, _and_ handle \r\n. */
1300 if (cstate->eol_type == EOL_UNKNOWN ||
1301 cstate->eol_type == EOL_CRNL)
1302 {
1303 /*
1304 * If need more data, go back to loop top to load it.
1305 *
1306 * Note that if we are at EOF, c will wind up as '\0' because
1307 * of the guaranteed pad of input_buf.
1308 */
1310
1311 /* get next char */
1312 c = copy_input_buf[input_buf_ptr];
1313
1314 if (c == '\n')
1315 {
1316 input_buf_ptr++; /* eat newline */
1317 cstate->eol_type = EOL_CRNL; /* in case not set yet */
1318 }
1319 else
1320 {
1321 /* found \r, but no \n */
1322 if (cstate->eol_type == EOL_CRNL)
1323 ereport(ERROR,
1324 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1325 !cstate->opts.csv_mode ?
1326 errmsg("literal carriage return found in data") :
1327 errmsg("unquoted carriage return found in data"),
1328 !cstate->opts.csv_mode ?
1329 errhint("Use \"\\r\" to represent carriage return.") :
1330 errhint("Use quoted CSV field to represent carriage return.")));
1331
1332 /*
1333 * if we got here, it is the first line and we didn't find
1334 * \n, so don't consume the peeked character
1335 */
1336 cstate->eol_type = EOL_CR;
1337 }
1338 }
1339 else if (cstate->eol_type == EOL_NL)
1340 ereport(ERROR,
1341 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1342 !cstate->opts.csv_mode ?
1343 errmsg("literal carriage return found in data") :
1344 errmsg("unquoted carriage return found in data"),
1345 !cstate->opts.csv_mode ?
1346 errhint("Use \"\\r\" to represent carriage return.") :
1347 errhint("Use quoted CSV field to represent carriage return.")));
1348 /* If reach here, we have found the line terminator */
1349 break;
1350 }
1351
1352 /* Process \n */
1353 if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
1354 {
1355 if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
1356 ereport(ERROR,
1357 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1358 !cstate->opts.csv_mode ?
1359 errmsg("literal newline found in data") :
1360 errmsg("unquoted newline found in data"),
1361 !cstate->opts.csv_mode ?
1362 errhint("Use \"\\n\" to represent newline.") :
1363 errhint("Use quoted CSV field to represent newline.")));
1364 cstate->eol_type = EOL_NL; /* in case not set yet */
1365 /* If reach here, we have found the line terminator */
1366 break;
1367 }
1368
1369 /*
1370 * Process backslash, except in CSV mode where backslash is a normal
1371 * character.
1372 */
1373 if (c == '\\' && !cstate->opts.csv_mode)
1374 {
1375 char c2;
1376
1379
1380 /* -----
1381 * get next character
1382 * Note: we do not change c so if it isn't \., we can fall
1383 * through and continue processing.
1384 * -----
1385 */
1386 c2 = copy_input_buf[input_buf_ptr];
1387
1388 if (c2 == '.')
1389 {
1390 input_buf_ptr++; /* consume the '.' */
1391 if (cstate->eol_type == EOL_CRNL)
1392 {
1393 /* Get the next character */
1395 /* if hit_eof, c2 will become '\0' */
1396 c2 = copy_input_buf[input_buf_ptr++];
1397
1398 if (c2 == '\n')
1399 ereport(ERROR,
1400 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1401 errmsg("end-of-copy marker does not match previous newline style")));
1402 else if (c2 != '\r')
1403 ereport(ERROR,
1404 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1405 errmsg("end-of-copy marker is not alone on its line")));
1406 }
1407
1408 /* Get the next character */
1410 /* if hit_eof, c2 will become '\0' */
1411 c2 = copy_input_buf[input_buf_ptr++];
1412
1413 if (c2 != '\r' && c2 != '\n')
1414 ereport(ERROR,
1415 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1416 errmsg("end-of-copy marker is not alone on its line")));
1417
1418 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
1419 (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
1420 (cstate->eol_type == EOL_CR && c2 != '\r'))
1421 ereport(ERROR,
1422 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1423 errmsg("end-of-copy marker does not match previous newline style")));
1424
1425 /*
1426 * If there is any data on this line before the \., complain.
1427 */
1428 if (cstate->line_buf.len > 0 ||
1429 prev_raw_ptr > cstate->input_buf_index)
1430 ereport(ERROR,
1431 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1432 errmsg("end-of-copy marker is not alone on its line")));
1433
1434 /*
1435 * Discard the \. and newline, then report EOF.
1436 */
1437 cstate->input_buf_index = input_buf_ptr;
1438 result = true; /* report EOF */
1439 break;
1440 }
1441 else
1442 {
1443 /*
1444 * If we are here, it means we found a backslash followed by
1445 * something other than a period. In non-CSV mode, anything
1446 * after a backslash is special, so we skip over that second
1447 * character too. If we didn't do that \\. would be
1448 * considered an eof-of copy, while in non-CSV mode it is a
1449 * literal backslash followed by a period.
1450 */
1451 input_buf_ptr++;
1452 }
1453 }
1454 } /* end of outer loop */
1455
1456 /*
1457 * Transfer any still-uncopied data to line_buf.
1458 */
1460
1461 return result;
1462}
#define REFILL_LINEBUF
static void CopyLoadInputBuf(CopyFromState cstate)
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Definition: copyfromparse.c:97
int errhint(const char *fmt,...)
Definition: elog.c:1317
bool csv_mode
Definition: copy.h:66

References CopyLoadInputBuf(), CopyFormatOptions::csv_mode, CopyFromStateData::cur_lineno, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, ereport, errcode(), errhint(), errmsg(), ERROR, CopyFormatOptions::escape, IF_NEED_REFILL_AND_EOF_BREAK, IF_NEED_REFILL_AND_NOT_EOF_CONTINUE, CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::input_reached_eof, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::opts, CopyFormatOptions::quote, and REFILL_LINEBUF.

Referenced by CopyReadLine().

◆ GetDecimalFromHex()

static int GetDecimalFromHex ( char  hex)
static

Definition at line 1468 of file copyfromparse.c.

/*
 * Return the numeric value of a single hexadecimal digit character.
 *
 * Decimal digits map to 0..9; any other character is lower-cased and
 * measured from 'a', so 'a'/'A' yield 10 and 'f'/'F' yield 15.  No
 * validation is done here: the result for a non-hex character is whatever
 * that arithmetic produces.
 */
static int
GetDecimalFromHex(char hex)
{
	/* <ctype.h> functions need an unsigned char value, not a plain char */
	unsigned char uch = (unsigned char) hex;

	return isdigit(uch) ? hex - '0' : tolower(uch) - 'a' + 10;
}

Referenced by CopyReadAttributesText().

◆ NextCopyFrom()

bool NextCopyFrom ( CopyFromState  cstate,
ExprContext econtext,
Datum values,
bool *  nulls 
)

Definition at line 845 of file copyfromparse.c.

847{
848 TupleDesc tupDesc;
849 AttrNumber num_phys_attrs,
850 attr_count,
851 num_defaults = cstate->num_defaults;
852 FmgrInfo *in_functions = cstate->in_functions;
853 Oid *typioparams = cstate->typioparams;
854 int i;
855 int *defmap = cstate->defmap;
856 ExprState **defexprs = cstate->defexprs;
857
858 tupDesc = RelationGetDescr(cstate->rel);
859 num_phys_attrs = tupDesc->natts;
860 attr_count = list_length(cstate->attnumlist);
861
862 /* Initialize all values for row to NULL */
863 MemSet(values, 0, num_phys_attrs * sizeof(Datum));
864 MemSet(nulls, true, num_phys_attrs * sizeof(bool));
865 MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
866
867 if (!cstate->opts.binary)
868 {
869 char **field_strings;
870 ListCell *cur;
871 int fldct;
872 int fieldno;
873 char *string;
874
875 /* read raw fields in the next line */
876 if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
877 return false;
878
879 /* check for overflowing fields */
880 if (attr_count > 0 && fldct > attr_count)
882 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
883 errmsg("extra data after last expected column")));
884
885 fieldno = 0;
886
887 /* Loop to read the user attributes on the line. */
888 foreach(cur, cstate->attnumlist)
889 {
890 int attnum = lfirst_int(cur);
891 int m = attnum - 1;
892 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
893
894 if (fieldno >= fldct)
896 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
897 errmsg("missing data for column \"%s\"",
898 NameStr(att->attname))));
899 string = field_strings[fieldno++];
900
901 if (cstate->convert_select_flags &&
902 !cstate->convert_select_flags[m])
903 {
904 /* ignore input field, leaving column as NULL */
905 continue;
906 }
907
908 if (cstate->opts.csv_mode)
909 {
910 if (string == NULL &&
911 cstate->opts.force_notnull_flags[m])
912 {
913 /*
914 * FORCE_NOT_NULL option is set and column is NULL -
915 * convert it to the NULL string.
916 */
917 string = cstate->opts.null_print;
918 }
919 else if (string != NULL && cstate->opts.force_null_flags[m]
920 && strcmp(string, cstate->opts.null_print) == 0)
921 {
922 /*
923 * FORCE_NULL option is set and column matches the NULL
924 * string. It must have been quoted, or otherwise the
925 * string would already have been set to NULL. Convert it
926 * to NULL as specified.
927 */
928 string = NULL;
929 }
930 }
931
932 cstate->cur_attname = NameStr(att->attname);
933 cstate->cur_attval = string;
934
935 if (string != NULL)
936 nulls[m] = false;
937
938 if (cstate->defaults[m])
939 {
940 /*
941 * The caller must supply econtext and have switched into the
942 * per-tuple memory context in it.
943 */
944 Assert(econtext != NULL);
946
947 values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
948 }
949
950 /*
951 * If ON_ERROR is specified with IGNORE, skip rows with soft
952 * errors
953 */
954 else if (!InputFunctionCallSafe(&in_functions[m],
955 string,
956 typioparams[m],
957 att->atttypmod,
958 (Node *) cstate->escontext,
959 &values[m]))
960 {
962
963 cstate->num_errors++;
964
966 {
967 /*
968 * Since we emit line number and column info in the below
969 * notice message, we suppress error context information
970 * other than the relation name.
971 */
972 Assert(!cstate->relname_only);
973 cstate->relname_only = true;
974
975 if (cstate->cur_attval)
976 {
977 char *attval;
978
979 attval = CopyLimitPrintoutLength(cstate->cur_attval);
981 errmsg("skipping row due to data type incompatibility at line %llu for column \"%s\": \"%s\"",
982 (unsigned long long) cstate->cur_lineno,
983 cstate->cur_attname,
984 attval));
985 pfree(attval);
986 }
987 else
989 errmsg("skipping row due to data type incompatibility at line %llu for column \"%s\": null input",
990 (unsigned long long) cstate->cur_lineno,
991 cstate->cur_attname));
992
993 /* reset relname_only */
994 cstate->relname_only = false;
995 }
996
997 return true;
998 }
999
1000 cstate->cur_attname = NULL;
1001 cstate->cur_attval = NULL;
1002 }
1003
1004 Assert(fieldno == attr_count);
1005 }
1006 else
1007 {
1008 /* binary */
1009 int16 fld_count;
1010 ListCell *cur;
1011
1012 cstate->cur_lineno++;
1013
1014 if (!CopyGetInt16(cstate, &fld_count))
1015 {
1016 /* EOF detected (end of file, or protocol-level EOF) */
1017 return false;
1018 }
1019
1020 if (fld_count == -1)
1021 {
1022 /*
1023 * Received EOF marker. Wait for the protocol-level EOF, and
1024 * complain if it doesn't come immediately. In COPY FROM STDIN,
1025 * this ensures that we correctly handle CopyFail, if client
1026 * chooses to send that now. When copying from file, we could
1027 * ignore the rest of the file like in text mode, but we choose to
1028 * be consistent with the COPY FROM STDIN case.
1029 */
1030 char dummy;
1031
1032 if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
1033 ereport(ERROR,
1034 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1035 errmsg("received copy data after EOF marker")));
1036 return false;
1037 }
1038
1039 if (fld_count != attr_count)
1040 ereport(ERROR,
1041 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1042 errmsg("row field count is %d, expected %d",
1043 (int) fld_count, attr_count)));
1044
1045 foreach(cur, cstate->attnumlist)
1046 {
1047 int attnum = lfirst_int(cur);
1048 int m = attnum - 1;
1049 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1050
1051 cstate->cur_attname = NameStr(att->attname);
1052 values[m] = CopyReadBinaryAttribute(cstate,
1053 &in_functions[m],
1054 typioparams[m],
1055 att->atttypmod,
1056 &nulls[m]);
1057 cstate->cur_attname = NULL;
1058 }
1059 }
1060
1061 /*
1062 * Now compute and insert any defaults available for the columns not
1063 * provided by the input data. Anything not processed here or above will
1064 * remain NULL.
1065 */
1066 for (i = 0; i < num_defaults; i++)
1067 {
1068 /*
1069 * The caller must supply econtext and have switched into the
1070 * per-tuple memory context in it.
1071 */
1072 Assert(econtext != NULL);
1074
1075 values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
1076 &nulls[defmap[i]]);
1077 }
1078
1079 return true;
1080}
int16 AttrNumber
Definition: attnum.h:21
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define MemSet(start, val, len)
Definition: c.h:974
char * CopyLimitPrintoutLength(const char *str)
Definition: copyfrom.c:194
static bool CopyGetInt16(CopyFromState cstate, int16 *val)
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
struct cursor * cur
Definition: ecpg.c:29
#define NOTICE
Definition: elog.h:35
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:346
bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod, fmNodePtr escontext, Datum *result)
Definition: fmgr.c:1585
@ COPY_ON_ERROR_STOP
Definition: copy.h:39
@ COPY_LOG_VERBOSITY_VERBOSE
Definition: copy.h:51
int i
Definition: isn.c:72
void pfree(void *pointer)
Definition: mcxt.c:1521
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
int16 attnum
Definition: pg_attribute.h:74
#define lfirst_int(lc)
Definition: pg_list.h:173
unsigned int Oid
Definition: postgres_ext.h:31
char string[11]
Definition: preproc-type.c:52
bool binary
Definition: copy.h:64
CopyLogVerbosityChoice log_verbosity
Definition: copy.h:87
CopyOnErrorChoice on_error
Definition: copy.h:86
bool * force_notnull_flags
Definition: copy.h:81
bool * force_null_flags
Definition: copy.h:84
const char * cur_attval
const char * cur_attname
ErrorSaveContext * escontext
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:266
Definition: fmgr.h:57
Definition: nodes.h:129

References Assert, attnum, CopyFromStateData::attnumlist, CopyFormatOptions::binary, CopyFromStateData::convert_select_flags, COPY_LOG_VERBOSITY_VERBOSE, COPY_ON_ERROR_STOP, CopyGetInt16(), CopyLimitPrintoutLength(), CopyReadBinaryAttribute(), CopyReadBinaryData(), CopyFormatOptions::csv_mode, cur, CopyFromStateData::cur_attname, CopyFromStateData::cur_attval, CopyFromStateData::cur_lineno, CurrentMemoryContext, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFromStateData::defmap, ExprContext::ecxt_per_tuple_memory, ereport, errcode(), errmsg(), ERROR, CopyFromStateData::escontext, ExecEvalExpr(), CopyFormatOptions::force_notnull_flags, CopyFormatOptions::force_null_flags, i, CopyFromStateData::in_functions, InputFunctionCallSafe(), lfirst_int, list_length(), CopyFormatOptions::log_verbosity, MemSet, NameStr, TupleDescData::natts, NextCopyFromRawFields(), NOTICE, CopyFormatOptions::null_print, CopyFromStateData::num_defaults, CopyFromStateData::num_errors, CopyFormatOptions::on_error, CopyFromStateData::opts, pfree(), CopyFromStateData::rel, RelationGetDescr, CopyFromStateData::relname_only, TupleDescAttr(), CopyFromStateData::typioparams, and values.

Referenced by CopyFrom(), file_acquire_sample_rows(), and fileIterateForeignScan().

◆ NextCopyFromRawFields()

bool NextCopyFromRawFields ( CopyFromState  cstate,
char ***  fields,
int *  nfields 
)

Definition at line 745 of file copyfromparse.c.

746{
747 int fldct;
748 bool done;
749
750 /* only available for text or csv input */
751 Assert(!cstate->opts.binary);
752
753 /* on input check that the header line is correct if needed */
754 if (cstate->cur_lineno == 0 && cstate->opts.header_line)
755 {
756 ListCell *cur;
757 TupleDesc tupDesc;
758
759 tupDesc = RelationGetDescr(cstate->rel);
760
761 cstate->cur_lineno++;
762 done = CopyReadLine(cstate);
763
764 if (cstate->opts.header_line == COPY_HEADER_MATCH)
765 {
766 int fldnum;
767
768 if (cstate->opts.csv_mode)
769 fldct = CopyReadAttributesCSV(cstate);
770 else
771 fldct = CopyReadAttributesText(cstate);
772
773 if (fldct != list_length(cstate->attnumlist))
775 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
776 errmsg("wrong number of fields in header line: got %d, expected %d",
777 fldct, list_length(cstate->attnumlist))));
778
779 fldnum = 0;
780 foreach(cur, cstate->attnumlist)
781 {
782 int attnum = lfirst_int(cur);
783 char *colName;
784 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
785
786 Assert(fldnum < cstate->max_fields);
787
788 colName = cstate->raw_fields[fldnum++];
789 if (colName == NULL)
791 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
792 errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
793 fldnum, cstate->opts.null_print, NameStr(attr->attname))));
794
795 if (namestrcmp(&attr->attname, colName) != 0)
796 {
798 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
799 errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
800 fldnum, colName, NameStr(attr->attname))));
801 }
802 }
803 }
804
805 if (done)
806 return false;
807 }
808
809 cstate->cur_lineno++;
810
811 /* Actually read the line into memory here */
812 done = CopyReadLine(cstate);
813
814 /*
815 * EOF at start of line means we're done. If we see EOF after some
816 * characters, we act as though it was newline followed by EOF, ie,
817 * process the line and then exit loop on next iteration.
818 */
819 if (done && cstate->line_buf.len == 0)
820 return false;
821
822 /* Parse the line into de-escaped field values */
823 if (cstate->opts.csv_mode)
824 fldct = CopyReadAttributesCSV(cstate);
825 else
826 fldct = CopyReadAttributesText(cstate);
827
828 *fields = cstate->raw_fields;
829 *nfields = fldct;
830 return true;
831}
static int CopyReadAttributesCSV(CopyFromState cstate)
static int CopyReadAttributesText(CopyFromState cstate)
static bool CopyReadLine(CopyFromState cstate)
@ COPY_HEADER_MATCH
Definition: copy.h:30
int namestrcmp(Name name, const char *str)
Definition: name.c:247
CopyHeaderChoice header_line
Definition: copy.h:67

References Assert, attnum, CopyFromStateData::attnumlist, CopyFormatOptions::binary, COPY_HEADER_MATCH, CopyReadAttributesCSV(), CopyReadAttributesText(), CopyReadLine(), CopyFormatOptions::csv_mode, cur, CopyFromStateData::cur_lineno, ereport, errcode(), errmsg(), ERROR, CopyFormatOptions::header_line, StringInfoData::len, lfirst_int, CopyFromStateData::line_buf, list_length(), NameStr, namestrcmp(), CopyFormatOptions::null_print, CopyFromStateData::opts, CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, and TupleDescAttr().

Referenced by NextCopyFrom().

◆ ReceiveCopyBegin()

void ReceiveCopyBegin ( CopyFromState  cstate)

Definition at line 161 of file copyfromparse.c.

162{
164 int natts = list_length(cstate->attnumlist);
165 int16 format = (cstate->opts.binary ? 1 : 0);
166 int i;
167
169 pq_sendbyte(&buf, format); /* overall format */
170 pq_sendint16(&buf, natts);
171 for (i = 0; i < natts; i++)
172 pq_sendint16(&buf, format); /* per-column formats */
174 cstate->copy_src = COPY_FRONTEND;
175 cstate->fe_msgbuf = makeStringInfo();
176 /* We *must* flush here to ensure FE knows it can send. */
177 pq_flush();
178}
#define pq_flush()
Definition: libpq.h:46
static char format
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:296
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:88
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:160
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:136
#define PqMsg_CopyInResponse
Definition: protocol.h:45
StringInfo makeStringInfo(void)
Definition: stringinfo.c:38

References CopyFromStateData::attnumlist, CopyFormatOptions::binary, buf, COPY_FRONTEND, CopyFromStateData::copy_src, CopyFromStateData::fe_msgbuf, format, i, list_length(), makeStringInfo(), CopyFromStateData::opts, pq_beginmessage(), pq_endmessage(), pq_flush, pq_sendbyte(), pq_sendint16(), and PqMsg_CopyInResponse.

Referenced by BeginCopyFrom().

◆ ReceiveCopyBinaryHeader()

void ReceiveCopyBinaryHeader ( CopyFromState  cstate)

Definition at line 181 of file copyfromparse.c.

182{
183 char readSig[11];
184 int32 tmp;
185
186 /* Signature */
187 if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
188 memcmp(readSig, BinarySignature, 11) != 0)
190 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
191 errmsg("COPY file signature not recognized")));
192 /* Flags field */
193 if (!CopyGetInt32(cstate, &tmp))
195 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
196 errmsg("invalid COPY file header (missing flags)")));
197 if ((tmp & (1 << 16)) != 0)
199 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
200 errmsg("invalid COPY file header (WITH OIDS)")));
201 tmp &= ~(1 << 16);
202 if ((tmp >> 16) != 0)
204 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
205 errmsg("unrecognized critical flags in COPY file header")));
206 /* Header extension length */
207 if (!CopyGetInt32(cstate, &tmp) ||
208 tmp < 0)
210 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
211 errmsg("invalid COPY file header (missing length)")));
212 /* Skip extension header, if present */
213 while (tmp-- > 0)
214 {
215 if (CopyReadBinaryData(cstate, readSig, 1) != 1)
217 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
218 errmsg("invalid COPY file header (wrong length)")));
219 }
220}
static const char BinarySignature[11]

References BinarySignature, CopyGetInt32(), CopyReadBinaryData(), ereport, errcode(), errmsg(), and ERROR.

Referenced by BeginCopyFrom().

Variable Documentation

◆ BinarySignature

const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"
static

Definition at line 139 of file copyfromparse.c.

Referenced by ReceiveCopyBinaryHeader().