PostgreSQL Source Code  git master
copyfromparse.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include "commands/copy.h"
#include "commands/copyfrom_internal.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/memutils.h"
#include "utils/rel.h"
Include dependency graph for copyfromparse.c:

Go to the source code of this file.

Macros

/* Nonzero when c is one of the octal digit characters '0' through '7'. */
#define ISOCTAL(c)   (('0' <= (c)) && ('7' >= (c)))
 
/* Numeric value of a digit character; meaningful when ISOCTAL(c) holds. */
#define OCTVALUE(c)   (-'0' + (c))
 
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
 
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
 
#define REFILL_LINEBUF
 
#define NO_END_OF_COPY_GOTO
 

Functions

static bool CopyReadLine (CopyFromState cstate)
 
static bool CopyReadLineText (CopyFromState cstate)
 
static int CopyReadAttributesText (CopyFromState cstate)
 
static int CopyReadAttributesCSV (CopyFromState cstate)
 
static Datum CopyReadBinaryAttribute (CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
 
static int CopyGetData (CopyFromState cstate, void *databuf, int minread, int maxread)
 
static bool CopyGetInt32 (CopyFromState cstate, int32 *val)
 
static bool CopyGetInt16 (CopyFromState cstate, int16 *val)
 
static void CopyLoadInputBuf (CopyFromState cstate)
 
static int CopyReadBinaryData (CopyFromState cstate, char *dest, int nbytes)
 
void ReceiveCopyBegin (CopyFromState cstate)
 
void ReceiveCopyBinaryHeader (CopyFromState cstate)
 
static void CopyConvertBuf (CopyFromState cstate)
 
static void CopyConversionError (CopyFromState cstate)
 
static void CopyLoadRawBuf (CopyFromState cstate)
 
bool NextCopyFromRawFields (CopyFromState cstate, char ***fields, int *nfields)
 
bool NextCopyFrom (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
 
static int GetDecimalFromHex (char hex)
 

Variables

/*
 * Eleven bytes: "PGCOPY", '\n', octal 377, '\r', '\n', and an explicit NUL.
 * The array is sized to exactly those 11 bytes, so the literal's implicit
 * terminator is not stored.
 * NOTE(review): presumably matched against the start of binary-format COPY
 * input -- confirm in ReceiveCopyBinaryHeader().
 */
static const char BinarySignature [11] = "PGCOPY\n\377\r\n\0"
 

Macro Definition Documentation

◆ IF_NEED_REFILL_AND_EOF_BREAK

#define IF_NEED_REFILL_AND_EOF_BREAK (   extralen)
Value:
/*
 * Statement-like macro body that relies on the caller's locals input_buf_ptr,
 * copy_buf_len, hit_eof and result.
 *
 * When input_buf_ptr + extralen reaches or passes copy_buf_len and hit_eof is
 * set: for a nonzero extralen, move input_buf_ptr to copy_buf_len; then set
 * result = true and break out of the caller's enclosing loop.
 *
 * The "if (1) { ... } else ((void) 0)" wrapper lets the macro be followed by
 * a semicolon like an ordinary statement while keeping the break bound to
 * the caller's loop; a do { } while (0) wrapper would capture the break.
 */
if (1) \
{ \
if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
{ \
if (extralen) \
input_buf_ptr = copy_buf_len; /* consume the partial character */ \
/* backslash just before EOF, treat as data char */ \
result = true; \
break; \
} \
} else ((void) 0)

Definition at line 109 of file copyfromparse.c.

Referenced by CopyReadLineText().

◆ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE

#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE (   extralen)
Value:
/*
 * Statement-like macro body that relies on the caller's locals input_buf_ptr,
 * copy_buf_len, hit_eof, prev_raw_ptr and need_data.
 *
 * When input_buf_ptr + extralen reaches or passes copy_buf_len and hit_eof is
 * not set: rewind input_buf_ptr to prev_raw_ptr, set need_data = true, and
 * continue the caller's enclosing loop.
 *
 * The "if (1) { ... } else ((void) 0)" wrapper keeps the continue bound to
 * the caller's loop; a do { } while (0) wrapper would capture it.
 */
if (1) \
{ \
if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
{ \
input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
need_data = true; \
continue; \
} \
} else ((void) 0)

Definition at line 97 of file copyfromparse.c.

Referenced by CopyReadLineText().

◆ ISOCTAL

/*
 * ISOCTAL(c): nonzero when c is an octal digit character ('0' through '7').
 *
 * The parameter list must follow the macro name with no intervening space;
 * otherwise the preprocessor defines an object-like macro whose replacement
 * text starts with "(c)".  The argument is evaluated twice, so it must not
 * have side effects.
 */
#define ISOCTAL(c)    (((c) >= '0') && ((c) <= '7'))

Definition at line 78 of file copyfromparse.c.

Referenced by CopyReadAttributesText().

◆ NO_END_OF_COPY_GOTO

#define NO_END_OF_COPY_GOTO
Value:
/*
 * Statement-like macro body that relies on the caller's locals input_buf_ptr
 * and prev_raw_ptr and on a label named not_end_of_copy in the caller.
 *
 * Sets input_buf_ptr to one past prev_raw_ptr and jumps to not_end_of_copy.
 */
if (1) \
{ \
input_buf_ptr = prev_raw_ptr + 1; \
goto not_end_of_copy; \
} else ((void) 0)

Definition at line 139 of file copyfromparse.c.

Referenced by CopyReadLineText().

◆ OCTVALUE

/*
 * OCTVALUE(c): numeric value of the digit character c, i.e. c - '0'.
 *
 * The parameter list must follow the macro name with no intervening space;
 * otherwise the preprocessor defines an object-like macro instead of a
 * function-like one.
 */
#define OCTVALUE(c)    ((c) - '0')

Definition at line 79 of file copyfromparse.c.

Referenced by CopyReadAttributesText().

◆ REFILL_LINEBUF

#define REFILL_LINEBUF
Value:
/*
 * Statement-like macro body that relies on the caller's locals cstate and
 * input_buf_ptr.
 *
 * If input_buf_ptr is ahead of cstate->input_buf_index, append the bytes of
 * cstate->input_buf in [input_buf_index, input_buf_ptr) to cstate->line_buf
 * and advance cstate->input_buf_index to input_buf_ptr.  Otherwise do
 * nothing.
 */
if (1) \
{ \
if (input_buf_ptr > cstate->input_buf_index) \
{ \
appendBinaryStringInfo(&cstate->line_buf, \
cstate->input_buf + cstate->input_buf_index, \
input_buf_ptr - cstate->input_buf_index); \
cstate->input_buf_index = input_buf_ptr; \
} \
} else ((void) 0)

Definition at line 126 of file copyfromparse.c.

Referenced by CopyReadLineText().

Function Documentation

◆ CopyConversionError()

static void CopyConversionError ( CopyFromState  cstate)
static

Definition at line 532 of file copyfromparse.c.

References Assert, CopyFromStateData::conversion_proc, elog, ERROR, CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, pg_do_encoding_conversion_buf(), CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and report_invalid_encoding().

Referenced by CopyLoadInputBuf().

533 {
534  Assert(cstate->raw_buf_len > 0);
535  Assert(cstate->input_reached_error);
536 
537  if (!cstate->need_transcoding)
538  {
539  /*
540  * Everything up to input_buf_len was successfully verified, and
541  * input_buf_len points to the invalid or incomplete character.
542  */
544  cstate->raw_buf + cstate->input_buf_len,
545  cstate->raw_buf_len - cstate->input_buf_len);
546  }
547  else
548  {
549  /*
550  * raw_buf_index points to the invalid or untranslatable character. We
551  * let the conversion routine report the error, because it can provide
552  * a more specific error message than we could here. An earlier call
553  * to the conversion routine in CopyConvertBuf() detected that there
554  * is an error, now we call the conversion routine again with
555  * noError=false, to have it throw the error.
556  */
557  unsigned char *src;
558  int srclen;
559  unsigned char *dst;
560  int dstlen;
561 
562  src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
563  srclen = cstate->raw_buf_len - cstate->raw_buf_index;
564  dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
565  dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
566 
568  cstate->file_encoding,
570  src, srclen,
571  dst, dstlen,
572  false);
573 
574  /*
575  * The conversion routine should have reported an error, so this
576  * should not be reached.
577  */
578  elog(ERROR, "encoding conversion failed without error");
579  }
580 }
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1647
#define ERROR
Definition: elog.h:46
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
#define Assert(condition)
Definition: c.h:804
#define elog(elevel,...)
Definition: elog.h:232
#define INPUT_BUF_SIZE
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
Definition: mbutils.c:469

◆ CopyConvertBuf()

static void CopyConvertBuf ( CopyFromState  cstate)
static

Definition at line 399 of file copyfromparse.c.

References CopyFromStateData::conversion_proc, CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, MAX_CONVERSION_INPUT_LENGTH, CopyFromStateData::need_transcoding, pg_database_encoding_max_length(), pg_do_encoding_conversion_buf(), pg_encoding_verifymbstr(), CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf().

400 {
401  /*
402  * If the file and server encoding are the same, no encoding conversion is
403  * required. However, we still need to verify that the input is valid for
404  * the encoding.
405  */
406  if (!cstate->need_transcoding)
407  {
408  /*
409  * When conversion is not required, input_buf and raw_buf are the
410  * same. raw_buf_len is the total number of bytes in the buffer, and
411  * input_buf_len tracks how many of those bytes have already been
412  * verified.
413  */
414  int preverifiedlen = cstate->input_buf_len;
415  int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
416  int nverified;
417 
418  if (unverifiedlen == 0)
419  {
420  /*
421  * If no more raw data is coming, report the EOF to the caller.
422  */
423  if (cstate->raw_reached_eof)
424  cstate->input_reached_eof = true;
425  return;
426  }
427 
428  /*
429  * Verify the new data, including any residual unverified bytes from
430  * previous round.
431  */
432  nverified = pg_encoding_verifymbstr(cstate->file_encoding,
433  cstate->raw_buf + preverifiedlen,
434  unverifiedlen);
435  if (nverified == 0)
436  {
437  /*
438  * Could not verify anything.
439  *
440  * If there is no more raw input data coming, it means that there
441  * was an incomplete multi-byte sequence at the end. Also, if
442  * there's "enough" input left, we should be able to verify at
443  * least one character, and a failure to do so means that we've
444  * hit an invalid byte sequence.
445  */
446  if (cstate->raw_reached_eof || unverifiedlen >= pg_database_encoding_max_length())
447  cstate->input_reached_error = true;
448  return;
449  }
450  cstate->input_buf_len += nverified;
451  }
452  else
453  {
454  /*
455  * Encoding conversion is needed.
456  */
457  int nbytes;
458  unsigned char *src;
459  int srclen;
460  unsigned char *dst;
461  int dstlen;
462  int convertedlen;
463 
464  if (RAW_BUF_BYTES(cstate) == 0)
465  {
466  /*
467  * If no more raw data is coming, report the EOF to the caller.
468  */
469  if (cstate->raw_reached_eof)
470  cstate->input_reached_eof = true;
471  return;
472  }
473 
474  /*
475  * First, copy down any unprocessed data.
476  */
477  nbytes = INPUT_BUF_BYTES(cstate);
478  if (nbytes > 0 && cstate->input_buf_index > 0)
479  memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
480  nbytes);
481  cstate->input_buf_index = 0;
482  cstate->input_buf_len = nbytes;
483  cstate->input_buf[nbytes] = '\0';
484 
485  src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
486  srclen = cstate->raw_buf_len - cstate->raw_buf_index;
487  dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
488  dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
489 
490  /*
491  * Do the conversion. This might stop short, if there is an invalid
492  * byte sequence in the input. We'll convert as much as we can in
493  * that case.
494  *
495  * Note: Even if we hit an invalid byte sequence, we don't report the
496  * error until all the valid bytes have been consumed. The input
497  * might contain an end-of-input marker (\.), and we don't want to
498  * report an error if the invalid byte sequence is after the
499  * end-of-input marker. We might unnecessarily convert some data
500  * after the end-of-input marker as long as it's valid for the
501  * encoding, but that's harmless.
502  */
503  convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
504  cstate->file_encoding,
506  src, srclen,
507  dst, dstlen,
508  true);
509  if (convertedlen == 0)
510  {
511  /*
512  * Could not convert anything. If there is no more raw input data
513  * coming, it means that there was an incomplete multi-byte
514  * sequence at the end. Also, if there is plenty of input left,
515  * we should be able to convert at least one character, so a
516  * failure to do so must mean that we've hit a byte sequence
517  * that's invalid.
518  */
519  if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
520  cstate->input_reached_error = true;
521  return;
522  }
523  cstate->raw_buf_index += convertedlen;
524  cstate->input_buf_len += strlen((char *) dst);
525  }
526 }
#define RAW_BUF_BYTES(cstate)
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
Definition: wchar.c:1953
#define INPUT_BUF_BYTES(cstate)
#define MAX_CONVERSION_INPUT_LENGTH
Definition: pg_wchar.h:334
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495
#define INPUT_BUF_SIZE
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
Definition: mbutils.c:469

◆ CopyGetData()

static int CopyGetData ( CopyFromState  cstate,
void *  databuf,
int  minread,
int  maxread 
)
static

Definition at line 244 of file copyfromparse.c.

References Assert, COPY_CALLBACK, COPY_FILE, CopyFromStateData::copy_file, COPY_FRONTEND, CopyFromStateData::copy_src, StringInfoData::cursor, CopyFromStateData::data_source_cb, ereport, errcode(), errcode_for_file_access(), errmsg(), ERROR, CopyFromStateData::fe_msgbuf, HOLD_CANCEL_INTERRUPTS, StringInfoData::len, pq_copymsgbytes(), pq_getbyte(), pq_getmessage(), pq_getmsgstring(), PQ_LARGE_MESSAGE_LIMIT, PQ_SMALL_MESSAGE_LIMIT, pq_startmsgread(), CopyFromStateData::raw_reached_eof, and RESUME_CANCEL_INTERRUPTS.

Referenced by CopyLoadRawBuf(), and CopyReadLine().

245 {
246  int bytesread = 0;
247 
248  switch (cstate->copy_src)
249  {
250  case COPY_FILE:
251  bytesread = fread(databuf, 1, maxread, cstate->copy_file);
252  if (ferror(cstate->copy_file))
253  ereport(ERROR,
255  errmsg("could not read from COPY file: %m")));
256  if (bytesread == 0)
257  cstate->raw_reached_eof = true;
258  break;
259  case COPY_FRONTEND:
260  while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
261  {
262  int avail;
263 
264  while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
265  {
266  /* Try to receive another message */
267  int mtype;
268  int maxmsglen;
269 
270  readmessage:
272  pq_startmsgread();
273  mtype = pq_getbyte();
274  if (mtype == EOF)
275  ereport(ERROR,
276  (errcode(ERRCODE_CONNECTION_FAILURE),
277  errmsg("unexpected EOF on client connection with an open transaction")));
278  /* Validate message type and set packet size limit */
279  switch (mtype)
280  {
281  case 'd': /* CopyData */
282  maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
283  break;
284  case 'c': /* CopyDone */
285  case 'f': /* CopyFail */
286  case 'H': /* Flush */
287  case 'S': /* Sync */
288  maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
289  break;
290  default:
291  ereport(ERROR,
292  (errcode(ERRCODE_PROTOCOL_VIOLATION),
293  errmsg("unexpected message type 0x%02X during COPY from stdin",
294  mtype)));
295  maxmsglen = 0; /* keep compiler quiet */
296  break;
297  }
298  /* Now collect the message body */
299  if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
300  ereport(ERROR,
301  (errcode(ERRCODE_CONNECTION_FAILURE),
302  errmsg("unexpected EOF on client connection with an open transaction")));
304  /* ... and process it */
305  switch (mtype)
306  {
307  case 'd': /* CopyData */
308  break;
309  case 'c': /* CopyDone */
310  /* COPY IN correctly terminated by frontend */
311  cstate->raw_reached_eof = true;
312  return bytesread;
313  case 'f': /* CopyFail */
314  ereport(ERROR,
315  (errcode(ERRCODE_QUERY_CANCELED),
316  errmsg("COPY from stdin failed: %s",
317  pq_getmsgstring(cstate->fe_msgbuf))));
318  break;
319  case 'H': /* Flush */
320  case 'S': /* Sync */
321 
322  /*
323  * Ignore Flush/Sync for the convenience of client
324  * libraries (such as libpq) that may send those
325  * without noticing that the command they just
326  * sent was COPY.
327  */
328  goto readmessage;
329  default:
330  Assert(false); /* NOT REACHED */
331  }
332  }
333  avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
334  if (avail > maxread)
335  avail = maxread;
336  pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
337  databuf = (void *) ((char *) databuf + avail);
338  maxread -= avail;
339  bytesread += avail;
340  }
341  break;
342  case COPY_CALLBACK:
343  bytesread = cstate->data_source_cb(databuf, minread, maxread);
344  break;
345  }
346 
347  return bytesread;
348 }
copy_data_source_cb data_source_cb
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:139
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:581
#define PQ_SMALL_MESSAGE_LIMIT
Definition: libpq.h:30
int errcode(int sqlerrcode)
Definition: elog.c:698
#define PQ_LARGE_MESSAGE_LIMIT
Definition: libpq.h:31
#define ERROR
Definition: elog.h:46
void pq_startmsgread(void)
Definition: pqcomm.c:1152
int errcode_for_file_access(void)
Definition: elog.c:721
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1214
int pq_getbyte(void)
Definition: pqcomm.c:994
#define ereport(elevel,...)
Definition: elog.h:157
#define Assert(condition)
Definition: c.h:804
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:530
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define RESUME_CANCEL_INTERRUPTS()
Definition: miscadmin.h:141

◆ CopyGetInt16()

static bool CopyGetInt16 ( CopyFromState  cstate,
int16 *  val 
)
inline static

Definition at line 378 of file copyfromparse.c.

References buf, CopyReadBinaryData(), and pg_ntoh16.

Referenced by NextCopyFrom().

379 {
380  uint16 buf;
381 
382  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
383  {
384  *val = 0; /* suppress compiler warning */
385  return false;
386  }
387  *val = (int16) pg_ntoh16(buf);
388  return true;
389 }
signed short int16
Definition: c.h:428
#define pg_ntoh16(x)
Definition: pg_bswap.h:124
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
unsigned short uint16
Definition: c.h:440
static char * buf
Definition: pg_test_fsync.c:68
long val
Definition: informix.c:664

◆ CopyGetInt32()

static bool CopyGetInt32 ( CopyFromState  cstate,
int32 *  val 
)
inline static

Definition at line 361 of file copyfromparse.c.

References buf, CopyReadBinaryData(), and pg_ntoh32.

Referenced by CopyReadBinaryAttribute(), and ReceiveCopyBinaryHeader().

362 {
363  uint32 buf;
364 
365  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
366  {
367  *val = 0; /* suppress compiler warning */
368  return false;
369  }
370  *val = (int32) pg_ntoh32(buf);
371  return true;
372 }
signed int int32
Definition: c.h:429
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
static char * buf
Definition: pg_test_fsync.c:68
unsigned int uint32
Definition: c.h:441
long val
Definition: informix.c:664

◆ CopyLoadInputBuf()

static void CopyLoadInputBuf ( CopyFromState  cstate)
static

Definition at line 649 of file copyfromparse.c.

References Assert, CopyConversionError(), CopyConvertBuf(), CopyLoadRawBuf(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyReadLineText().

650 {
651  int nbytes = INPUT_BUF_BYTES(cstate);
652 
653  /*
654  * The caller has updated input_buf_index to indicate how much of the
655  * input has been consumed and isn't needed anymore. If input_buf is the
656  * same physical area as raw_buf, update raw_buf_index accordingly.
657  */
658  if (cstate->raw_buf == cstate->input_buf)
659  {
660  Assert(!cstate->need_transcoding);
661  Assert(cstate->input_buf_index >= cstate->raw_buf_index);
662  cstate->raw_buf_index = cstate->input_buf_index;
663  }
664 
665  for (;;)
666  {
667  /* If we now have some unconverted data, try to convert it */
668  CopyConvertBuf(cstate);
669 
670  /* If we now have some more input bytes ready, return them */
671  if (INPUT_BUF_BYTES(cstate) > nbytes)
672  return;
673 
674  /*
675  * If we reached an invalid byte sequence, or we're at an incomplete
676  * multi-byte character but there is no more raw input data, report
677  * conversion error.
678  */
679  if (cstate->input_reached_error)
680  CopyConversionError(cstate);
681 
682  /* no more input, and everything has been converted */
683  if (cstate->input_reached_eof)
684  break;
685 
686  /* Try to load more raw data */
687  Assert(!cstate->raw_reached_eof);
688  CopyLoadRawBuf(cstate);
689  }
690 }
#define INPUT_BUF_BYTES(cstate)
static void CopyLoadRawBuf(CopyFromState cstate)
static void CopyConvertBuf(CopyFromState cstate)
static void CopyConversionError(CopyFromState cstate)
#define Assert(condition)
Definition: c.h:804

◆ CopyLoadRawBuf()

static void CopyLoadRawBuf ( CopyFromState  cstate)
static

Definition at line 589 of file copyfromparse.c.

References Assert, CopyFromStateData::bytes_processed, CopyGetData(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::need_transcoding, pgstat_progress_update_param(), PROGRESS_COPY_BYTES_PROCESSED, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, RAW_BUF_SIZE, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf(), and CopyReadBinaryData().

590 {
591  int nbytes;
592  int inbytes;
593 
594  /*
595  * In text mode, if encoding conversion is not required, raw_buf and
596  * input_buf point to the same buffer. Their len/index better agree, too.
597  */
598  if (cstate->raw_buf == cstate->input_buf)
599  {
600  Assert(!cstate->need_transcoding);
601  Assert(cstate->raw_buf_index == cstate->input_buf_index);
602  Assert(cstate->input_buf_len <= cstate->raw_buf_len);
603  }
604 
605  /*
606  * Copy down the unprocessed data if any.
607  */
608  nbytes = RAW_BUF_BYTES(cstate);
609  if (nbytes > 0 && cstate->raw_buf_index > 0)
610  memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
611  nbytes);
612  cstate->raw_buf_len -= cstate->raw_buf_index;
613  cstate->raw_buf_index = 0;
614 
615  /*
616  * If raw_buf and input_buf are in fact the same buffer, adjust the
617  * input_buf variables, too.
618  */
619  if (cstate->raw_buf == cstate->input_buf)
620  {
621  cstate->input_buf_len -= cstate->input_buf_index;
622  cstate->input_buf_index = 0;
623  }
624 
625  /* Load more data */
626  inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
627  1, RAW_BUF_SIZE - cstate->raw_buf_len);
628  nbytes += inbytes;
629  cstate->raw_buf[nbytes] = '\0';
630  cstate->raw_buf_len = nbytes;
631 
632  cstate->bytes_processed += inbytes;
634 
635  if (inbytes == 0)
636  cstate->raw_reached_eof = true;
637 }
#define RAW_BUF_BYTES(cstate)
#define RAW_BUF_SIZE
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
#define Assert(condition)
Definition: c.h:804
void pgstat_progress_update_param(int index, int64 val)
#define PROGRESS_COPY_BYTES_PROCESSED
Definition: progress.h:137

◆ CopyReadAttributesCSV()

static int CopyReadAttributesCSV ( CopyFromState  cstate)
static

Definition at line 1662 of file copyfromparse.c.

References Assert, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, CopyFormatOptions::escape, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::max_fields, StringInfoData::maxlen, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, CopyFromStateData::opts, CopyFormatOptions::quote, CopyFromStateData::raw_fields, repalloc(), and resetStringInfo().

Referenced by NextCopyFromRawFields().

/*
 * CopyReadAttributesCSV
 *
 * Split the current input line (cstate->line_buf) into fields using the
 * first character of each of cstate->opts.delim, .quote and .escape.
 *
 * The de-quoted text of every field is written, NUL-terminated, into
 * cstate->attribute_buf, and cstate->raw_fields[i] is pointed at field i's
 * text.  A field whose raw input contained no quote character and is
 * byte-for-byte equal to opts.null_print gets a NULL pointer instead.
 * raw_fields is doubled in size (repalloc) whenever it is full.
 *
 * Returns the number of fields found.  When cstate->max_fields <= 0, returns
 * 0 for an empty line and raises ERROR for a non-empty one.  Also raises
 * ERROR if the line ends while still inside a quoted section.
 */
1663 {
1664  char delimc = cstate->opts.delim[0];
1665  char quotec = cstate->opts.quote[0];
1666  char escapec = cstate->opts.escape[0];
1667  int fieldno;
1668  char *output_ptr;
1669  char *cur_ptr;
1670  char *line_end_ptr;
1671 
1672  /*
1673  * We need a special case for zero-column tables: check that the input
1674  * line is empty, and return.
1675  */
1676  if (cstate->max_fields <= 0)
1677  {
1678  if (cstate->line_buf.len != 0)
1679  ereport(ERROR,
1680  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1681  errmsg("extra data after last expected column")));
1682  return 0;
1683  }
1684 
1685  resetStringInfo(&cstate->attribute_buf);
1686 
1687  /*
1688  * The de-escaped attributes will certainly not be longer than the input
1689  * data line, so we can just force attribute_buf to be large enough and
1690  * then transfer data without any checks for enough space. We need to do
1691  * it this way because enlarging attribute_buf mid-stream would invalidate
1692  * pointers already stored into cstate->raw_fields[].
1693  */
1694  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1695  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1696  output_ptr = cstate->attribute_buf.data;
1697 
1698  /* set pointer variables for loop */
1699  cur_ptr = cstate->line_buf.data;
1700  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1701 
1702  /* Outer loop iterates over fields */
1703  fieldno = 0;
1704  for (;;)
1705  {
1706  bool found_delim = false;
1707  bool saw_quote = false;
1708  char *start_ptr;
1709  char *end_ptr;
1710  int input_len;
1711 
1712  /* Make sure there is enough space for the next value */
1713  if (fieldno >= cstate->max_fields)
1714  {
1715  cstate->max_fields *= 2;
1716  cstate->raw_fields =
1717  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1718  }
1719 
1720  /* Remember start of field on both input and output sides */
1721  start_ptr = cur_ptr;
1722  cstate->raw_fields[fieldno] = output_ptr;
1723 
1724  /*
1725  * Scan data for field,
1726  *
1727  * The loop starts in "not quote" mode and then toggles between that
1728  * and "in quote" mode. The loop exits normally if it is in "not
1729  * quote" mode and a delimiter or line end is seen.
1730  */
1731  for (;;)
1732  {
1733  char c;
1734 
1735  /* Not in quote */
1736  for (;;)
1737  {
 /* end_ptr trails cur_ptr so that it excludes the delimiter consumed below */
1738  end_ptr = cur_ptr;
1739  if (cur_ptr >= line_end_ptr)
1740  goto endfield;
1741  c = *cur_ptr++;
1742  /* unquoted field delimiter */
1743  if (c == delimc)
1744  {
1745  found_delim = true;
1746  goto endfield;
1747  }
1748  /* start of quoted field (or part of field) */
1749  if (c == quotec)
1750  {
1751  saw_quote = true;
1752  break;
1753  }
1754  /* Add c to output string */
1755  *output_ptr++ = c;
1756  }
1757 
1758  /* In quote */
1759  for (;;)
1760  {
1761  end_ptr = cur_ptr;
1762  if (cur_ptr >= line_end_ptr)
1763  ereport(ERROR,
1764  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1765  errmsg("unterminated CSV quoted field")));
1766 
1767  c = *cur_ptr++;
1768 
1769  /* escape within a quoted field */
1770  if (c == escapec)
1771  {
1772  /*
1773  * peek at the next char if available, and escape it if it
1774  * is an escape char or a quote char
1775  */
1776  if (cur_ptr < line_end_ptr)
1777  {
1778  char nextc = *cur_ptr;
1779 
1780  if (nextc == escapec || nextc == quotec)
1781  {
1782  *output_ptr++ = nextc;
1783  cur_ptr++;
1784  continue;
1785  }
1786  }
1787  }
1788 
1789  /*
1790  * end of quoted field. Must do this test after testing for
1791  * escape in case quote char and escape char are the same
1792  * (which is the common case).
1793  */
1794  if (c == quotec)
1795  break;
1796 
1797  /* Add c to output string */
1798  *output_ptr++ = c;
1799  }
1800  }
1801 endfield:
1802 
1803  /* Terminate attribute value in output area */
1804  *output_ptr++ = '\0';
1805 
1806  /* Check whether raw input matched null marker */
1807  input_len = end_ptr - start_ptr;
1808  if (!saw_quote && input_len == cstate->opts.null_print_len &&
1809  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1810  cstate->raw_fields[fieldno] = NULL;
1811 
1812  fieldno++;
1813  /* Done if we hit EOL instead of a delim */
1814  if (!found_delim)
1815  break;
1816  }
1817 
1818  /* Clean up state of attribute_buf */
 /* step back onto the last field's terminator so that len excludes it */
1819  output_ptr--;
1820  Assert(*output_ptr == '\0');
1821  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1822 
1823  return fieldno;
1824 }
int null_print_len
Definition: copy.h:37
StringInfoData attribute_buf
StringInfoData line_buf
int errcode(int sqlerrcode)
Definition: elog.c:698
char * null_print
Definition: copy.h:36
char * quote
Definition: copy.h:40
#define ERROR
Definition: elog.h:46
char * c
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
char * delim
Definition: copy.h:39
#define ereport(elevel,...)
Definition: elog.h:157
#define Assert(condition)
Definition: c.h:804
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
CopyFormatOptions opts
char * escape
Definition: copy.h:41
int errmsg(const char *fmt,...)
Definition: elog.c:909

◆ CopyReadAttributesText()

static int CopyReadAttributesText ( CopyFromState  cstate)
static

Definition at line 1434 of file copyfromparse.c.

References Assert, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, GetDecimalFromHex(), IS_HIGHBIT_SET, ISOCTAL, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::max_fields, StringInfoData::maxlen, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, OCTVALUE, CopyFromStateData::opts, pg_verifymbstr(), CopyFromStateData::raw_fields, repalloc(), resetStringInfo(), and val.

Referenced by NextCopyFromRawFields().

/*
 * CopyReadAttributesText
 *
 * Split the current input line (cstate->line_buf) into fields at each
 * unescaped occurrence of the first character of cstate->opts.delim, and
 * de-escape backslash sequences along the way.
 *
 * Recognized after a backslash: one to three octal digits, 'x' followed by
 * one or two hex digits, and b, f, n, r, t, v.  Any other character after a
 * backslash (including 'x' with no hex digit after it) is taken literally.
 * A backslash that is the last character of the line ends the field without
 * emitting anything for it.
 *
 * The de-escaped text of every field is written, NUL-terminated, into
 * cstate->attribute_buf, and cstate->raw_fields[i] is pointed at field i's
 * text.  A field whose raw (pre-de-escaping) input equals opts.null_print
 * gets a NULL pointer instead.  For any other field in which an octal or hex
 * escape produced a NUL byte or a byte with the high bit set, the result is
 * passed to pg_verifymbstr() with noError = false.  raw_fields is doubled in
 * size (repalloc) whenever it is full.
 *
 * Returns the number of fields found.  When cstate->max_fields <= 0, returns
 * 0 for an empty line and raises ERROR for a non-empty one.
 */
1435 {
1436  char delimc = cstate->opts.delim[0];
1437  int fieldno;
1438  char *output_ptr;
1439  char *cur_ptr;
1440  char *line_end_ptr;
1441 
1442  /*
1443  * We need a special case for zero-column tables: check that the input
1444  * line is empty, and return.
1445  */
1446  if (cstate->max_fields <= 0)
1447  {
1448  if (cstate->line_buf.len != 0)
1449  ereport(ERROR,
1450  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1451  errmsg("extra data after last expected column")));
1452  return 0;
1453  }
1454 
1455  resetStringInfo(&cstate->attribute_buf);
1456 
1457  /*
1458  * The de-escaped attributes will certainly not be longer than the input
1459  * data line, so we can just force attribute_buf to be large enough and
1460  * then transfer data without any checks for enough space. We need to do
1461  * it this way because enlarging attribute_buf mid-stream would invalidate
1462  * pointers already stored into cstate->raw_fields[].
1463  */
1464  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1465  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1466  output_ptr = cstate->attribute_buf.data;
1467 
1468  /* set pointer variables for loop */
1469  cur_ptr = cstate->line_buf.data;
1470  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1471 
1472  /* Outer loop iterates over fields */
1473  fieldno = 0;
1474  for (;;)
1475  {
1476  bool found_delim = false;
1477  char *start_ptr;
1478  char *end_ptr;
1479  int input_len;
1480  bool saw_non_ascii = false;
1481 
1482  /* Make sure there is enough space for the next value */
1483  if (fieldno >= cstate->max_fields)
1484  {
1485  cstate->max_fields *= 2;
1486  cstate->raw_fields =
1487  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1488  }
1489 
1490  /* Remember start of field on both input and output sides */
1491  start_ptr = cur_ptr;
1492  cstate->raw_fields[fieldno] = output_ptr;
1493 
1494  /*
1495  * Scan data for field.
1496  *
1497  * Note that in this loop, we are scanning to locate the end of field
1498  * and also speculatively performing de-escaping. Once we find the
1499  * end-of-field, we can match the raw field contents against the null
1500  * marker string. Only after that comparison fails do we know that
1501  * de-escaping is actually the right thing to do; therefore we *must
1502  * not* throw any syntax errors before we've done the null-marker
1503  * check.
1504  */
1505  for (;;)
1506  {
1507  char c;
1508 
 /* end_ptr trails cur_ptr so that it excludes the delimiter consumed below */
1509  end_ptr = cur_ptr;
1510  if (cur_ptr >= line_end_ptr)
1511  break;
1512  c = *cur_ptr++;
1513  if (c == delimc)
1514  {
1515  found_delim = true;
1516  break;
1517  }
1518  if (c == '\\')
1519  {
 /* backslash as the very last character: stop without emitting it */
1520  if (cur_ptr >= line_end_ptr)
1521  break;
1522  c = *cur_ptr++;
1523  switch (c)
1524  {
1525  case '0':
1526  case '1':
1527  case '2':
1528  case '3':
1529  case '4':
1530  case '5':
1531  case '6':
1532  case '7':
1533  {
1534  /* handle \013 */
1535  int val;
1536 
1537  val = OCTVALUE(c);
1538  if (cur_ptr < line_end_ptr)
1539  {
1540  c = *cur_ptr;
1541  if (ISOCTAL(c))
1542  {
1543  cur_ptr++;
1544  val = (val << 3) + OCTVALUE(c);
1545  if (cur_ptr < line_end_ptr)
1546  {
1547  c = *cur_ptr;
1548  if (ISOCTAL(c))
1549  {
1550  cur_ptr++;
1551  val = (val << 3) + OCTVALUE(c);
1552  }
1553  }
1554  }
1555  }
 /* only the low eight bits of the accumulated value are kept */
1556  c = val & 0377;
1557  if (c == '\0' || IS_HIGHBIT_SET(c))
1558  saw_non_ascii = true;
1559  }
1560  break;
1561  case 'x':
1562  /* Handle \x3F */
1563  if (cur_ptr < line_end_ptr)
1564  {
1565  char hexchar = *cur_ptr;
1566 
1567  if (isxdigit((unsigned char) hexchar))
1568  {
1569  int val = GetDecimalFromHex(hexchar);
1570 
1571  cur_ptr++;
1572  if (cur_ptr < line_end_ptr)
1573  {
1574  hexchar = *cur_ptr;
1575  if (isxdigit((unsigned char) hexchar))
1576  {
1577  cur_ptr++;
1578  val = (val << 4) + GetDecimalFromHex(hexchar);
1579  }
1580  }
1581  c = val & 0xff;
1582  if (c == '\0' || IS_HIGHBIT_SET(c))
1583  saw_non_ascii = true;
1584  }
1585  }
1586  break;
1587  case 'b':
1588  c = '\b';
1589  break;
1590  case 'f':
1591  c = '\f';
1592  break;
1593  case 'n':
1594  c = '\n';
1595  break;
1596  case 'r':
1597  c = '\r';
1598  break;
1599  case 't':
1600  c = '\t';
1601  break;
1602  case 'v':
1603  c = '\v';
1604  break;
1605 
1606  /*
1607  * in all other cases, take the char after '\'
1608  * literally
1609  */
1610  }
1611  }
1612 
1613  /* Add c to output string */
1614  *output_ptr++ = c;
1615  }
1616 
1617  /* Check whether raw input matched null marker */
1618  input_len = end_ptr - start_ptr;
1619  if (input_len == cstate->opts.null_print_len &&
1620  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1621  cstate->raw_fields[fieldno] = NULL;
1622  else
1623  {
1624  /*
1625  * At this point we know the field is supposed to contain data.
1626  *
1627  * If we de-escaped any non-7-bit-ASCII chars, make sure the
1628  * resulting string is valid data for the db encoding.
1629  */
1630  if (saw_non_ascii)
1631  {
1632  char *fld = cstate->raw_fields[fieldno];
1633 
1634  pg_verifymbstr(fld, output_ptr - fld, false);
1635  }
1636  }
1637 
1638  /* Terminate attribute value in output area */
1639  *output_ptr++ = '\0';
1640 
1641  fieldno++;
1642  /* Done if we hit EOL instead of a delim */
1643  if (!found_delim)
1644  break;
1645  }
1646 
1647  /* Clean up state of attribute_buf */
 /* step back onto the last field's terminator so that len excludes it */
1648  output_ptr--;
1649  Assert(*output_ptr == '\0');
1650  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1651 
1652  return fieldno;
1653 }
int null_print_len
Definition: copy.h:37
StringInfoData attribute_buf
StringInfoData line_buf
int errcode(int sqlerrcode)
Definition: elog.c:698
char * null_print
Definition: copy.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1156
#define ERROR
Definition: elog.h:46
char * c
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
#define ISOCTAL(c)
Definition: copyfromparse.c:78
#define OCTVALUE(c)
Definition: copyfromparse.c:79
char * delim
Definition: copy.h:39
static int GetDecimalFromHex(char hex)
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1505
#define ereport(elevel,...)
Definition: elog.h:157
#define Assert(condition)
Definition: c.h:804
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
CopyFormatOptions opts
int errmsg(const char *fmt,...)
Definition: elog.c:909
long val
Definition: informix.c:664

◆ CopyReadBinaryAttribute()

static Datum CopyReadBinaryAttribute ( CopyFromState  cstate,
FmgrInfo *  flinfo,
Oid  typioparam,
int32  typmod,
bool *  isnull 
)
static

Definition at line 1831 of file copyfromparse.c.

References CopyFromStateData::attribute_buf, CopyGetInt32(), CopyReadBinaryData(), StringInfoData::cursor, StringInfoData::data, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, StringInfoData::len, ReceiveFunctionCall(), and resetStringInfo().

Referenced by NextCopyFrom().

/*
 * Body of CopyReadBinaryAttribute(): read one field of a binary-format row.
 *
 * An int32 field size is fetched with CopyGetInt32().  A size of -1 marks a
 * NULL field: *isnull is set to true and the receive function is called
 * with a NULL buffer.  Any other negative size is rejected.  Otherwise
 * fld_size bytes are loaded into cstate->attribute_buf and passed to
 * ReceiveFunctionCall(), *isnull is set to false and the converted Datum is
 * returned.
 *
 * Errors are raised when the input ends before the size or the payload has
 * been read in full, and when the receive function leaves part of the
 * buffer unconsumed.
 */
1834 {
1835  int32 fld_size;
1836  Datum result;
1837 
1838  if (!CopyGetInt32(cstate, &fld_size))
1839  ereport(ERROR,
1840  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1841  errmsg("unexpected EOF in COPY data")));
1842  if (fld_size == -1)
1843  {
1844  *isnull = true;
1845  return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod); /* NULL field: no buffer is passed */
1846  }
1847  if (fld_size < 0)
1848  ereport(ERROR,
1849  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1850  errmsg("invalid field size")));
1851 
1852  /* reset attribute_buf to empty, and load raw data in it */
1853  resetStringInfo(&cstate->attribute_buf);
1854 
1855  enlargeStringInfo(&cstate->attribute_buf, fld_size);
1856  if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
1857  fld_size) != fld_size)
1858  ereport(ERROR,
1859  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1860  errmsg("unexpected EOF in COPY data")));
1861 
1862  cstate->attribute_buf.len = fld_size;
1863  cstate->attribute_buf.data[fld_size] = '\0'; /* terminate just past the loaded bytes */
1864 
1865  /* Call the column type's binary input converter */
1866  result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
1867  typioparam, typmod);
1868 
1869  /* Trouble if it didn't eat the whole buffer */
1870  if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
1871  ereport(ERROR,
1872  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1873  errmsg("incorrect binary data format")));
1874 
1875  *isnull = false;
1876  return result;
1877 }
StringInfoData attribute_buf
int errcode(int sqlerrcode)
Definition: elog.c:698
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
signed int int32
Definition: c.h:429
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
#define ERROR
Definition: elog.h:46
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1587
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
uintptr_t Datum
Definition: postgres.h:411
#define ereport(elevel,...)
Definition: elog.h:157
int errmsg(const char *fmt,...)
Definition: elog.c:909

◆ CopyReadBinaryData()

static int CopyReadBinaryData ( CopyFromState  cstate,
char *  dest,
int  nbytes 
)
static

Definition at line 700 of file copyfromparse.c.

References CopyLoadRawBuf(), Min, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyGetInt16(), CopyGetInt32(), CopyReadBinaryAttribute(), NextCopyFrom(), and ReceiveCopyBinaryHeader().

/*
 * Body of CopyReadBinaryData(): copy up to nbytes from cstate->raw_buf into
 * dest, advancing cstate->raw_buf_index past the bytes consumed.
 *
 * When the buffer already holds nbytes they are copied in one step.
 * Otherwise the buffer is drained and refilled with CopyLoadRawBuf() until
 * either nbytes have been copied or cstate->raw_reached_eof is set after a
 * refill.
 *
 * Returns the number of bytes actually copied; this is less than nbytes
 * only when the loop stopped on EOF.
 */
701 {
702  int copied_bytes = 0;
703 
704  if (RAW_BUF_BYTES(cstate) >= nbytes)
705  {
706  /* Enough bytes are present in the buffer. */
707  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
708  cstate->raw_buf_index += nbytes;
709  copied_bytes = nbytes;
710  }
711  else
712  {
713  /*
714  * Not enough bytes in the buffer, so must read from the file. Need
715  * to loop since 'nbytes' could be larger than the buffer size.
716  */
717  do
718  {
719  int copy_bytes;
720 
721  /* Load more data if buffer is empty. */
722  if (RAW_BUF_BYTES(cstate) == 0)
723  {
724  CopyLoadRawBuf(cstate);
725  if (cstate->raw_reached_eof)
726  break; /* EOF */
727  }
728 
729  /* Transfer some bytes: whichever is smaller, what is still wanted or what is buffered. */
730  copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
731  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
732  cstate->raw_buf_index += copy_bytes;
733  dest += copy_bytes;
734  copied_bytes += copy_bytes;
735  } while (copied_bytes < nbytes);
736  }
737 
738  return copied_bytes;
739 }
#define RAW_BUF_BYTES(cstate)
#define Min(x, y)
Definition: c.h:986
static void CopyLoadRawBuf(CopyFromState cstate)

◆ CopyReadLine()

static bool CopyReadLine ( CopyFromState  cstate)
static

Definition at line 988 of file copyfromparse.c.

References Assert, COPY_FRONTEND, CopyFromStateData::copy_src, CopyGetData(), CopyReadLineText(), StringInfoData::data, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::line_buf_valid, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and resetStringInfo().

Referenced by NextCopyFromRawFields().

/*
 * Body of CopyReadLine(): fetch the next input line into cstate->line_buf.
 *
 * The scanning itself is delegated to CopyReadLineText(), and this
 * function returns that call's result unchanged: true means EOF was
 * reported, false means a line ending in an EOL marker was read.
 *
 * On EOF with a frontend source, the remaining incoming data is read and
 * discarded and the input/raw buffer positions are reset.  Otherwise the
 * EOL marker (as recorded in cstate->eol_type) is stripped from line_buf.
 *
 * line_buf_valid is cleared on entry and set again just before returning.
 */
989 {
990  bool result;
991 
992  resetStringInfo(&cstate->line_buf);
993  cstate->line_buf_valid = false;
994 
995  /* Parse data and transfer into line_buf */
996  result = CopyReadLineText(cstate);
997 
998  if (result)
999  {
1000  /*
1001  * Reached EOF. In protocol version 3, we should ignore anything
1002  * after \. up to the protocol end of copy data. (XXX maybe better
1003  * not to treat \. as special?)
1004  */
1005  if (cstate->copy_src == COPY_FRONTEND)
1006  {
1007  int inbytes;
1008 
1009  do
1010  {
1011  inbytes = CopyGetData(cstate, cstate->input_buf,
1012  1, INPUT_BUF_SIZE);
1013  } while (inbytes > 0);
1014  cstate->input_buf_index = 0;
1015  cstate->input_buf_len = 0;
1016  cstate->raw_buf_index = 0;
1017  cstate->raw_buf_len = 0;
1018  }
1019  }
1020  else
1021  {
1022  /*
1023  * If we didn't hit EOF, then we must have transferred the EOL marker
1024  * to line_buf along with the data. Get rid of it.
1025  */
1026  switch (cstate->eol_type)
1027  {
1028  case EOL_NL:
1029  Assert(cstate->line_buf.len >= 1);
1030  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1031  cstate->line_buf.len--;
1032  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1033  break;
1034  case EOL_CR:
1035  Assert(cstate->line_buf.len >= 1);
1036  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
1037  cstate->line_buf.len--;
1038  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1039  break;
1040  case EOL_CRNL:
1041  Assert(cstate->line_buf.len >= 2);
1042  Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
1043  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1044  cstate->line_buf.len -= 2;
1045  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1046  break;
1047  case EOL_UNKNOWN:
1048  /* shouldn't get here */
1049  Assert(false);
1050  break;
1051  }
1052  }
1053 
1054  /* Now it's safe to use the buffer in error messages */
1055  cstate->line_buf_valid = true;
1056 
1057  return result;
1058 }
StringInfoData line_buf
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
#define Assert(condition)
Definition: c.h:804
static bool CopyReadLineText(CopyFromState cstate)
#define INPUT_BUF_SIZE

◆ CopyReadLineText()

static bool CopyReadLineText ( CopyFromState  cstate)
static

Definition at line 1064 of file copyfromparse.c.

References appendBinaryStringInfo(), CopyLoadInputBuf(), CopyFormatOptions::csv_mode, CopyFromStateData::cur_lineno, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, ereport, errcode(), errhint(), errmsg(), ERROR, CopyFormatOptions::escape, IF_NEED_REFILL_AND_EOF_BREAK, IF_NEED_REFILL_AND_NOT_EOF_CONTINUE, CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::input_reached_eof, CopyFromStateData::line_buf, NO_END_OF_COPY_GOTO, CopyFromStateData::opts, CopyFormatOptions::quote, and REFILL_LINEBUF.

Referenced by CopyReadLine().

/*
 * Body of CopyReadLineText(): scan cstate->input_buf for the end of the
 * next line, detecting \r, \n, \r\n and the end-of-copy marker (\.), and
 * establishing or enforcing cstate->eol_type along the way.
 *
 * Returns the local 'result' flag: true when EOF is reported (either no
 * input bytes remain or a valid end-of-copy marker was found), false when
 * the loop ended on a line terminator.
 *
 * NOTE(review): this listing skips the numbers 1138, 1175, 1216, 1284-1285,
 * 1307, 1318, 1327, 1332, 1343, 1360 and 1397.  The page's reference list
 * for this function names REFILL_LINEBUF,
 * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE, IF_NEED_REFILL_AND_EOF_BREAK,
 * NO_END_OF_COPY_GOTO and appendBinaryStringInfo(), none of which appear in
 * the text below, so those statements were presumably dropped when the
 * listing was extracted -- confirm against the original file before
 * relying on the control flow as shown.
 */
1065 {
1066  char *copy_input_buf;
1067  int input_buf_ptr;
1068  int copy_buf_len;
1069  bool need_data = false;
1070  bool hit_eof = false;
1071  bool result = false;
1072 
1073  /* CSV variables */
1074  bool first_char_in_line = true;
1075  bool in_quote = false,
1076  last_was_esc = false;
1077  char quotec = '\0';
1078  char escapec = '\0';
1079 
1080  if (cstate->opts.csv_mode)
1081  {
1082  quotec = cstate->opts.quote[0];
1083  escapec = cstate->opts.escape[0];
1084  /* ignore special escape processing if it's the same as quotec */
1085  if (quotec == escapec)
1086  escapec = '\0';
1087  }
1088 
1089  /*
1090  * The objective of this loop is to transfer the entire next input line
1091  * into line_buf. Hence, we only care for detecting newlines (\r and/or
1092  * \n) and the end-of-copy marker (\.).
1093  *
1094  * In CSV mode, \r and \n inside a quoted field are just part of the data
1095  * value and are put in line_buf. We keep just enough state to know if we
1096  * are currently in a quoted field or not.
1097  *
1098  * These four characters, and the CSV escape and quote characters, are
1099  * assumed the same in frontend and backend encodings.
1100  *
1101  * The input has already been converted to the database encoding. All
1102  * supported server encodings have the property that all bytes in a
1103  * multi-byte sequence have the high bit set, so a multibyte character
1104  * cannot contain any newline or escape characters embedded in the
1105  * multibyte sequence. Therefore, we can process the input byte-by-byte,
1106  * regardless of the encoding.
1107  *
1108  * For speed, we try to move data from input_buf to line_buf in chunks
1109  * rather than one character at a time. input_buf_ptr points to the next
1110  * character to examine; any characters from input_buf_index to
1111  * input_buf_ptr have been determined to be part of the line, but not yet
1112  * transferred to line_buf.
1113  *
1114  * For a little extra speed within the loop, we copy input_buf and
1115  * input_buf_len into local variables.
1116  */
1117  copy_input_buf = cstate->input_buf;
1118  input_buf_ptr = cstate->input_buf_index;
1119  copy_buf_len = cstate->input_buf_len;
1120 
1121  for (;;)
1122  {
1123  int prev_raw_ptr;
1124  char c;
1125 
1126  /*
1127  * Load more data if needed. Ideally we would just force four bytes
1128  * of read-ahead and avoid the many calls to
1129  * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
1130  * does not allow us to read too far ahead or we might read into the
1131  * next data, so we read-ahead only as far as we know we can. One
1132  * optimization would be to read-ahead four bytes here if
1133  * cstate->copy_src != COPY_OLD_FE, but it hardly seems worth it,
1134  * considering the size of the buffer.
1135  */
1136  if (input_buf_ptr >= copy_buf_len || need_data)
1137  {
1139 
1140  CopyLoadInputBuf(cstate);
1141  /* update our local variables */
1142  hit_eof = cstate->input_reached_eof;
1143  input_buf_ptr = cstate->input_buf_index;
1144  copy_buf_len = cstate->input_buf_len;
1145 
1146  /*
1147  * If we are completely out of data, break out of the loop,
1148  * reporting EOF.
1149  */
1150  if (INPUT_BUF_BYTES(cstate) <= 0)
1151  {
1152  result = true;
1153  break;
1154  }
1155  need_data = false;
1156  }
1157 
1158  /* OK to fetch a character */
1159  prev_raw_ptr = input_buf_ptr;
1160  c = copy_input_buf[input_buf_ptr++];
1161 
1162  if (cstate->opts.csv_mode)
1163  {
1164  /*
1165  * If character is '\\' or '\r', we may need to look ahead below.
1166  * Force fetch of the next character if we don't already have it.
1167  * We need to do this before changing CSV state, in case one of
1168  * these characters is also the quote or escape character.
1169  *
1170  * Note: old-protocol does not like forced prefetch, but it's OK
1171  * here since we cannot validly be at EOF.
1172  */
1173  if (c == '\\' || c == '\r')
1174  {
1176  }
1177 
1178  /*
1179  * Dealing with quotes and escapes here is mildly tricky. If the
1180  * quote char is also the escape char, there's no problem - we
1181  * just use the char as a toggle. If they are different, we need
1182  * to ensure that we only take account of an escape inside a
1183  * quoted field and immediately preceding a quote char, and not
1184  * the second in an escape-escape sequence.
1185  */
1186  if (in_quote && c == escapec)
1187  last_was_esc = !last_was_esc;
1188  if (c == quotec && !last_was_esc)
1189  in_quote = !in_quote;
1190  if (c != escapec)
1191  last_was_esc = false;
1192 
1193  /*
1194  * Updating the line count for embedded CR and/or LF chars is
1195  * necessarily a little fragile - this test is probably about the
1196  * best we can do. (XXX it's arguable whether we should do this
1197  * at all --- is cur_lineno a physical or logical count?)
1198  */
1199  if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
1200  cstate->cur_lineno++;
1201  }
1202 
1203  /* Process \r */
1204  if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
1205  {
1206  /* Check for \r\n on first line, _and_ handle \r\n. */
1207  if (cstate->eol_type == EOL_UNKNOWN ||
1208  cstate->eol_type == EOL_CRNL)
1209  {
1210  /*
1211  * If need more data, go back to loop top to load it.
1212  *
1213  * Note that if we are at EOF, c will wind up as '\0' because
1214  * of the guaranteed pad of input_buf.
1215  */
1217 
1218  /* get next char */
1219  c = copy_input_buf[input_buf_ptr];
1220 
1221  if (c == '\n')
1222  {
1223  input_buf_ptr++; /* eat newline */
1224  cstate->eol_type = EOL_CRNL; /* in case not set yet */
1225  }
1226  else
1227  {
1228  /* found \r, but no \n */
1229  if (cstate->eol_type == EOL_CRNL)
1230  ereport(ERROR,
1231  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1232  !cstate->opts.csv_mode ?
1233  errmsg("literal carriage return found in data") :
1234  errmsg("unquoted carriage return found in data"),
1235  !cstate->opts.csv_mode ?
1236  errhint("Use \"\\r\" to represent carriage return.") :
1237  errhint("Use quoted CSV field to represent carriage return.")));
1238 
1239  /*
1240  * if we got here, it is the first line and we didn't find
1241  * \n, so don't consume the peeked character
1242  */
1243  cstate->eol_type = EOL_CR;
1244  }
1245  }
1246  else if (cstate->eol_type == EOL_NL)
1247  ereport(ERROR,
1248  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1249  !cstate->opts.csv_mode ?
1250  errmsg("literal carriage return found in data") :
1251  errmsg("unquoted carriage return found in data"),
1252  !cstate->opts.csv_mode ?
1253  errhint("Use \"\\r\" to represent carriage return.") :
1254  errhint("Use quoted CSV field to represent carriage return.")));
1255  /* If reach here, we have found the line terminator */
1256  break;
1257  }
1258 
1259  /* Process \n */
1260  if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
1261  {
1262  if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
1263  ereport(ERROR,
1264  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1265  !cstate->opts.csv_mode ?
1266  errmsg("literal newline found in data") :
1267  errmsg("unquoted newline found in data"),
1268  !cstate->opts.csv_mode ?
1269  errhint("Use \"\\n\" to represent newline.") :
1270  errhint("Use quoted CSV field to represent newline.")));
1271  cstate->eol_type = EOL_NL; /* in case not set yet */
1272  /* If reach here, we have found the line terminator */
1273  break;
1274  }
1275 
1276  /*
1277  * In CSV mode, we only recognize \. alone on a line. This is because
1278  * \. is a valid CSV data value.
1279  */
1280  if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
1281  {
1282  char c2;
1283 
1286 
1287  /* -----
1288  * get next character
1289  * Note: we do not change c so if it isn't \., we can fall
1290  * through and continue processing.
1291  * -----
1292  */
1293  c2 = copy_input_buf[input_buf_ptr];
1294 
1295  if (c2 == '.')
1296  {
1297  input_buf_ptr++; /* consume the '.' */
1298 
1299  /*
1300  * Note: if we loop back for more data here, it does not
1301  * matter that the CSV state change checks are re-executed; we
1302  * will come back here with no important state changed.
1303  */
1304  if (cstate->eol_type == EOL_CRNL)
1305  {
1306  /* Get the next character */
1308  /* if hit_eof, c2 will become '\0' */
1309  c2 = copy_input_buf[input_buf_ptr++];
1310 
1311  if (c2 == '\n')
1312  {
1313  if (!cstate->opts.csv_mode)
1314  ereport(ERROR,
1315  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1316  errmsg("end-of-copy marker does not match previous newline style")));
1317  else
1319  }
1320  else if (c2 != '\r')
1321  {
1322  if (!cstate->opts.csv_mode)
1323  ereport(ERROR,
1324  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1325  errmsg("end-of-copy marker corrupt")));
1326  else
1328  }
1329  }
1330 
1331  /* Get the next character */
1333  /* if hit_eof, c2 will become '\0' */
1334  c2 = copy_input_buf[input_buf_ptr++];
1335 
1336  if (c2 != '\r' && c2 != '\n')
1337  {
1338  if (!cstate->opts.csv_mode)
1339  ereport(ERROR,
1340  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1341  errmsg("end-of-copy marker corrupt")));
1342  else
1344  }
1345 
1346  if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
1347  (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
1348  (cstate->eol_type == EOL_CR && c2 != '\r'))
1349  {
1350  ereport(ERROR,
1351  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1352  errmsg("end-of-copy marker does not match previous newline style")));
1353  }
1354 
1355  /*
1356  * Transfer only the data before the \. into line_buf, then
1357  * discard the data and the \. sequence.
1358  */
1359  if (prev_raw_ptr > cstate->input_buf_index)
1361  cstate->input_buf + cstate->input_buf_index,
1362  prev_raw_ptr - cstate->input_buf_index);
1363  cstate->input_buf_index = input_buf_ptr;
1364  result = true; /* report EOF */
1365  break;
1366  }
1367  else if (!cstate->opts.csv_mode)
1368  {
1369  /*
1370  * If we are here, it means we found a backslash followed by
1371  * something other than a period. In non-CSV mode, anything
1372  * after a backslash is special, so we skip over that second
1373  * character too. If we didn't do that \\. would be
1374  * considered an end-of-copy marker, while in non-CSV mode it is a
1375  * literal backslash followed by a period. In CSV mode,
1376  * backslashes are not special, so we want to process the
1377  * character after the backslash just like a normal character,
1378  * so we don't increment in those cases.
1379  */
1380  input_buf_ptr++;
1381  }
1382  }
1383 
1384  /*
1385  * This label is for CSV cases where \. appears at the start of a
1386  * line, but there is more text after it, meaning it was a data value.
1387  * We are more strict for \. in CSV mode because \. could be a data
1388  * value, while in non-CSV mode, \. cannot be a data value.
1389  */
1390 not_end_of_copy:
1391  first_char_in_line = false;
1392  } /* end of outer loop */
1393 
1394  /*
1395  * Transfer any still-uncopied data to line_buf.
1396  */
1398 
1399  return result;
1400 }
int errhint(const char *fmt,...)
Definition: elog.c:1156
StringInfoData line_buf
int errcode(int sqlerrcode)
Definition: elog.c:698
#define INPUT_BUF_BYTES(cstate)
char * quote
Definition: copy.h:40
static void CopyLoadInputBuf(CopyFromState cstate)
#define ERROR
Definition: elog.h:46
char * c
bool csv_mode
Definition: copy.h:34
#define NO_END_OF_COPY_GOTO
#define REFILL_LINEBUF
#define ereport(elevel,...)
Definition: elog.h:157
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
CopyFormatOptions opts
char * escape
Definition: copy.h:41
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Definition: copyfromparse.c:97
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227

◆ GetDecimalFromHex()

static int GetDecimalFromHex ( char  hex)
static

Definition at line 1406 of file copyfromparse.c.

Referenced by CopyReadAttributesText().

/*
 * Body of GetDecimalFromHex(): map one hexadecimal digit character to its
 * numeric value.
 *
 * A decimal digit yields hex - '0'.  Any other character is lower-cased and
 * measured from 'a' with 10 added, so 'a'/'A' gives 10 and 'f'/'F' gives 15.
 * No validation is done here: the result is only meaningful when the
 * argument is a hex digit.
 */
1407 {
1408  if (isdigit((unsigned char) hex))
1409  return hex - '0';
1410  else
1411  return tolower((unsigned char) hex) - 'a' + 10;
1412 }

◆ NextCopyFrom()

bool NextCopyFrom ( CopyFromState  cstate,
ExprContext *  econtext,
Datum *  values,
bool *  nulls 
)

Definition at line 804 of file copyfromparse.c.

References Assert, attnum, CopyFromStateData::attnumlist, CopyFormatOptions::binary, CopyFromStateData::convert_select_flags, CopyGetInt16(), CopyReadBinaryAttribute(), CopyReadBinaryData(), CopyFormatOptions::csv_mode, cur, CopyFromStateData::cur_attname, CopyFromStateData::cur_attval, CopyFromStateData::cur_lineno, CurrentMemoryContext, CopyFromStateData::defexprs, CopyFromStateData::defmap, ExprContext::ecxt_per_tuple_memory, ereport, errcode(), errmsg(), ERROR, ExecEvalExpr(), CopyFormatOptions::force_notnull_flags, CopyFormatOptions::force_null_flags, i, CopyFromStateData::in_functions, InputFunctionCall(), lfirst_int, list_length(), MemSet, NameStr, TupleDescData::natts, NextCopyFromRawFields(), CopyFormatOptions::null_print, CopyFromStateData::num_defaults, CopyFromStateData::opts, CopyFromStateData::rel, RelationGetDescr, TupleDescAttr, and CopyFromStateData::typioparams.

Referenced by CopyFrom(), file_acquire_sample_rows(), and fileIterateForeignScan().

/*
 * Body of NextCopyFrom(): read the next input row and convert it into the
 * caller's values[] / nulls[] arrays.
 *
 * Both arrays are first reset for all num_phys_attrs columns (values to 0,
 * nulls to true), so any column not filled in below stays NULL.
 *
 * Text/CSV input: the raw fields come from NextCopyFromRawFields() and each
 * one is converted with InputFunctionCall().  Too many or too few fields
 * raise an error.  In CSV mode the force_notnull_flags / force_null_flags
 * options are applied per column before conversion.
 *
 * Binary input: an int16 field count is read; -1 is the EOF marker, and any
 * data following it is an error.  A count different from the expected
 * attribute count is an error.  Each field is then read with
 * CopyReadBinaryAttribute().
 *
 * Finally the default expressions for columns not supplied by the input
 * are evaluated with ExecEvalExpr(); this requires a non-NULL econtext.
 *
 * Returns false when no further row is available, true otherwise.
 *
 * NOTE(review): listing line 971 is absent below.  The page's reference
 * list names CurrentMemoryContext and ExprContext::ecxt_per_tuple_memory,
 * neither of which appears in the text, so a statement was presumably
 * dropped there when the listing was extracted -- confirm against the
 * original file.
 */
806 {
807  TupleDesc tupDesc;
808  AttrNumber num_phys_attrs,
809  attr_count,
810  num_defaults = cstate->num_defaults;
811  FmgrInfo *in_functions = cstate->in_functions;
812  Oid *typioparams = cstate->typioparams;
813  int i;
814  int *defmap = cstate->defmap;
815  ExprState **defexprs = cstate->defexprs;
816 
817  tupDesc = RelationGetDescr(cstate->rel);
818  num_phys_attrs = tupDesc->natts;
819  attr_count = list_length(cstate->attnumlist);
820 
821  /* Initialize all values for row to NULL */
822  MemSet(values, 0, num_phys_attrs * sizeof(Datum));
823  MemSet(nulls, true, num_phys_attrs * sizeof(bool));
824 
825  if (!cstate->opts.binary)
826  {
827  char **field_strings;
828  ListCell *cur;
829  int fldct;
830  int fieldno;
831  char *string;
832 
833  /* read raw fields in the next line */
834  if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
835  return false;
836 
837  /* check for overflowing fields */
838  if (attr_count > 0 && fldct > attr_count)
839  ereport(ERROR,
840  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
841  errmsg("extra data after last expected column")));
842 
843  fieldno = 0;
844 
845  /* Loop to read the user attributes on the line. */
846  foreach(cur, cstate->attnumlist)
847  {
848  int attnum = lfirst_int(cur);
849  int m = attnum - 1; /* zero-based index into the per-column arrays */
850  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
851 
852  if (fieldno >= fldct)
853  ereport(ERROR,
854  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
855  errmsg("missing data for column \"%s\"",
856  NameStr(att->attname))));
857  string = field_strings[fieldno++];
858 
859  if (cstate->convert_select_flags &&
860  !cstate->convert_select_flags[m])
861  {
862  /* ignore input field, leaving column as NULL */
863  continue;
864  }
865 
866  if (cstate->opts.csv_mode)
867  {
868  if (string == NULL &&
869  cstate->opts.force_notnull_flags[m])
870  {
871  /*
872  * FORCE_NOT_NULL option is set and column is NULL -
873  * convert it to the NULL string.
874  */
875  string = cstate->opts.null_print;
876  }
877  else if (string != NULL && cstate->opts.force_null_flags[m]
878  && strcmp(string, cstate->opts.null_print) == 0)
879  {
880  /*
881  * FORCE_NULL option is set and column matches the NULL
882  * string. It must have been quoted, or otherwise the
883  * string would already have been set to NULL. Convert it
884  * to NULL as specified.
885  */
886  string = NULL;
887  }
888  }
889 
890  cstate->cur_attname = NameStr(att->attname);
891  cstate->cur_attval = string;
892  values[m] = InputFunctionCall(&in_functions[m],
893  string,
894  typioparams[m],
895  att->atttypmod);
896  if (string != NULL)
897  nulls[m] = false;
898  cstate->cur_attname = NULL;
899  cstate->cur_attval = NULL;
900  }
901 
902  Assert(fieldno == attr_count);
903  }
904  else
905  {
906  /* binary */
907  int16 fld_count;
908  ListCell *cur;
909 
910  cstate->cur_lineno++;
911 
912  if (!CopyGetInt16(cstate, &fld_count))
913  {
914  /* EOF detected (end of file, or protocol-level EOF) */
915  return false;
916  }
917 
918  if (fld_count == -1)
919  {
920  /*
921  * Received EOF marker. Wait for the protocol-level EOF, and
922  * complain if it doesn't come immediately. In COPY FROM STDIN,
923  * this ensures that we correctly handle CopyFail, if client
924  * chooses to send that now. When copying from file, we could
925  * ignore the rest of the file like in text mode, but we choose to
926  * be consistent with the COPY FROM STDIN case.
927  */
928  char dummy;
929 
930  if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
931  ereport(ERROR,
932  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
933  errmsg("received copy data after EOF marker")));
934  return false;
935  }
936 
937  if (fld_count != attr_count)
938  ereport(ERROR,
939  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
940  errmsg("row field count is %d, expected %d",
941  (int) fld_count, attr_count)));
942 
943  foreach(cur, cstate->attnumlist)
944  {
945  int attnum = lfirst_int(cur);
946  int m = attnum - 1;
947  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
948 
949  cstate->cur_attname = NameStr(att->attname);
950  values[m] = CopyReadBinaryAttribute(cstate,
951  &in_functions[m],
952  typioparams[m],
953  att->atttypmod,
954  &nulls[m]);
955  cstate->cur_attname = NULL;
956  }
957  }
958 
959  /*
960  * Now compute and insert any defaults available for the columns not
961  * provided by the input data. Anything not processed here or above will
962  * remain NULL.
963  */
964  for (i = 0; i < num_defaults; i++)
965  {
966  /*
967  * The caller must supply econtext and have switched into the
968  * per-tuple memory context in it.
969  */
970  Assert(econtext != NULL);
972 
973  values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
974  &nulls[defmap[i]]);
975  }
976 
977  return true;
978 }
signed short int16
Definition: c.h:428
Definition: fmgr.h:56
bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
const char * cur_attname
#define RelationGetDescr(relation)
Definition: rel.h:495
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:234
struct cursor * cur
Definition: ecpg.c:28
int errcode(int sqlerrcode)
Definition: elog.c:698
static bool CopyGetInt16(CopyFromState cstate, int16 *val)
#define MemSet(start, val, len)
Definition: c.h:1008
const char * cur_attval
unsigned int Oid
Definition: postgres_ext.h:31
char * null_print
Definition: copy.h:36
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
ExprState ** defexprs
#define ERROR
Definition: elog.h:46
#define lfirst_int(lc)
Definition: pg_list.h:170
bool binary
Definition: copy.h:32
bool csv_mode
Definition: copy.h:34
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:316
char string[11]
Definition: preproc-type.c:46
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:203
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
uintptr_t Datum
Definition: postgres.h:411
Datum InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
Definition: fmgr.c:1529
int16 attnum
Definition: pg_attribute.h:83
#define ereport(elevel,...)
Definition: elog.h:157
#define Assert(condition)
Definition: c.h:804
bool * force_null_flags
Definition: copy.h:48
static int list_length(const List *l)
Definition: pg_list.h:149
CopyFormatOptions opts
static Datum values[MAXATTR]
Definition: bootstrap.c:166
int errmsg(const char *fmt,...)
Definition: elog.c:909
int i
#define NameStr(name)
Definition: c.h:681
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
int16 AttrNumber
Definition: attnum.h:21
bool * force_notnull_flags
Definition: copy.h:46

◆ NextCopyFromRawFields()

bool NextCopyFromRawFields ( CopyFromState  cstate,
char ***  fields,
int *  nfields 
)

Definition at line 753 of file copyfromparse.c.

References Assert, CopyFormatOptions::binary, CopyReadAttributesCSV(), CopyReadAttributesText(), CopyReadLine(), CopyFormatOptions::csv_mode, CopyFromStateData::cur_lineno, CopyFormatOptions::header_line, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::opts, and CopyFromStateData::raw_fields.

Referenced by NextCopyFrom().

/*
 * Body of NextCopyFromRawFields(): read the next text/CSV input line and
 * split it into de-escaped field strings.
 *
 * Must not be used for binary input (asserted).  If this is the first call
 * (cur_lineno == 0) and the header_line option is set, one line is read and
 * discarded first.  cur_lineno is incremented for every line read.
 *
 * On success *fields is set to cstate->raw_fields, *nfields to the number
 * of fields found, and true is returned.  False is returned when
 * CopyReadLine() reports EOF while reading the header line, or reports EOF
 * with an empty line buffer for a data line.
 */
754 {
755  int fldct;
756  bool done;
757 
758  /* only available for text or csv input */
759  Assert(!cstate->opts.binary);
760 
761  /* on input just throw the header line away */
762  if (cstate->cur_lineno == 0 && cstate->opts.header_line)
763  {
764  cstate->cur_lineno++;
765  if (CopyReadLine(cstate))
766  return false; /* done */
767  }
768 
769  cstate->cur_lineno++;
770 
771  /* Actually read the line into memory here */
772  done = CopyReadLine(cstate);
773 
774  /*
775  * EOF at start of line means we're done. If we see EOF after some
776  * characters, we act as though it was newline followed by EOF, ie,
777  * process the line and then exit loop on next iteration.
778  */
779  if (done && cstate->line_buf.len == 0)
780  return false;
781 
782  /* Parse the line into de-escaped field values */
783  if (cstate->opts.csv_mode)
784  fldct = CopyReadAttributesCSV(cstate);
785  else
786  fldct = CopyReadAttributesText(cstate);
787 
788  *fields = cstate->raw_fields;
789  *nfields = fldct;
790  return true;
791 }
StringInfoData line_buf
static bool CopyReadLine(CopyFromState cstate)
bool binary
Definition: copy.h:32
bool csv_mode
Definition: copy.h:34
bool header_line
Definition: copy.h:35
static int CopyReadAttributesText(CopyFromState cstate)
#define Assert(condition)
Definition: c.h:804
CopyFormatOptions opts
static int CopyReadAttributesCSV(CopyFromState cstate)

◆ ReceiveCopyBegin()

void ReceiveCopyBegin ( CopyFromState  cstate)

Definition at line 169 of file copyfromparse.c.

References CopyFromStateData::attnumlist, CopyFormatOptions::binary, buf, COPY_FRONTEND, CopyFromStateData::copy_src, CopyFromStateData::fe_msgbuf, format, i, list_length(), makeStringInfo(), CopyFromStateData::opts, pq_beginmessage(), pq_endmessage(), pq_flush, pq_sendbyte(), and pq_sendint16().

Referenced by BeginCopyFrom().

/*
 * Body of ReceiveCopyBegin(): tell the frontend that the backend is ready
 * to receive COPY data.
 *
 * Builds and sends a message of type 'G' carrying the overall format (1 if
 * opts.binary is set, otherwise 0), the number of attributes in
 * attnumlist, and the same format code once per attribute.  It then marks
 * the copy source as COPY_FRONTEND, allocates fe_msgbuf and flushes the
 * output.
 *
 * NOTE(review): listing line 171 is absent below and 'buf' is used without
 * a visible declaration; presumably the declaration of 'buf' was dropped
 * when the listing was extracted -- confirm against the original file.
 */
170 {
172  int natts = list_length(cstate->attnumlist);
173  int16 format = (cstate->opts.binary ? 1 : 0);
174  int i;
175 
176  pq_beginmessage(&buf, 'G');
177  pq_sendbyte(&buf, format); /* overall format */
178  pq_sendint16(&buf, natts);
179  for (i = 0; i < natts; i++)
180  pq_sendint16(&buf, format); /* per-column formats */
181  pq_endmessage(&buf);
182  cstate->copy_src = COPY_FRONTEND;
183  cstate->fe_msgbuf = makeStringInfo();
184  /* We *must* flush here to ensure FE knows it can send. */
185  pq_flush();
186 }
signed short int16
Definition: c.h:428
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
#define pq_flush()
Definition: libpq.h:46
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
bool binary
Definition: copy.h:32
static char * buf
Definition: pg_test_fsync.c:68
static int list_length(const List *l)
Definition: pg_list.h:149
CopyFormatOptions opts
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
int i
static char format

◆ ReceiveCopyBinaryHeader()

void ReceiveCopyBinaryHeader ( CopyFromState  cstate)

Definition at line 189 of file copyfromparse.c.

References BinarySignature, CopyGetInt32(), CopyReadBinaryData(), ereport, errcode(), errmsg(), and ERROR.

Referenced by BeginCopyFrom().

/*
 * ReceiveCopyBinaryHeader (body listing; the signature appears in the entry
 * header above this listing).
 *
 * Reads and validates the header of binary COPY input, in order: an 11-byte
 * signature, a 32-bit flags field, a 32-bit header extension length, and
 * then that many bytes of extension data, which are read and discarded.
 * Each failed check raises ERRCODE_BAD_COPY_FILE_FORMAT through
 * ereport(ERROR, ...).
 */
190 {
 /* Holds the signature; later reused as a one-byte scratch target. */
191  char readSig[11];
 /* Receives the flags field first, then the extension length. */
192  int32 tmp;
193 
194  /* Signature */
 /* Rejects both a short read and a mismatch against BinarySignature. */
195  if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
196  memcmp(readSig, BinarySignature, 11) != 0)
197  ereport(ERROR,
198  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
199  errmsg("COPY file signature not recognized")));
200  /* Flags field */
201  if (!CopyGetInt32(cstate, &tmp))
202  ereport(ERROR,
203  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
204  errmsg("invalid COPY file header (missing flags)")));
 /* Bit 16 set is rejected with the "WITH OIDS" message. */
205  if ((tmp & (1 << 16)) != 0)
206  ereport(ERROR,
207  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
208  errmsg("invalid COPY file header (WITH OIDS)")));
 /*
  * Clear bit 16, then reject any bit still set above the low 16 bits.
  * NOTE(review): assuming ereport(ERROR) does not return, bit 16 is already
  * known to be zero here, so the mask would be a no-op -- confirm.
  */
209  tmp &= ~(1 << 16);
210  if ((tmp >> 16) != 0)
211  ereport(ERROR,
212  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
213  errmsg("unrecognized critical flags in COPY file header")));
214  /* Header extension length */
 /* A failed read and a negative length are reported with the same message. */
215  if (!CopyGetInt32(cstate, &tmp) ||
216  tmp < 0)
217  ereport(ERROR,
218  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
219  errmsg("invalid COPY file header (missing length)")));
220  /* Skip extension header, if present */
 /* Consumes tmp bytes one at a time; a short read means the length was wrong. */
221  while (tmp-- > 0)
222  {
223  if (CopyReadBinaryData(cstate, readSig, 1) != 1)
224  ereport(ERROR,
225  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
226  errmsg("invalid COPY file header (wrong length)")));
227  }
228 }
static const char BinarySignature[11]
int errcode(int sqlerrcode)
Definition: elog.c:698
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
signed int int32
Definition: c.h:429
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
#define ERROR
Definition: elog.h:46
#define ereport(elevel,...)
Definition: elog.h:157
int errmsg(const char *fmt,...)
Definition: elog.c:909

Variable Documentation

◆ BinarySignature

const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"
static

Definition at line 147 of file copyfromparse.c.

Referenced by ReceiveCopyBinaryHeader().