PostgreSQL Source Code  git master
copyfromparse.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include "commands/copy.h"
#include "commands/copyfrom_internal.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
#include "utils/rel.h"
Include dependency graph for copyfromparse.c:

Go to the source code of this file.

Macros

#define ISOCTAL(c)   (((c) >= '0') && ((c) <= '7'))
 
#define OCTVALUE(c)   ((c) - '0')
 
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
 
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
 
#define REFILL_LINEBUF
 

Functions

static bool CopyReadLine (CopyFromState cstate)
 
static bool CopyReadLineText (CopyFromState cstate)
 
static int CopyReadAttributesText (CopyFromState cstate)
 
static int CopyReadAttributesCSV (CopyFromState cstate)
 
static Datum CopyReadBinaryAttribute (CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
 
static int CopyGetData (CopyFromState cstate, void *databuf, int minread, int maxread)
 
static bool CopyGetInt32 (CopyFromState cstate, int32 *val)
 
static bool CopyGetInt16 (CopyFromState cstate, int16 *val)
 
static void CopyLoadInputBuf (CopyFromState cstate)
 
static int CopyReadBinaryData (CopyFromState cstate, char *dest, int nbytes)
 
void ReceiveCopyBegin (CopyFromState cstate)
 
void ReceiveCopyBinaryHeader (CopyFromState cstate)
 
static void CopyConvertBuf (CopyFromState cstate)
 
static void CopyConversionError (CopyFromState cstate)
 
static void CopyLoadRawBuf (CopyFromState cstate)
 
bool NextCopyFromRawFields (CopyFromState cstate, char ***fields, int *nfields)
 
bool NextCopyFrom (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
 
static int GetDecimalFromHex (char hex)
 

Variables

static const char BinarySignature [11] = "PGCOPY\n\377\r\n\0"
 

Macro Definition Documentation

◆ IF_NEED_REFILL_AND_EOF_BREAK

#define IF_NEED_REFILL_AND_EOF_BREAK (   extralen)
Value:
/*
 * Expansion of IF_NEED_REFILL_AND_EOF_BREAK(extralen).
 *
 * When input_buf_ptr + extralen is at or past copy_buf_len and hit_eof is
 * set, the macro sets result = true and breaks out of the enclosing
 * construct; if extralen is nonzero it first moves input_buf_ptr to
 * copy_buf_len.  It uses the variables input_buf_ptr, copy_buf_len, hit_eof
 * and result from the scope where it is expanded.  The
 * "if (1) { ... } else ((void) 0)" wrapper makes the expansion behave as a
 * single statement that takes a trailing semicolon.
 */
if (1) \
{ \
if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
{ \
if (extralen) \
input_buf_ptr = copy_buf_len; /* consume the partial character */ \
/* backslash just before EOF, treat as data char */ \
result = true; \
break; \
} \
} else ((void) 0)

Definition at line 109 of file copyfromparse.c.

◆ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE

#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE (   extralen)
Value:
/*
 * Expansion of IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen).
 *
 * When input_buf_ptr + extralen is at or past copy_buf_len and hit_eof is
 * NOT set, the macro rewinds input_buf_ptr to prev_raw_ptr, sets
 * need_data = true and continues the enclosing loop.  It uses the variables
 * input_buf_ptr, copy_buf_len, hit_eof, prev_raw_ptr and need_data from the
 * scope where it is expanded.
 */
if (1) \
{ \
if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
{ \
input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
need_data = true; \
continue; \
} \
} else ((void) 0)

Definition at line 97 of file copyfromparse.c.

◆ ISOCTAL

#define ISOCTAL (   c)    (((c) >= '0') && ((c) <= '7'))

Definition at line 78 of file copyfromparse.c.

◆ OCTVALUE

#define OCTVALUE (   c)    ((c) - '0')

Definition at line 79 of file copyfromparse.c.

◆ REFILL_LINEBUF

#define REFILL_LINEBUF
Value:
/*
 * Expansion of REFILL_LINEBUF.
 *
 * If input_buf_ptr is ahead of cstate->input_buf_index, append the bytes of
 * cstate->input_buf between those two positions to cstate->line_buf and
 * advance cstate->input_buf_index to input_buf_ptr.  Uses cstate and
 * input_buf_ptr from the scope where it is expanded.
 */
if (1) \
{ \
if (input_buf_ptr > cstate->input_buf_index) \
{ \
appendBinaryStringInfo(&cstate->line_buf, \
cstate->input_buf + cstate->input_buf_index, \
input_buf_ptr - cstate->input_buf_index); \
cstate->input_buf_index = input_buf_ptr; \
} \
} else ((void) 0)

Definition at line 126 of file copyfromparse.c.

Function Documentation

◆ CopyConversionError()

static void CopyConversionError ( CopyFromState  cstate)
static

Definition at line 524 of file copyfromparse.c.

/*
 * CopyConversionError
 *
 * Report the encoding problem that was flagged earlier through
 * cstate->input_reached_error (asserted on entry).
 *
 * Without transcoding, the unverified tail of raw_buf (starting at
 * input_buf_len) is handed to report_invalid_encoding().  With transcoding,
 * the conversion is re-run from raw_buf_index with noError=false so that
 * the conversion routine itself raises the error; the trailing elog(ERROR)
 * is a backstop in case it does not.
 */
525 {
526  Assert(cstate->raw_buf_len > 0);
527  Assert(cstate->input_reached_error);
528 
529  if (!cstate->need_transcoding)
530  {
531  /*
532  * Everything up to input_buf_len was successfully verified, and
533  * input_buf_len points to the invalid or incomplete character.
534  */
535  report_invalid_encoding(cstate->file_encoding,
536  cstate->raw_buf + cstate->input_buf_len,
537  cstate->raw_buf_len - cstate->input_buf_len);
538  }
539  else
540  {
541  /*
542  * raw_buf_index points to the invalid or untranslatable character. We
543  * let the conversion routine report the error, because it can provide
544  * a more specific error message than we could here. An earlier call
545  * to the conversion routine in CopyConvertBuf() detected that there
546  * is an error, now we call the conversion routine again with
547  * noError=false, to have it throw the error.
548  */
549  unsigned char *src;
550  int srclen;
551  unsigned char *dst;
552  int dstlen;
553 
554  src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
555  srclen = cstate->raw_buf_len - cstate->raw_buf_index;
556  dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
557  dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
558 
559  (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
560  cstate->file_encoding,
561  GetDatabaseEncoding(),
562  src, srclen,
563  dst, dstlen,
564  false);
565 
566  /*
567  * The conversion routine should have reported an error, so this
568  * should not be reached.
569  */
570  elog(ERROR, "encoding conversion failed without error");
571  }
572 }
#define Assert(condition)
Definition: c.h:837
#define INPUT_BUF_SIZE
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
Definition: mbutils.c:469
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698

References Assert, CopyFromStateData::conversion_proc, elog, ERROR, CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, pg_do_encoding_conversion_buf(), CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and report_invalid_encoding().

Referenced by CopyLoadInputBuf().

◆ CopyConvertBuf()

static void CopyConvertBuf ( CopyFromState  cstate)
static

Definition at line 391 of file copyfromparse.c.

/*
 * CopyConvertBuf
 *
 * Turn pending raw_buf bytes into usable input_buf bytes.
 *
 * Without transcoding, the not-yet-verified part of the buffer is checked
 * with pg_encoding_verifymbstr() and input_buf_len is advanced by the
 * number of verified bytes.  With transcoding, unprocessed input_buf data
 * is moved to the front and raw bytes are converted into the space after
 * it with noError=true.
 *
 * Sets input_reached_eof when there is nothing left to process and
 * raw_reached_eof is set.  Sets input_reached_error when nothing could be
 * verified/converted and either the raw input is exhausted or enough bytes
 * were available that a valid character should have been found.
 */
392 {
393  /*
394  * If the file and server encoding are the same, no encoding conversion is
395  * required. However, we still need to verify that the input is valid for
396  * the encoding.
397  */
398  if (!cstate->need_transcoding)
399  {
400  /*
401  * When conversion is not required, input_buf and raw_buf are the
402  * same. raw_buf_len is the total number of bytes in the buffer, and
403  * input_buf_len tracks how many of those bytes have already been
404  * verified.
405  */
406  int preverifiedlen = cstate->input_buf_len;
407  int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
408  int nverified;
409 
410  if (unverifiedlen == 0)
411  {
412  /*
413  * If no more raw data is coming, report the EOF to the caller.
414  */
415  if (cstate->raw_reached_eof)
416  cstate->input_reached_eof = true;
417  return;
418  }
419 
420  /*
421  * Verify the new data, including any residual unverified bytes from
422  * previous round.
423  */
424  nverified = pg_encoding_verifymbstr(cstate->file_encoding,
425  cstate->raw_buf + preverifiedlen,
426  unverifiedlen);
427  if (nverified == 0)
428  {
429  /*
430  * Could not verify anything.
431  *
432  * If there is no more raw input data coming, it means that there
433  * was an incomplete multi-byte sequence at the end. Also, if
434  * there's "enough" input left, we should be able to verify at
435  * least one character, and a failure to do so means that we've
436  * hit an invalid byte sequence.
437  */
438  if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
439  cstate->input_reached_error = true;
440  return;
441  }
442  cstate->input_buf_len += nverified;
443  }
444  else
445  {
446  /*
447  * Encoding conversion is needed.
448  */
449  int nbytes;
450  unsigned char *src;
451  int srclen;
452  unsigned char *dst;
453  int dstlen;
454  int convertedlen;
455 
456  if (RAW_BUF_BYTES(cstate) == 0)
457  {
458  /*
459  * If no more raw data is coming, report the EOF to the caller.
460  */
461  if (cstate->raw_reached_eof)
462  cstate->input_reached_eof = true;
463  return;
464  }
465 
466  /*
467  * First, copy down any unprocessed data.
468  */
469  nbytes = INPUT_BUF_BYTES(cstate);
470  if (nbytes > 0 && cstate->input_buf_index > 0)
471  memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
472  nbytes);
473  cstate->input_buf_index = 0;
474  cstate->input_buf_len = nbytes;
475  cstate->input_buf[nbytes] = '\0';
476 
477  src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
478  srclen = cstate->raw_buf_len - cstate->raw_buf_index;
479  dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
480  dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
481 
482  /*
483  * Do the conversion. This might stop short, if there is an invalid
484  * byte sequence in the input. We'll convert as much as we can in
485  * that case.
486  *
487  * Note: Even if we hit an invalid byte sequence, we don't report the
488  * error until all the valid bytes have been consumed. The input
489  * might contain an end-of-input marker (\.), and we don't want to
490  * report an error if the invalid byte sequence is after the
491  * end-of-input marker. We might unnecessarily convert some data
492  * after the end-of-input marker as long as it's valid for the
493  * encoding, but that's harmless.
494  */
495  convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
496  cstate->file_encoding,
497  GetDatabaseEncoding(),
498  src, srclen,
499  dst, dstlen,
500  true);
501  if (convertedlen == 0)
502  {
503  /*
504  * Could not convert anything. If there is no more raw input data
505  * coming, it means that there was an incomplete multi-byte
506  * sequence at the end. Also, if there is plenty of input left,
507  * we should be able to convert at least one character, so a
508  * failure to do so must mean that we've hit a byte sequence
509  * that's invalid.
510  */
511  if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
512  cstate->input_reached_error = true;
513  return;
514  }
515  cstate->raw_buf_index += convertedlen;
516  cstate->input_buf_len += strlen((char *) dst);
517  }
518 }
#define RAW_BUF_BYTES(cstate)
#define INPUT_BUF_BYTES(cstate)
#define MAX_CONVERSION_INPUT_LENGTH
Definition: pg_wchar.h:320
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
Definition: wchar.c:2116
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2127

References CopyFromStateData::conversion_proc, CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, MAX_CONVERSION_INPUT_LENGTH, CopyFromStateData::need_transcoding, pg_do_encoding_conversion_buf(), pg_encoding_max_length(), pg_encoding_verifymbstr(), CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf().

◆ CopyGetData()

static int CopyGetData ( CopyFromState  cstate,
void *  databuf,
int  minread,
int  maxread 
)
static

Definition at line 236 of file copyfromparse.c.

/*
 * CopyGetData
 *
 * Read up to maxread bytes into databuf from the source selected by
 * cstate->copy_src, and return the number of bytes read.
 *
 * COPY_FILE:     a single fread(); a read error is reported with ereport,
 *                and zero bytes read sets raw_reached_eof.
 * COPY_FRONTEND: consumes protocol messages until at least minread bytes
 *                have been gathered, maxread is used up, or a CopyDone
 *                message sets raw_reached_eof.  CopyFail and unexpected
 *                message types raise an error; Flush and Sync are ignored.
 * COPY_CALLBACK: delegates to cstate->data_source_cb.
 */
237 {
238  int bytesread = 0;
239 
240  switch (cstate->copy_src)
241  {
242  case COPY_FILE:
243  bytesread = fread(databuf, 1, maxread, cstate->copy_file);
244  if (ferror(cstate->copy_file))
245  ereport(ERROR,
246  (errcode_for_file_access(),
247  errmsg("could not read from COPY file: %m")));
248  if (bytesread == 0)
249  cstate->raw_reached_eof = true;
250  break;
251  case COPY_FRONTEND:
252  while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
253  {
254  int avail;
255 
256  while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
257  {
258  /* Try to receive another message */
259  int mtype;
260  int maxmsglen;
261 
262  readmessage:
263  HOLD_CANCEL_INTERRUPTS();
264  pq_startmsgread();
265  mtype = pq_getbyte();
266  if (mtype == EOF)
267  ereport(ERROR,
268  (errcode(ERRCODE_CONNECTION_FAILURE),
269  errmsg("unexpected EOF on client connection with an open transaction")));
270  /* Validate message type and set packet size limit */
271  switch (mtype)
272  {
273  case PqMsg_CopyData:
274  maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
275  break;
276  case PqMsg_CopyDone:
277  case PqMsg_CopyFail:
278  case PqMsg_Flush:
279  case PqMsg_Sync:
280  maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
281  break;
282  default:
283  ereport(ERROR,
284  (errcode(ERRCODE_PROTOCOL_VIOLATION),
285  errmsg("unexpected message type 0x%02X during COPY from stdin",
286  mtype)));
287  maxmsglen = 0; /* keep compiler quiet */
288  break;
289  }
290  /* Now collect the message body */
291  if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
292  ereport(ERROR,
293  (errcode(ERRCODE_CONNECTION_FAILURE),
294  errmsg("unexpected EOF on client connection with an open transaction")));
295  RESUME_CANCEL_INTERRUPTS();
296  /* ... and process it */
297  switch (mtype)
298  {
299  case PqMsg_CopyData:
300  break;
301  case PqMsg_CopyDone:
302  /* COPY IN correctly terminated by frontend */
303  cstate->raw_reached_eof = true;
304  return bytesread;
305  case PqMsg_CopyFail:
306  ereport(ERROR,
307  (errcode(ERRCODE_QUERY_CANCELED),
308  errmsg("COPY from stdin failed: %s",
309  pq_getmsgstring(cstate->fe_msgbuf))));
310  break;
311  case PqMsg_Flush:
312  case PqMsg_Sync:
313 
314  /*
315  * Ignore Flush/Sync for the convenience of client
316  * libraries (such as libpq) that may send those
317  * without noticing that the command they just
318  * sent was COPY.
319  */
320  goto readmessage;
321  default:
322  Assert(false); /* NOT REACHED */
323  }
324  }
325  avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
326  if (avail > maxread)
327  avail = maxread;
328  pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
329  databuf = (void *) ((char *) databuf + avail);
330  maxread -= avail;
331  bytesread += avail;
332  }
333  break;
334  case COPY_CALLBACK:
335  bytesread = cstate->data_source_cb(databuf, minread, maxread);
336  break;
337  }
338 
339  return bytesread;
340 }
@ COPY_FILE
Definition: copyto.c:45
@ COPY_CALLBACK
Definition: copyto.c:47
@ COPY_FRONTEND
Definition: copyto.c:46
int errcode_for_file_access(void)
Definition: elog.c:876
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ereport(elevel,...)
Definition: elog.h:149
#define PQ_SMALL_MESSAGE_LIMIT
Definition: libpq.h:30
#define PQ_LARGE_MESSAGE_LIMIT
Definition: libpq.h:31
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:141
#define RESUME_CANCEL_INTERRUPTS()
Definition: miscadmin.h:143
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1203
int pq_getbyte(void)
Definition: pqcomm.c:964
void pq_startmsgread(void)
Definition: pqcomm.c:1141
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:579
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:528
#define PqMsg_CopyDone
Definition: protocol.h:64
#define PqMsg_CopyData
Definition: protocol.h:65
#define PqMsg_Sync
Definition: protocol.h:27
#define PqMsg_CopyFail
Definition: protocol.h:29
#define PqMsg_Flush
Definition: protocol.h:24
copy_data_source_cb data_source_cb

References Assert, COPY_CALLBACK, COPY_FILE, CopyFromStateData::copy_file, COPY_FRONTEND, CopyFromStateData::copy_src, StringInfoData::cursor, CopyFromStateData::data_source_cb, ereport, errcode(), errcode_for_file_access(), errmsg(), ERROR, CopyFromStateData::fe_msgbuf, HOLD_CANCEL_INTERRUPTS, StringInfoData::len, pq_copymsgbytes(), pq_getbyte(), pq_getmessage(), pq_getmsgstring(), PQ_LARGE_MESSAGE_LIMIT, PQ_SMALL_MESSAGE_LIMIT, pq_startmsgread(), PqMsg_CopyData, PqMsg_CopyDone, PqMsg_CopyFail, PqMsg_Flush, PqMsg_Sync, CopyFromStateData::raw_reached_eof, and RESUME_CANCEL_INTERRUPTS.

Referenced by CopyLoadRawBuf(), and CopyReadLine().

◆ CopyGetInt16()

static bool CopyGetInt16 ( CopyFromState  cstate,
int16 * val 
)
inline static

Definition at line 370 of file copyfromparse.c.

/*
 * CopyGetInt16
 *
 * Read sizeof(uint16) bytes with CopyReadBinaryData() and store the value,
 * converted with pg_ntoh16(), into *val.  Returns true on success; if fewer
 * bytes than requested were read, stores 0 in *val and returns false.
 */
371 {
372  uint16 buf;
373 
374  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
375  {
376  *val = 0; /* suppress compiler warning */
377  return false;
378  }
379  *val = (int16) pg_ntoh16(buf);
380  return true;
381 }
unsigned short uint16
Definition: c.h:491
signed short int16
Definition: c.h:481
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
long val
Definition: informix.c:689
#define pg_ntoh16(x)
Definition: pg_bswap.h:124
static char * buf
Definition: pg_test_fsync.c:72

References buf, CopyReadBinaryData(), pg_ntoh16, and val.

Referenced by NextCopyFrom().

◆ CopyGetInt32()

static bool CopyGetInt32 ( CopyFromState  cstate,
int32 * val 
)
inline static

Definition at line 353 of file copyfromparse.c.

/*
 * CopyGetInt32
 *
 * Read sizeof(uint32) bytes with CopyReadBinaryData() and store the value,
 * converted with pg_ntoh32(), into *val.  Returns true on success; if fewer
 * bytes than requested were read, stores 0 in *val and returns false.
 */
354 {
355  uint32 buf;
356 
357  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
358  {
359  *val = 0; /* suppress compiler warning */
360  return false;
361  }
362  *val = (int32) pg_ntoh32(buf);
363  return true;
364 }
unsigned int uint32
Definition: c.h:492
signed int int32
Definition: c.h:482
#define pg_ntoh32(x)
Definition: pg_bswap.h:125

References buf, CopyReadBinaryData(), pg_ntoh32, and val.

Referenced by CopyReadBinaryAttribute(), and ReceiveCopyBinaryHeader().

◆ CopyLoadInputBuf()

static void CopyLoadInputBuf ( CopyFromState  cstate)
static

Definition at line 641 of file copyfromparse.c.

/*
 * CopyLoadInputBuf
 *
 * Try to make more bytes available in input_buf.
 *
 * The number of bytes available on entry is remembered, then the function
 * loops: convert/verify pending raw data (CopyConvertBuf), return as soon
 * as INPUT_BUF_BYTES has grown, report a conversion error if one was
 * flagged, stop if input_reached_eof is set, and otherwise load more raw
 * data (CopyLoadRawBuf) and try again.
 */
642 {
643  int nbytes = INPUT_BUF_BYTES(cstate);
644 
645  /*
646  * The caller has updated input_buf_index to indicate how much of the
647  * input has been consumed and isn't needed anymore. If input_buf is the
648  * same physical area as raw_buf, update raw_buf_index accordingly.
649  */
650  if (cstate->raw_buf == cstate->input_buf)
651  {
652  Assert(!cstate->need_transcoding);
653  Assert(cstate->input_buf_index >= cstate->raw_buf_index);
654  cstate->raw_buf_index = cstate->input_buf_index;
655  }
656 
657  for (;;)
658  {
659  /* If we now have some unconverted data, try to convert it */
660  CopyConvertBuf(cstate);
661 
662  /* If we now have some more input bytes ready, return them */
663  if (INPUT_BUF_BYTES(cstate) > nbytes)
664  return;
665 
666  /*
667  * If we reached an invalid byte sequence, or we're at an incomplete
668  * multi-byte character but there is no more raw input data, report
669  * conversion error.
670  */
671  if (cstate->input_reached_error)
672  CopyConversionError(cstate);
673 
674  /* no more input, and everything has been converted */
675  if (cstate->input_reached_eof)
676  break;
677 
678  /* Try to load more raw data */
679  Assert(!cstate->raw_reached_eof);
680  CopyLoadRawBuf(cstate);
681  }
682 }
static void CopyConversionError(CopyFromState cstate)
static void CopyLoadRawBuf(CopyFromState cstate)
static void CopyConvertBuf(CopyFromState cstate)

References Assert, CopyConversionError(), CopyConvertBuf(), CopyLoadRawBuf(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyReadLineText().

◆ CopyLoadRawBuf()

static void CopyLoadRawBuf ( CopyFromState  cstate)
static

Definition at line 581 of file copyfromparse.c.

/*
 * CopyLoadRawBuf
 *
 * Refill raw_buf from the data source.
 *
 * Unprocessed bytes are moved to the start of raw_buf (with the input_buf
 * index/length adjusted as well when both names refer to the same buffer),
 * then CopyGetData() is asked for at least 1 and at most the remaining
 * RAW_BUF_SIZE bytes.  The buffer is NUL-terminated, bytes_processed and
 * the COPY progress counter are updated, and raw_reached_eof is set when
 * nothing was read.
 */
582 {
583  int nbytes;
584  int inbytes;
585 
586  /*
587  * In text mode, if encoding conversion is not required, raw_buf and
588  * input_buf point to the same buffer. Their len/index better agree, too.
589  */
590  if (cstate->raw_buf == cstate->input_buf)
591  {
592  Assert(!cstate->need_transcoding);
593  Assert(cstate->raw_buf_index == cstate->input_buf_index);
594  Assert(cstate->input_buf_len <= cstate->raw_buf_len);
595  }
596 
597  /*
598  * Copy down the unprocessed data if any.
599  */
600  nbytes = RAW_BUF_BYTES(cstate);
601  if (nbytes > 0 && cstate->raw_buf_index > 0)
602  memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
603  nbytes);
604  cstate->raw_buf_len -= cstate->raw_buf_index;
605  cstate->raw_buf_index = 0;
606 
607  /*
608  * If raw_buf and input_buf are in fact the same buffer, adjust the
609  * input_buf variables, too.
610  */
611  if (cstate->raw_buf == cstate->input_buf)
612  {
613  cstate->input_buf_len -= cstate->input_buf_index;
614  cstate->input_buf_index = 0;
615  }
616 
617  /* Load more data */
618  inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
619  1, RAW_BUF_SIZE - cstate->raw_buf_len);
620  nbytes += inbytes;
621  cstate->raw_buf[nbytes] = '\0';
622  cstate->raw_buf_len = nbytes;
623 
624  cstate->bytes_processed += inbytes;
625  pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
626 
627  if (inbytes == 0)
628  cstate->raw_reached_eof = true;
629 }
void pgstat_progress_update_param(int index, int64 val)
#define RAW_BUF_SIZE
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
#define PROGRESS_COPY_BYTES_PROCESSED
Definition: progress.h:140

References Assert, CopyFromStateData::bytes_processed, CopyGetData(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::need_transcoding, pgstat_progress_update_param(), PROGRESS_COPY_BYTES_PROCESSED, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, RAW_BUF_SIZE, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf(), and CopyReadBinaryData().

◆ CopyReadAttributesCSV()

static int CopyReadAttributesCSV ( CopyFromState  cstate)
static

Definition at line 1750 of file copyfromparse.c.

/*
 * CopyReadAttributesCSV
 *
 * Split the current line in cstate->line_buf into fields using the
 * delimiter, quote and escape characters from cstate->opts.
 *
 * De-quoted field values are written, NUL-terminated, into
 * cstate->attribute_buf, and cstate->raw_fields[i] is pointed at each of
 * them.  A field whose raw input was never quoted and equals the null
 * marker gets raw_fields[i] = NULL.  A field whose raw input equals the
 * default marker sets cstate->defaults[] for the matching column, or raises
 * an error when that column has no default expression.  raw_fields is
 * doubled in size whenever more fields are found than it can hold.
 *
 * Returns the number of fields found (0 for the zero-column special case).
 * Raises an error for an unterminated quoted field.
 */
1751 {
1752  char delimc = cstate->opts.delim[0];
1753  char quotec = cstate->opts.quote[0];
1754  char escapec = cstate->opts.escape[0];
1755  int fieldno;
1756  char *output_ptr;
1757  char *cur_ptr;
1758  char *line_end_ptr;
1759 
1760  /*
1761  * We need a special case for zero-column tables: check that the input
1762  * line is empty, and return.
1763  */
1764  if (cstate->max_fields <= 0)
1765  {
1766  if (cstate->line_buf.len != 0)
1767  ereport(ERROR,
1768  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1769  errmsg("extra data after last expected column")));
1770  return 0;
1771  }
1772 
1773  resetStringInfo(&cstate->attribute_buf);
1774 
1775  /*
1776  * The de-escaped attributes will certainly not be longer than the input
1777  * data line, so we can just force attribute_buf to be large enough and
1778  * then transfer data without any checks for enough space. We need to do
1779  * it this way because enlarging attribute_buf mid-stream would invalidate
1780  * pointers already stored into cstate->raw_fields[].
1781  */
1782  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1783  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1784  output_ptr = cstate->attribute_buf.data;
1785 
1786  /* set pointer variables for loop */
1787  cur_ptr = cstate->line_buf.data;
1788  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1789 
1790  /* Outer loop iterates over fields */
1791  fieldno = 0;
1792  for (;;)
1793  {
1794  bool found_delim = false;
1795  bool saw_quote = false;
1796  char *start_ptr;
1797  char *end_ptr;
1798  int input_len;
1799 
1800  /* Make sure there is enough space for the next value */
1801  if (fieldno >= cstate->max_fields)
1802  {
1803  cstate->max_fields *= 2;
1804  cstate->raw_fields =
1805  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1806  }
1807 
1808  /* Remember start of field on both input and output sides */
1809  start_ptr = cur_ptr;
1810  cstate->raw_fields[fieldno] = output_ptr;
1811 
1812  /*
1813  * Scan data for field,
1814  *
1815  * The loop starts in "not quote" mode and then toggles between that
1816  * and "in quote" mode. The loop exits normally if it is in "not
1817  * quote" mode and a delimiter or line end is seen.
1818  */
1819  for (;;)
1820  {
1821  char c;
1822 
1823  /* Not in quote */
1824  for (;;)
1825  {
1826  end_ptr = cur_ptr;
1827  if (cur_ptr >= line_end_ptr)
1828  goto endfield;
1829  c = *cur_ptr++;
1830  /* unquoted field delimiter */
1831  if (c == delimc)
1832  {
1833  found_delim = true;
1834  goto endfield;
1835  }
1836  /* start of quoted field (or part of field) */
1837  if (c == quotec)
1838  {
1839  saw_quote = true;
1840  break;
1841  }
1842  /* Add c to output string */
1843  *output_ptr++ = c;
1844  }
1845 
1846  /* In quote */
1847  for (;;)
1848  {
1849  end_ptr = cur_ptr;
1850  if (cur_ptr >= line_end_ptr)
1851  ereport(ERROR,
1852  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1853  errmsg("unterminated CSV quoted field")));
1854 
1855  c = *cur_ptr++;
1856 
1857  /* escape within a quoted field */
1858  if (c == escapec)
1859  {
1860  /*
1861  * peek at the next char if available, and escape it if it
1862  * is an escape char or a quote char
1863  */
1864  if (cur_ptr < line_end_ptr)
1865  {
1866  char nextc = *cur_ptr;
1867 
1868  if (nextc == escapec || nextc == quotec)
1869  {
1870  *output_ptr++ = nextc;
1871  cur_ptr++;
1872  continue;
1873  }
1874  }
1875  }
1876 
1877  /*
1878  * end of quoted field. Must do this test after testing for
1879  * escape in case quote char and escape char are the same
1880  * (which is the common case).
1881  */
1882  if (c == quotec)
1883  break;
1884 
1885  /* Add c to output string */
1886  *output_ptr++ = c;
1887  }
1888  }
1889 endfield:
1890 
1891  /* Terminate attribute value in output area */
1892  *output_ptr++ = '\0';
1893 
1894  /* Check whether raw input matched null marker */
1895  input_len = end_ptr - start_ptr;
1896  if (!saw_quote && input_len == cstate->opts.null_print_len &&
1897  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1898  cstate->raw_fields[fieldno] = NULL;
1899  /* Check whether raw input matched default marker */
1900  else if (fieldno < list_length(cstate->attnumlist) &&
1901  cstate->opts.default_print &&
1902  input_len == cstate->opts.default_print_len &&
1903  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1904  {
1905  /* fieldno is 0-index and attnum is 1-index */
1906  int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1907 
1908  if (cstate->defexprs[m] != NULL)
1909  {
1910  /* defaults contain entries for all physical attributes */
1911  cstate->defaults[m] = true;
1912  }
1913  else
1914  {
1915  TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1916  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1917 
1918  ereport(ERROR,
1919  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1920  errmsg("unexpected default marker in COPY data"),
1921  errdetail("Column \"%s\" has no default value.",
1922  NameStr(att->attname))));
1923  }
1924  }
1925 
1926  fieldno++;
1927  /* Done if we hit EOL instead of a delim */
1928  if (!found_delim)
1929  break;
1930  }
1931 
1932  /* Clean up state of attribute_buf */
1933  output_ptr--;
1934  Assert(*output_ptr == '\0');
1935  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1936 
1937  return fieldno;
1938 }
#define NameStr(name)
Definition: c.h:725
int errdetail(const char *fmt,...)
Definition: elog.c:1203
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:209
static int list_length(const List *l)
Definition: pg_list.h:152
static int list_nth_int(const List *list, int n)
Definition: pg_list.h:310
char * c
#define RelationGetDescr(relation)
Definition: rel.h:531
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:286
int default_print_len
Definition: copy.h:72
int null_print_len
Definition: copy.h:69
char * quote
Definition: copy.h:74
char * escape
Definition: copy.h:75
char * null_print
Definition: copy.h:68
char * delim
Definition: copy.h:73
char * default_print
Definition: copy.h:71
StringInfoData line_buf
CopyFormatOptions opts
StringInfoData attribute_buf
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92

References Assert, CopyFromStateData::attnumlist, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::default_print, CopyFormatOptions::default_print_len, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errdetail(), errmsg(), ERROR, CopyFormatOptions::escape, StringInfoData::len, CopyFromStateData::line_buf, list_length(), list_nth_int(), CopyFromStateData::max_fields, StringInfoData::maxlen, NameStr, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, CopyFromStateData::opts, CopyFormatOptions::quote, CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, repalloc(), resetStringInfo(), and TupleDescAttr.

Referenced by NextCopyFromRawFields().

◆ CopyReadAttributesText()

static int CopyReadAttributesText ( CopyFromState  cstate)
static

Definition at line 1496 of file copyfromparse.c.

1497 {
1498  char delimc = cstate->opts.delim[0];
1499  int fieldno;
1500  char *output_ptr;
1501  char *cur_ptr;
1502  char *line_end_ptr;
1503 
1504  /*
1505  * We need a special case for zero-column tables: check that the input
1506  * line is empty, and return.
1507  */
1508  if (cstate->max_fields <= 0)
1509  {
1510  if (cstate->line_buf.len != 0)
1511  ereport(ERROR,
1512  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1513  errmsg("extra data after last expected column")));
1514  return 0;
1515  }
1516 
1517  resetStringInfo(&cstate->attribute_buf);
1518 
1519  /*
1520  * The de-escaped attributes will certainly not be longer than the input
1521  * data line, so we can just force attribute_buf to be large enough and
1522  * then transfer data without any checks for enough space. We need to do
1523  * it this way because enlarging attribute_buf mid-stream would invalidate
1524  * pointers already stored into cstate->raw_fields[].
1525  */
1526  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1527  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1528  output_ptr = cstate->attribute_buf.data;
1529 
1530  /* set pointer variables for loop */
1531  cur_ptr = cstate->line_buf.data;
1532  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1533 
1534  /* Outer loop iterates over fields */
1535  fieldno = 0;
1536  for (;;)
1537  {
1538  bool found_delim = false;
1539  char *start_ptr;
1540  char *end_ptr;
1541  int input_len;
1542  bool saw_non_ascii = false;
1543 
1544  /* Make sure there is enough space for the next value */
1545  if (fieldno >= cstate->max_fields)
1546  {
1547  cstate->max_fields *= 2;
1548  cstate->raw_fields =
1549  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1550  }
1551 
1552  /* Remember start of field on both input and output sides */
1553  start_ptr = cur_ptr;
1554  cstate->raw_fields[fieldno] = output_ptr;
1555 
1556  /*
1557  * Scan data for field.
1558  *
1559  * Note that in this loop, we are scanning to locate the end of field
1560  * and also speculatively performing de-escaping. Once we find the
1561  * end-of-field, we can match the raw field contents against the null
1562  * marker string. Only after that comparison fails do we know that
1563  * de-escaping is actually the right thing to do; therefore we *must
1564  * not* throw any syntax errors before we've done the null-marker
1565  * check.
1566  */
1567  for (;;)
1568  {
1569  char c;
1570 
1571  end_ptr = cur_ptr;
1572  if (cur_ptr >= line_end_ptr)
1573  break;
1574  c = *cur_ptr++;
1575  if (c == delimc)
1576  {
1577  found_delim = true;
1578  break;
1579  }
1580  if (c == '\\')
1581  {
1582  if (cur_ptr >= line_end_ptr)
1583  break;
1584  c = *cur_ptr++;
1585  switch (c)
1586  {
1587  case '0':
1588  case '1':
1589  case '2':
1590  case '3':
1591  case '4':
1592  case '5':
1593  case '6':
1594  case '7':
1595  {
1596  /* handle \013 */
1597  int val;
1598 
1599  val = OCTVALUE(c);
1600  if (cur_ptr < line_end_ptr)
1601  {
1602  c = *cur_ptr;
1603  if (ISOCTAL(c))
1604  {
1605  cur_ptr++;
1606  val = (val << 3) + OCTVALUE(c);
1607  if (cur_ptr < line_end_ptr)
1608  {
1609  c = *cur_ptr;
1610  if (ISOCTAL(c))
1611  {
1612  cur_ptr++;
1613  val = (val << 3) + OCTVALUE(c);
1614  }
1615  }
1616  }
1617  }
1618  c = val & 0377;
1619  if (c == '\0' || IS_HIGHBIT_SET(c))
1620  saw_non_ascii = true;
1621  }
1622  break;
1623  case 'x':
1624  /* Handle \x3F */
1625  if (cur_ptr < line_end_ptr)
1626  {
1627  char hexchar = *cur_ptr;
1628 
1629  if (isxdigit((unsigned char) hexchar))
1630  {
1631  int val = GetDecimalFromHex(hexchar);
1632 
1633  cur_ptr++;
1634  if (cur_ptr < line_end_ptr)
1635  {
1636  hexchar = *cur_ptr;
1637  if (isxdigit((unsigned char) hexchar))
1638  {
1639  cur_ptr++;
1640  val = (val << 4) + GetDecimalFromHex(hexchar);
1641  }
1642  }
1643  c = val & 0xff;
1644  if (c == '\0' || IS_HIGHBIT_SET(c))
1645  saw_non_ascii = true;
1646  }
1647  }
1648  break;
1649  case 'b':
1650  c = '\b';
1651  break;
1652  case 'f':
1653  c = '\f';
1654  break;
1655  case 'n':
1656  c = '\n';
1657  break;
1658  case 'r':
1659  c = '\r';
1660  break;
1661  case 't':
1662  c = '\t';
1663  break;
1664  case 'v':
1665  c = '\v';
1666  break;
1667 
1668  /*
1669  * in all other cases, take the char after '\'
1670  * literally
1671  */
1672  }
1673  }
1674 
1675  /* Add c to output string */
1676  *output_ptr++ = c;
1677  }
1678 
1679  /* Check whether raw input matched null marker */
1680  input_len = end_ptr - start_ptr;
1681  if (input_len == cstate->opts.null_print_len &&
1682  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1683  cstate->raw_fields[fieldno] = NULL;
1684  /* Check whether raw input matched default marker */
1685  else if (fieldno < list_length(cstate->attnumlist) &&
1686  cstate->opts.default_print &&
1687  input_len == cstate->opts.default_print_len &&
1688  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
1689  {
1690  /* fieldno is 0-indexed and attnum is 1-indexed */
1691  int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
1692 
1693  if (cstate->defexprs[m] != NULL)
1694  {
1695  /* defaults contain entries for all physical attributes */
1696  cstate->defaults[m] = true;
1697  }
1698  else
1699  {
1700  TupleDesc tupDesc = RelationGetDescr(cstate->rel);
1701  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1702 
1703  ereport(ERROR,
1704  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1705  errmsg("unexpected default marker in COPY data"),
1706  errdetail("Column \"%s\" has no default value.",
1707  NameStr(att->attname))));
1708  }
1709  }
1710  else
1711  {
1712  /*
1713  * At this point we know the field is supposed to contain data.
1714  *
1715  * If we de-escaped any non-7-bit-ASCII chars, make sure the
1716  * resulting string is valid data for the db encoding.
1717  */
1718  if (saw_non_ascii)
1719  {
1720  char *fld = cstate->raw_fields[fieldno];
1721 
1722  pg_verifymbstr(fld, output_ptr - fld, false);
1723  }
1724  }
1725 
1726  /* Terminate attribute value in output area */
1727  *output_ptr++ = '\0';
1728 
1729  fieldno++;
1730  /* Done if we hit EOL instead of a delim */
1731  if (!found_delim)
1732  break;
1733  }
1734 
1735  /* Clean up state of attribute_buf */
1736  output_ptr--;
1737  Assert(*output_ptr == '\0');
1738  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1739 
1740  return fieldno;
1741 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1134
#define OCTVALUE(c)
Definition: copyfromparse.c:79
#define ISOCTAL(c)
Definition: copyfromparse.c:78
static int GetDecimalFromHex(char hex)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1556

References Assert, CopyFromStateData::attnumlist, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::default_print, CopyFormatOptions::default_print_len, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errdetail(), errmsg(), ERROR, GetDecimalFromHex(), if(), IS_HIGHBIT_SET, ISOCTAL, StringInfoData::len, CopyFromStateData::line_buf, list_length(), list_nth_int(), CopyFromStateData::max_fields, StringInfoData::maxlen, NameStr, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, OCTVALUE, CopyFromStateData::opts, pg_verifymbstr(), CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, repalloc(), resetStringInfo(), TupleDescAttr, and val.

Referenced by NextCopyFromRawFields().

◆ CopyReadBinaryAttribute()

static Datum CopyReadBinaryAttribute ( CopyFromState  cstate,
FmgrInfo flinfo,
Oid  typioparam,
int32  typmod,
bool *  isnull 
)
static

Definition at line 1945 of file copyfromparse.c.

1948 {
1949  int32 fld_size;
1950  Datum result;
1951 
1952  if (!CopyGetInt32(cstate, &fld_size))
1953  ereport(ERROR,
1954  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1955  errmsg("unexpected EOF in COPY data")));
1956  if (fld_size == -1)
1957  {
1958  *isnull = true;
1959  return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
1960  }
1961  if (fld_size < 0)
1962  ereport(ERROR,
1963  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1964  errmsg("invalid field size")));
1965 
1966  /* reset attribute_buf to empty, and load raw data in it */
1967  resetStringInfo(&cstate->attribute_buf);
1968 
1969  enlargeStringInfo(&cstate->attribute_buf, fld_size);
1970  if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
1971  fld_size) != fld_size)
1972  ereport(ERROR,
1973  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1974  errmsg("unexpected EOF in COPY data")));
1975 
1976  cstate->attribute_buf.len = fld_size;
1977  cstate->attribute_buf.data[fld_size] = '\0';
1978 
1979  /* Call the column type's binary input converter */
1980  result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
1981  typioparam, typmod);
1982 
1983  /* Trouble if it didn't eat the whole buffer */
1984  if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
1985  ereport(ERROR,
1986  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1987  errmsg("incorrect binary data format")));
1988 
1989  *isnull = false;
1990  return result;
1991 }
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1697
uintptr_t Datum
Definition: postgres.h:64

References CopyFromStateData::attribute_buf, CopyGetInt32(), CopyReadBinaryData(), StringInfoData::cursor, StringInfoData::data, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, StringInfoData::len, ReceiveFunctionCall(), and resetStringInfo().

Referenced by NextCopyFrom().

◆ CopyReadBinaryData()

static int CopyReadBinaryData ( CopyFromState  cstate,
char *  dest,
int  nbytes 
)
static

Definition at line 692 of file copyfromparse.c.

693 {
694  int copied_bytes = 0;
695 
696  if (RAW_BUF_BYTES(cstate) >= nbytes)
697  {
698  /* Enough bytes are present in the buffer. */
699  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
700  cstate->raw_buf_index += nbytes;
701  copied_bytes = nbytes;
702  }
703  else
704  {
705  /*
706  * Not enough bytes in the buffer, so must read from the file. Need
707  * to loop since 'nbytes' could be larger than the buffer size.
708  */
709  do
710  {
711  int copy_bytes;
712 
713  /* Load more data if buffer is empty. */
714  if (RAW_BUF_BYTES(cstate) == 0)
715  {
716  CopyLoadRawBuf(cstate);
717  if (cstate->raw_reached_eof)
718  break; /* EOF */
719  }
720 
721  /* Transfer some bytes. */
722  copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
723  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
724  cstate->raw_buf_index += copy_bytes;
725  dest += copy_bytes;
726  copied_bytes += copy_bytes;
727  } while (copied_bytes < nbytes);
728  }
729 
730  return copied_bytes;
731 }
#define Min(x, y)
Definition: c.h:983

References CopyLoadRawBuf(), generate_unaccent_rules::dest, Min, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyGetInt16(), CopyGetInt32(), CopyReadBinaryAttribute(), NextCopyFrom(), and ReceiveCopyBinaryHeader().

◆ CopyReadLine()

static bool CopyReadLine ( CopyFromState  cstate)
static

Definition at line 1090 of file copyfromparse.c.

1091 {
1092  bool result;
1093 
1094  resetStringInfo(&cstate->line_buf);
1095  cstate->line_buf_valid = false;
1096 
1097  /* Parse data and transfer into line_buf */
1098  result = CopyReadLineText(cstate);
1099 
1100  if (result)
1101  {
1102  /*
1103  * Reached EOF. In protocol version 3, we should ignore anything
1104  * after \. up to the protocol end of copy data. (XXX maybe better
1105  * not to treat \. as special?)
1106  */
1107  if (cstate->copy_src == COPY_FRONTEND)
1108  {
1109  int inbytes;
1110 
1111  do
1112  {
1113  inbytes = CopyGetData(cstate, cstate->input_buf,
1114  1, INPUT_BUF_SIZE);
1115  } while (inbytes > 0);
1116  cstate->input_buf_index = 0;
1117  cstate->input_buf_len = 0;
1118  cstate->raw_buf_index = 0;
1119  cstate->raw_buf_len = 0;
1120  }
1121  }
1122  else
1123  {
1124  /*
1125  * If we didn't hit EOF, then we must have transferred the EOL marker
1126  * to line_buf along with the data. Get rid of it.
1127  */
1128  switch (cstate->eol_type)
1129  {
1130  case EOL_NL:
1131  Assert(cstate->line_buf.len >= 1);
1132  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1133  cstate->line_buf.len--;
1134  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1135  break;
1136  case EOL_CR:
1137  Assert(cstate->line_buf.len >= 1);
1138  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
1139  cstate->line_buf.len--;
1140  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1141  break;
1142  case EOL_CRNL:
1143  Assert(cstate->line_buf.len >= 2);
1144  Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
1145  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1146  cstate->line_buf.len -= 2;
1147  cstate->line_buf.data[cstate->line_buf.len] = '\0';
1148  break;
1149  case EOL_UNKNOWN:
1150  /* shouldn't get here */
1151  Assert(false);
1152  break;
1153  }
1154  }
1155 
1156  /* Now it's safe to use the buffer in error messages */
1157  cstate->line_buf_valid = true;
1158 
1159  return result;
1160 }
@ EOL_CR
@ EOL_CRNL
@ EOL_UNKNOWN
@ EOL_NL
static bool CopyReadLineText(CopyFromState cstate)

References Assert, COPY_FRONTEND, CopyFromStateData::copy_src, CopyGetData(), CopyReadLineText(), StringInfoData::data, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::line_buf_valid, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and resetStringInfo().

Referenced by NextCopyFromRawFields().

◆ CopyReadLineText()

static bool CopyReadLineText ( CopyFromState  cstate)
static

Definition at line 1166 of file copyfromparse.c.

1167 {
1168  char *copy_input_buf;
1169  int input_buf_ptr;
1170  int copy_buf_len;
1171  bool need_data = false;
1172  bool hit_eof = false;
1173  bool result = false;
1174 
1175  /* CSV variables */
1176  bool in_quote = false,
1177  last_was_esc = false;
1178  char quotec = '\0';
1179  char escapec = '\0';
1180 
1181  if (cstate->opts.csv_mode)
1182  {
1183  quotec = cstate->opts.quote[0];
1184  escapec = cstate->opts.escape[0];
1185  /* ignore special escape processing if it's the same as quotec */
1186  if (quotec == escapec)
1187  escapec = '\0';
1188  }
1189 
1190  /*
1191  * The objective of this loop is to transfer the entire next input line
1192  * into line_buf. Hence, we only care for detecting newlines (\r and/or
1193  * \n) and the end-of-copy marker (\.).
1194  *
1195  * In CSV mode, \r and \n inside a quoted field are just part of the data
1196  * value and are put in line_buf. We keep just enough state to know if we
1197  * are currently in a quoted field or not.
1198  *
1199  * The input has already been converted to the database encoding. All
1200  * supported server encodings have the property that all bytes in a
1201  * multi-byte sequence have the high bit set, so a multibyte character
1202  * cannot contain any newline or escape characters embedded in the
1203  * multibyte sequence. Therefore, we can process the input byte-by-byte,
1204  * regardless of the encoding.
1205  *
1206  * For speed, we try to move data from input_buf to line_buf in chunks
1207  * rather than one character at a time. input_buf_ptr points to the next
1208  * character to examine; any characters from input_buf_index to
1209  * input_buf_ptr have been determined to be part of the line, but not yet
1210  * transferred to line_buf.
1211  *
1212  * For a little extra speed within the loop, we copy input_buf and
1213  * input_buf_len into local variables.
1214  */
1215  copy_input_buf = cstate->input_buf;
1216  input_buf_ptr = cstate->input_buf_index;
1217  copy_buf_len = cstate->input_buf_len;
1218 
1219  for (;;)
1220  {
1221  int prev_raw_ptr;
1222  char c;
1223 
1224  /*
1225  * Load more data if needed.
1226  *
1227  * TODO: We could just force four bytes of read-ahead and avoid the
1228  * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was
1229  * unsafe with the old v2 COPY protocol, but we don't support that
1230  * anymore.
1231  */
1232  if (input_buf_ptr >= copy_buf_len || need_data)
1233  {
1235 
1236  CopyLoadInputBuf(cstate);
1237  /* update our local variables */
1238  hit_eof = cstate->input_reached_eof;
1239  input_buf_ptr = cstate->input_buf_index;
1240  copy_buf_len = cstate->input_buf_len;
1241 
1242  /*
1243  * If we are completely out of data, break out of the loop,
1244  * reporting EOF.
1245  */
1246  if (INPUT_BUF_BYTES(cstate) <= 0)
1247  {
1248  result = true;
1249  break;
1250  }
1251  need_data = false;
1252  }
1253 
1254  /* OK to fetch a character */
1255  prev_raw_ptr = input_buf_ptr;
1256  c = copy_input_buf[input_buf_ptr++];
1257 
1258  if (cstate->opts.csv_mode)
1259  {
1260  /*
1261  * If character is '\r', we may need to look ahead below. Force
1262  * fetch of the next character if we don't already have it. We
1263  * need to do this before changing CSV state, in case '\r' is also
1264  * the quote or escape character.
1265  */
1266  if (c == '\r')
1267  {
1269  }
1270 
1271  /*
1272  * Dealing with quotes and escapes here is mildly tricky. If the
1273  * quote char is also the escape char, there's no problem - we
1274  * just use the char as a toggle. If they are different, we need
1275  * to ensure that we only take account of an escape inside a
1276  * quoted field and immediately preceding a quote char, and not
1277  * the second in an escape-escape sequence.
1278  */
1279  if (in_quote && c == escapec)
1280  last_was_esc = !last_was_esc;
1281  if (c == quotec && !last_was_esc)
1282  in_quote = !in_quote;
1283  if (c != escapec)
1284  last_was_esc = false;
1285 
1286  /*
1287  * Updating the line count for embedded CR and/or LF chars is
1288  * necessarily a little fragile - this test is probably about the
1289  * best we can do. (XXX it's arguable whether we should do this
1290  * at all --- is cur_lineno a physical or logical count?)
1291  */
1292  if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
1293  cstate->cur_lineno++;
1294  }
1295 
1296  /* Process \r */
1297  if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
1298  {
1299  /* Check for \r\n on first line, _and_ handle \r\n. */
1300  if (cstate->eol_type == EOL_UNKNOWN ||
1301  cstate->eol_type == EOL_CRNL)
1302  {
1303  /*
1304  * If need more data, go back to loop top to load it.
1305  *
1306  * Note that if we are at EOF, c will wind up as '\0' because
1307  * of the guaranteed pad of input_buf.
1308  */
1310 
1311  /* get next char */
1312  c = copy_input_buf[input_buf_ptr];
1313 
1314  if (c == '\n')
1315  {
1316  input_buf_ptr++; /* eat newline */
1317  cstate->eol_type = EOL_CRNL; /* in case not set yet */
1318  }
1319  else
1320  {
1321  /* found \r, but no \n */
1322  if (cstate->eol_type == EOL_CRNL)
1323  ereport(ERROR,
1324  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1325  !cstate->opts.csv_mode ?
1326  errmsg("literal carriage return found in data") :
1327  errmsg("unquoted carriage return found in data"),
1328  !cstate->opts.csv_mode ?
1329  errhint("Use \"\\r\" to represent carriage return.") :
1330  errhint("Use quoted CSV field to represent carriage return.")));
1331 
1332  /*
1333  * if we got here, it is the first line and we didn't find
1334  * \n, so don't consume the peeked character
1335  */
1336  cstate->eol_type = EOL_CR;
1337  }
1338  }
1339  else if (cstate->eol_type == EOL_NL)
1340  ereport(ERROR,
1341  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1342  !cstate->opts.csv_mode ?
1343  errmsg("literal carriage return found in data") :
1344  errmsg("unquoted carriage return found in data"),
1345  !cstate->opts.csv_mode ?
1346  errhint("Use \"\\r\" to represent carriage return.") :
1347  errhint("Use quoted CSV field to represent carriage return.")));
1348  /* If reach here, we have found the line terminator */
1349  break;
1350  }
1351 
1352  /* Process \n */
1353  if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
1354  {
1355  if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
1356  ereport(ERROR,
1357  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1358  !cstate->opts.csv_mode ?
1359  errmsg("literal newline found in data") :
1360  errmsg("unquoted newline found in data"),
1361  !cstate->opts.csv_mode ?
1362  errhint("Use \"\\n\" to represent newline.") :
1363  errhint("Use quoted CSV field to represent newline.")));
1364  cstate->eol_type = EOL_NL; /* in case not set yet */
1365  /* If reach here, we have found the line terminator */
1366  break;
1367  }
1368 
1369  /*
1370  * Process backslash, except in CSV mode where backslash is a normal
1371  * character.
1372  */
1373  if (c == '\\' && !cstate->opts.csv_mode)
1374  {
1375  char c2;
1376 
1379 
1380  /* -----
1381  * get next character
1382  * Note: we do not change c so if it isn't \., we can fall
1383  * through and continue processing.
1384  * -----
1385  */
1386  c2 = copy_input_buf[input_buf_ptr];
1387 
1388  if (c2 == '.')
1389  {
1390  input_buf_ptr++; /* consume the '.' */
1391  if (cstate->eol_type == EOL_CRNL)
1392  {
1393  /* Get the next character */
1395  /* if hit_eof, c2 will become '\0' */
1396  c2 = copy_input_buf[input_buf_ptr++];
1397 
1398  if (c2 == '\n')
1399  ereport(ERROR,
1400  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1401  errmsg("end-of-copy marker does not match previous newline style")));
1402  else if (c2 != '\r')
1403  ereport(ERROR,
1404  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1405  errmsg("end-of-copy marker is not alone on its line")));
1406  }
1407 
1408  /* Get the next character */
1410  /* if hit_eof, c2 will become '\0' */
1411  c2 = copy_input_buf[input_buf_ptr++];
1412 
1413  if (c2 != '\r' && c2 != '\n')
1414  ereport(ERROR,
1415  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1416  errmsg("end-of-copy marker is not alone on its line")));
1417 
1418  if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
1419  (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
1420  (cstate->eol_type == EOL_CR && c2 != '\r'))
1421  ereport(ERROR,
1422  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1423  errmsg("end-of-copy marker does not match previous newline style")));
1424 
1425  /*
1426  * If there is any data on this line before the \., complain.
1427  */
1428  if (cstate->line_buf.len > 0 ||
1429  prev_raw_ptr > cstate->input_buf_index)
1430  ereport(ERROR,
1431  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1432  errmsg("end-of-copy marker is not alone on its line")));
1433 
1434  /*
1435  * Discard the \. and newline, then report EOF.
1436  */
1437  cstate->input_buf_index = input_buf_ptr;
1438  result = true; /* report EOF */
1439  break;
1440  }
1441  else
1442  {
1443  /*
1444  * If we are here, it means we found a backslash followed by
1445  * something other than a period. In non-CSV mode, anything
1446  * after a backslash is special, so we skip over that second
1447  * character too. If we didn't do that \\. would be
1448  * considered an eof-of copy, while in non-CSV mode it is a
1449  * literal backslash followed by a period.
1450  */
1451  input_buf_ptr++;
1452  }
1453  }
1454  } /* end of outer loop */
1455 
1456  /*
1457  * Transfer any still-uncopied data to line_buf.
1458  */
1460 
1461  return result;
1462 }
#define REFILL_LINEBUF
static void CopyLoadInputBuf(CopyFromState cstate)
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Definition: copyfromparse.c:97
int errhint(const char *fmt,...)
Definition: elog.c:1317
bool csv_mode
Definition: copy.h:66

References CopyLoadInputBuf(), CopyFormatOptions::csv_mode, CopyFromStateData::cur_lineno, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, ereport, errcode(), errhint(), errmsg(), ERROR, CopyFormatOptions::escape, IF_NEED_REFILL_AND_EOF_BREAK, IF_NEED_REFILL_AND_NOT_EOF_CONTINUE, CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::input_reached_eof, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::opts, CopyFormatOptions::quote, and REFILL_LINEBUF.

Referenced by CopyReadLine().

◆ GetDecimalFromHex()

static int GetDecimalFromHex ( char  hex)
static

Definition at line 1468 of file copyfromparse.c.

/*
 * GetDecimalFromHex
 *		Return the numeric value (0..15) of one hexadecimal digit character.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Any other character yields -1
 * rather than an arbitrary out-of-range number, so a caller that forgets to
 * pre-validate with isxdigit() gets a recognizable failure value.  Results
 * for valid hex digits are unchanged.
 */
static int
GetDecimalFromHex(char hex)
{
	/* <ctype.h> functions require a value representable as unsigned char */
	unsigned char digit = (unsigned char) hex;

	if (isdigit(digit))
		return digit - '0';

	if (isxdigit(digit))
		return tolower(digit) - 'a' + 10;

	return -1;
}

Referenced by CopyReadAttributesText().

◆ NextCopyFrom()

bool NextCopyFrom ( CopyFromState  cstate,
ExprContext econtext,
Datum values,
bool *  nulls 
)

Definition at line 845 of file copyfromparse.c.

847 {
848  TupleDesc tupDesc;
849  AttrNumber num_phys_attrs,
850  attr_count,
851  num_defaults = cstate->num_defaults;
852  FmgrInfo *in_functions = cstate->in_functions;
853  Oid *typioparams = cstate->typioparams;
854  int i;
855  int *defmap = cstate->defmap;
856  ExprState **defexprs = cstate->defexprs;
857 
858  tupDesc = RelationGetDescr(cstate->rel);
859  num_phys_attrs = tupDesc->natts;
860  attr_count = list_length(cstate->attnumlist);
861 
862  /* Initialize all values for row to NULL */
863  MemSet(values, 0, num_phys_attrs * sizeof(Datum));
864  MemSet(nulls, true, num_phys_attrs * sizeof(bool));
865  MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
866 
867  if (!cstate->opts.binary)
868  {
869  char **field_strings;
870  ListCell *cur;
871  int fldct;
872  int fieldno;
873  char *string;
874 
875  /* read raw fields in the next line */
876  if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
877  return false;
878 
879  /* check for overflowing fields */
880  if (attr_count > 0 && fldct > attr_count)
881  ereport(ERROR,
882  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
883  errmsg("extra data after last expected column")));
884 
885  fieldno = 0;
886 
887  /* Loop to read the user attributes on the line. */
888  foreach(cur, cstate->attnumlist)
889  {
890  int attnum = lfirst_int(cur);
891  int m = attnum - 1;
892  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
893 
894  if (fieldno >= fldct)
895  ereport(ERROR,
896  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
897  errmsg("missing data for column \"%s\"",
898  NameStr(att->attname))));
899  string = field_strings[fieldno++];
900 
901  if (cstate->convert_select_flags &&
902  !cstate->convert_select_flags[m])
903  {
904  /* ignore input field, leaving column as NULL */
905  continue;
906  }
907 
908  if (cstate->opts.csv_mode)
909  {
910  if (string == NULL &&
911  cstate->opts.force_notnull_flags[m])
912  {
913  /*
914  * FORCE_NOT_NULL option is set and column is NULL -
915  * convert it to the NULL string.
916  */
917  string = cstate->opts.null_print;
918  }
919  else if (string != NULL && cstate->opts.force_null_flags[m]
920  && strcmp(string, cstate->opts.null_print) == 0)
921  {
922  /*
923  * FORCE_NULL option is set and column matches the NULL
924  * string. It must have been quoted, or otherwise the
925  * string would already have been set to NULL. Convert it
926  * to NULL as specified.
927  */
928  string = NULL;
929  }
930  }
931 
932  cstate->cur_attname = NameStr(att->attname);
933  cstate->cur_attval = string;
934 
935  if (string != NULL)
936  nulls[m] = false;
937 
938  if (cstate->defaults[m])
939  {
940  /*
941  * The caller must supply econtext and have switched into the
942  * per-tuple memory context in it.
943  */
944  Assert(econtext != NULL);
946 
947  values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
948  }
949 
950  /*
951  * If ON_ERROR is specified with IGNORE, skip rows with soft
952  * errors
953  */
954  else if (!InputFunctionCallSafe(&in_functions[m],
955  string,
956  typioparams[m],
957  att->atttypmod,
958  (Node *) cstate->escontext,
959  &values[m]))
960  {
962 
963  cstate->num_errors++;
964 
966  {
967  /*
968  * Since we emit line number and column info in the below
969  * notice message, we suppress error context information
970  * other than the relation name.
971  */
972  Assert(!cstate->relname_only);
973  cstate->relname_only = true;
974 
975  if (cstate->cur_attval)
976  {
977  char *attval;
978 
979  attval = CopyLimitPrintoutLength(cstate->cur_attval);
980  ereport(NOTICE,
981  errmsg("skipping row due to data type incompatibility at line %llu for column \"%s\": \"%s\"",
982  (unsigned long long) cstate->cur_lineno,
983  cstate->cur_attname,
984  attval));
985  pfree(attval);
986  }
987  else
988  ereport(NOTICE,
989  errmsg("skipping row due to data type incompatibility at line %llu for column \"%s\": null input",
990  (unsigned long long) cstate->cur_lineno,
991  cstate->cur_attname));
992 
993  /* reset relname_only */
994  cstate->relname_only = false;
995  }
996 
997  return true;
998  }
999 
1000  cstate->cur_attname = NULL;
1001  cstate->cur_attval = NULL;
1002  }
1003 
1004  Assert(fieldno == attr_count);
1005  }
1006  else
1007  {
1008  /* binary */
1009  int16 fld_count;
1010  ListCell *cur;
1011 
1012  cstate->cur_lineno++;
1013 
1014  if (!CopyGetInt16(cstate, &fld_count))
1015  {
1016  /* EOF detected (end of file, or protocol-level EOF) */
1017  return false;
1018  }
1019 
1020  if (fld_count == -1)
1021  {
1022  /*
1023  * Received EOF marker. Wait for the protocol-level EOF, and
1024  * complain if it doesn't come immediately. In COPY FROM STDIN,
1025  * this ensures that we correctly handle CopyFail, if client
1026  * chooses to send that now. When copying from file, we could
1027  * ignore the rest of the file like in text mode, but we choose to
1028  * be consistent with the COPY FROM STDIN case.
1029  */
1030  char dummy;
1031 
1032  if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
1033  ereport(ERROR,
1034  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1035  errmsg("received copy data after EOF marker")));
1036  return false;
1037  }
1038 
1039  if (fld_count != attr_count)
1040  ereport(ERROR,
1041  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1042  errmsg("row field count is %d, expected %d",
1043  (int) fld_count, attr_count)));
1044 
1045  foreach(cur, cstate->attnumlist)
1046  {
1047  int attnum = lfirst_int(cur);
1048  int m = attnum - 1;
1049  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1050 
1051  cstate->cur_attname = NameStr(att->attname);
1052  values[m] = CopyReadBinaryAttribute(cstate,
1053  &in_functions[m],
1054  typioparams[m],
1055  att->atttypmod,
1056  &nulls[m]);
1057  cstate->cur_attname = NULL;
1058  }
1059  }
1060 
1061  /*
1062  * Now compute and insert any defaults available for the columns not
1063  * provided by the input data. Anything not processed here or above will
1064  * remain NULL.
1065  */
1066  for (i = 0; i < num_defaults; i++)
1067  {
1068  /*
1069  * The caller must supply econtext and have switched into the
1070  * per-tuple memory context in it.
1071  */
1072  Assert(econtext != NULL);
1074 
1075  values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
1076  &nulls[defmap[i]]);
1077  }
1078 
1079  return true;
1080 }
int16 AttrNumber
Definition: attnum.h:21
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define MemSet(start, val, len)
Definition: c.h:999
char * CopyLimitPrintoutLength(const char *str)
Definition: copyfrom.c:194
static bool CopyGetInt16(CopyFromState cstate, int16 *val)
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
struct cursor * cur
Definition: ecpg.c:29
#define NOTICE
Definition: elog.h:35
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:344
bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod, fmNodePtr escontext, Datum *result)
Definition: fmgr.c:1585
@ COPY_ON_ERROR_STOP
Definition: copy.h:39
@ COPY_LOG_VERBOSITY_VERBOSE
Definition: copy.h:51
int i
Definition: isn.c:72
void pfree(void *pointer)
Definition: mcxt.c:1521
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
int16 attnum
Definition: pg_attribute.h:74
#define lfirst_int(lc)
Definition: pg_list.h:173
unsigned int Oid
Definition: postgres_ext.h:31
char string[11]
Definition: preproc-type.c:52
bool binary
Definition: copy.h:64
CopyLogVerbosityChoice log_verbosity
Definition: copy.h:87
CopyOnErrorChoice on_error
Definition: copy.h:86
bool * force_notnull_flags
Definition: copy.h:81
bool * force_null_flags
Definition: copy.h:84
const char * cur_attval
const char * cur_attname
ErrorSaveContext * escontext
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:266
Definition: fmgr.h:57
Definition: nodes.h:129

References Assert, attnum, CopyFromStateData::attnumlist, CopyFormatOptions::binary, CopyFromStateData::convert_select_flags, COPY_LOG_VERBOSITY_VERBOSE, COPY_ON_ERROR_STOP, CopyGetInt16(), CopyLimitPrintoutLength(), CopyReadBinaryAttribute(), CopyReadBinaryData(), CopyFormatOptions::csv_mode, cur, CopyFromStateData::cur_attname, CopyFromStateData::cur_attval, CopyFromStateData::cur_lineno, CurrentMemoryContext, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFromStateData::defmap, ExprContext::ecxt_per_tuple_memory, ereport, errcode(), errmsg(), ERROR, CopyFromStateData::escontext, ExecEvalExpr(), CopyFormatOptions::force_notnull_flags, CopyFormatOptions::force_null_flags, i, CopyFromStateData::in_functions, InputFunctionCallSafe(), lfirst_int, list_length(), CopyFormatOptions::log_verbosity, MemSet, NameStr, TupleDescData::natts, NextCopyFromRawFields(), NOTICE, CopyFormatOptions::null_print, CopyFromStateData::num_defaults, CopyFromStateData::num_errors, CopyFormatOptions::on_error, CopyFromStateData::opts, pfree(), CopyFromStateData::rel, RelationGetDescr, CopyFromStateData::relname_only, TupleDescAttr, CopyFromStateData::typioparams, and values.

Referenced by CopyFrom(), file_acquire_sample_rows(), and fileIterateForeignScan().

◆ NextCopyFromRawFields()

bool NextCopyFromRawFields ( CopyFromState  cstate,
char ***  fields,
int *  nfields 
)

Definition at line 745 of file copyfromparse.c.

746 {
747  int fldct;
748  bool done;
749 
750  /* only available for text or csv input */
751  Assert(!cstate->opts.binary);
752 
753  /* on input check that the header line is correct if needed */
754  if (cstate->cur_lineno == 0 && cstate->opts.header_line)
755  {
756  ListCell *cur;
757  TupleDesc tupDesc;
758 
759  tupDesc = RelationGetDescr(cstate->rel);
760 
761  cstate->cur_lineno++;
762  done = CopyReadLine(cstate);
763 
764  if (cstate->opts.header_line == COPY_HEADER_MATCH)
765  {
766  int fldnum;
767 
768  if (cstate->opts.csv_mode)
769  fldct = CopyReadAttributesCSV(cstate);
770  else
771  fldct = CopyReadAttributesText(cstate);
772 
773  if (fldct != list_length(cstate->attnumlist))
774  ereport(ERROR,
775  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
776  errmsg("wrong number of fields in header line: got %d, expected %d",
777  fldct, list_length(cstate->attnumlist))));
778 
779  fldnum = 0;
780  foreach(cur, cstate->attnumlist)
781  {
782  int attnum = lfirst_int(cur);
783  char *colName;
784  Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
785 
786  Assert(fldnum < cstate->max_fields);
787 
788  colName = cstate->raw_fields[fldnum++];
789  if (colName == NULL)
790  ereport(ERROR,
791  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
792  errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
793  fldnum, cstate->opts.null_print, NameStr(attr->attname))));
794 
795  if (namestrcmp(&attr->attname, colName) != 0)
796  {
797  ereport(ERROR,
798  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
799  errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
800  fldnum, colName, NameStr(attr->attname))));
801  }
802  }
803  }
804 
805  if (done)
806  return false;
807  }
808 
809  cstate->cur_lineno++;
810 
811  /* Actually read the line into memory here */
812  done = CopyReadLine(cstate);
813 
814  /*
815  * EOF at start of line means we're done. If we see EOF after some
816  * characters, we act as though it was newline followed by EOF, ie,
817  * process the line and then exit loop on next iteration.
818  */
819  if (done && cstate->line_buf.len == 0)
820  return false;
821 
822  /* Parse the line into de-escaped field values */
823  if (cstate->opts.csv_mode)
824  fldct = CopyReadAttributesCSV(cstate);
825  else
826  fldct = CopyReadAttributesText(cstate);
827 
828  *fields = cstate->raw_fields;
829  *nfields = fldct;
830  return true;
831 }
static int CopyReadAttributesCSV(CopyFromState cstate)
static int CopyReadAttributesText(CopyFromState cstate)
static bool CopyReadLine(CopyFromState cstate)
@ COPY_HEADER_MATCH
Definition: copy.h:30
int namestrcmp(Name name, const char *str)
Definition: name.c:247
CopyHeaderChoice header_line
Definition: copy.h:67

References Assert, attnum, CopyFromStateData::attnumlist, CopyFormatOptions::binary, COPY_HEADER_MATCH, CopyReadAttributesCSV(), CopyReadAttributesText(), CopyReadLine(), CopyFormatOptions::csv_mode, cur, CopyFromStateData::cur_lineno, ereport, errcode(), errmsg(), ERROR, CopyFormatOptions::header_line, StringInfoData::len, lfirst_int, CopyFromStateData::line_buf, list_length(), NameStr, namestrcmp(), CopyFormatOptions::null_print, CopyFromStateData::opts, CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, and TupleDescAttr.

Referenced by NextCopyFrom().

◆ ReceiveCopyBegin()

void ReceiveCopyBegin ( CopyFromState  cstate)

Definition at line 161 of file copyfromparse.c.

162 {
164  int natts = list_length(cstate->attnumlist);
165  int16 format = (cstate->opts.binary ? 1 : 0);
166  int i;
167 
169  pq_sendbyte(&buf, format); /* overall format */
170  pq_sendint16(&buf, natts);
171  for (i = 0; i < natts; i++)
172  pq_sendint16(&buf, format); /* per-column formats */
173  pq_endmessage(&buf);
174  cstate->copy_src = COPY_FRONTEND;
175  cstate->fe_msgbuf = makeStringInfo();
176  /* We *must* flush here to ensure FE knows it can send. */
177  pq_flush();
178 }
#define pq_flush()
Definition: libpq.h:46
static char format
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:296
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:88
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:160
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:136
#define PqMsg_CopyInResponse
Definition: protocol.h:45
StringInfo makeStringInfo(void)
Definition: stringinfo.c:38

References CopyFromStateData::attnumlist, CopyFormatOptions::binary, buf, COPY_FRONTEND, CopyFromStateData::copy_src, CopyFromStateData::fe_msgbuf, format, i, list_length(), makeStringInfo(), CopyFromStateData::opts, pq_beginmessage(), pq_endmessage(), pq_flush, pq_sendbyte(), pq_sendint16(), and PqMsg_CopyInResponse.

Referenced by BeginCopyFrom().

◆ ReceiveCopyBinaryHeader()

void ReceiveCopyBinaryHeader ( CopyFromState  cstate)

Definition at line 181 of file copyfromparse.c.

182 {
183  char readSig[11];
184  int32 tmp;
185 
186  /* Signature */
187  if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
188  memcmp(readSig, BinarySignature, 11) != 0)
189  ereport(ERROR,
190  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
191  errmsg("COPY file signature not recognized")));
192  /* Flags field */
193  if (!CopyGetInt32(cstate, &tmp))
194  ereport(ERROR,
195  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
196  errmsg("invalid COPY file header (missing flags)")));
197  if ((tmp & (1 << 16)) != 0)
198  ereport(ERROR,
199  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
200  errmsg("invalid COPY file header (WITH OIDS)")));
201  tmp &= ~(1 << 16);
202  if ((tmp >> 16) != 0)
203  ereport(ERROR,
204  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
205  errmsg("unrecognized critical flags in COPY file header")));
206  /* Header extension length */
207  if (!CopyGetInt32(cstate, &tmp) ||
208  tmp < 0)
209  ereport(ERROR,
210  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
211  errmsg("invalid COPY file header (missing length)")));
212  /* Skip extension header, if present */
213  while (tmp-- > 0)
214  {
215  if (CopyReadBinaryData(cstate, readSig, 1) != 1)
216  ereport(ERROR,
217  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
218  errmsg("invalid COPY file header (wrong length)")));
219  }
220 }
static const char BinarySignature[11]

References BinarySignature, CopyGetInt32(), CopyReadBinaryData(), ereport, errcode(), errmsg(), and ERROR.

Referenced by BeginCopyFrom().

Variable Documentation

◆ BinarySignature

const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"
static

Definition at line 139 of file copyfromparse.c.

Referenced by ReceiveCopyBinaryHeader().