PostgreSQL Source Code git master
Loading...
Searching...
No Matches
copyfromparse.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include "commands/copyapi.h"
#include "commands/copyfrom_internal.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bitutils.h"
#include "port/pg_bswap.h"
#include "port/simd.h"
#include "utils/builtins.h"
#include "utils/rel.h"
#include "utils/wait_event.h"
Include dependency graph for copyfromparse.c:

Go to the source code of this file.

Macros

#define ISOCTAL(c)   (((c) >= '0') && ((c) <= '7'))
 
#define OCTVALUE(c)   ((c) - '0')
 
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
 
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
 
#define REFILL_LINEBUF
 

Functions

static bool CopyReadLine (CopyFromState cstate, bool is_csv)
 
static pg_attribute_always_inline bool CopyReadLineText (CopyFromState cstate, bool is_csv)
 
static int CopyReadAttributesText (CopyFromState cstate)
 
static int CopyReadAttributesCSV (CopyFromState cstate)
 
static Datum CopyReadBinaryAttribute (CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
 
static pg_attribute_always_inline bool CopyFromTextLikeOneRow (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls, bool is_csv)
 
static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal (CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
 
static int CopyGetData (CopyFromState cstate, void *databuf, int minread, int maxread)
 
static bool CopyGetInt32 (CopyFromState cstate, int32 *val)
 
static bool CopyGetInt16 (CopyFromState cstate, int16 *val)
 
static void CopyLoadInputBuf (CopyFromState cstate)
 
static int CopyReadBinaryData (CopyFromState cstate, char *dest, int nbytes)
 
void ReceiveCopyBegin (CopyFromState cstate)
 
void ReceiveCopyBinaryHeader (CopyFromState cstate)
 
static void CopyConvertBuf (CopyFromState cstate)
 
static void CopyConversionError (CopyFromState cstate)
 
static void CopyLoadRawBuf (CopyFromState cstate)
 
bool NextCopyFromRawFields (CopyFromState cstate, char ***fields, int *nfields)
 
bool NextCopyFrom (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
 
bool CopyFromTextOneRow (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
 
bool CopyFromCSVOneRow (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
 
bool CopyFromBinaryOneRow (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
 
static int GetDecimalFromHex (char hex)
 

Variables

static const char BinarySignature [11] = "PGCOPY\n\377\r\n\0"
 

Macro Definition Documentation

◆ IF_NEED_REFILL_AND_EOF_BREAK

#define IF_NEED_REFILL_AND_EOF_BREAK (   extralen)
Value:
if (1) \
{ \
if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
{ \
if (extralen) \
input_buf_ptr = copy_buf_len; /* consume the partial character */ \
/* backslash just before EOF, treat as data char */ \
result = true; \
break; \
} \
} else ((void) 0)
static int fb(int x)

Definition at line 112 of file copyfromparse.c.

113if (1) \
114{ \
115 if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
116 { \
117 if (extralen) \
118 input_buf_ptr = copy_buf_len; /* consume the partial character */ \
119 /* backslash just before EOF, treat as data char */ \
120 result = true; \
121 break; \
122 } \
123} else ((void) 0)

◆ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE

#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE (   extralen)
Value:
if (1) \
{ \
if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
{ \
input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
need_data = true; \
continue; \
} \
} else ((void) 0)

Definition at line 100 of file copyfromparse.c.

101if (1) \
102{ \
103 if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
104 { \
105 input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
106 need_data = true; \
107 continue; \
108 } \
109} else ((void) 0)

◆ ISOCTAL

#define ISOCTAL (   c)    (((c) >= '0') && ((c) <= '7'))

Definition at line 81 of file copyfromparse.c.

◆ OCTVALUE

#define OCTVALUE (   c)    ((c) - '0')

Definition at line 82 of file copyfromparse.c.

◆ REFILL_LINEBUF

#define REFILL_LINEBUF
Value:
if (1) \
{ \
if (input_buf_ptr > cstate->input_buf_index) \
{ \
appendBinaryStringInfo(&cstate->line_buf, \
cstate->input_buf + cstate->input_buf_index, \
input_buf_ptr - cstate->input_buf_index); \
cstate->input_buf_index = input_buf_ptr; \
} \
} else ((void) 0)

Definition at line 129 of file copyfromparse.c.

130if (1) \
131{ \
132 if (input_buf_ptr > cstate->input_buf_index) \
133 { \
134 appendBinaryStringInfo(&cstate->line_buf, \
135 cstate->input_buf + cstate->input_buf_index, \
136 input_buf_ptr - cstate->input_buf_index); \
137 cstate->input_buf_index = input_buf_ptr; \
138 } \
139} else ((void) 0)

Function Documentation

◆ CopyConversionError()

static void CopyConversionError ( CopyFromState  cstate)
static

Definition at line 539 of file copyfromparse.c.

540{
541 Assert(cstate->raw_buf_len > 0);
542 Assert(cstate->input_reached_error);
543
544 if (!cstate->need_transcoding)
545 {
546 /*
547 * Everything up to input_buf_len was successfully verified, and
548 * input_buf_len points to the invalid or incomplete character.
549 */
550 report_invalid_encoding(cstate->file_encoding,
551 cstate->raw_buf + cstate->input_buf_len,
552 cstate->raw_buf_len - cstate->input_buf_len);
553 }
554 else
555 {
556 /*
557 * raw_buf_index points to the invalid or untranslatable character. We
558 * let the conversion routine report the error, because it can provide
559 * a more specific error message than we could here. An earlier call
560 * to the conversion routine in CopyConvertBuf() detected that there
561 * is an error, now we call the conversion routine again with
562 * noError=false, to have it throw the error.
563 */
564 unsigned char *src;
565 int srclen;
566 unsigned char *dst;
567 int dstlen;
568
569 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
570 srclen = cstate->raw_buf_len - cstate->raw_buf_index;
571 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
572 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
573
574 (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
575 cstate->file_encoding,
576 GetDatabaseEncoding(),
577 src, srclen,
578 dst, dstlen,
579 false);
580
581 /*
582 * The conversion routine should have reported an error, so this
583 * should not be reached.
584 */
585 elog(ERROR, "encoding conversion failed without error");
586 }
587}
#define Assert(condition)
Definition c.h:945
#define INPUT_BUF_SIZE
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
int GetDatabaseEncoding(void)
Definition mbutils.c:1389
int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
Definition mbutils.c:478
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition mbutils.c:1826

References Assert, CopyFromStateData::conversion_proc, elog, ERROR, fb(), CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, pg_do_encoding_conversion_buf(), CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and report_invalid_encoding().

Referenced by CopyLoadInputBuf().

◆ CopyConvertBuf()

static void CopyConvertBuf ( CopyFromState  cstate)
static

Definition at line 406 of file copyfromparse.c.

407{
408 /*
409 * If the file and server encoding are the same, no encoding conversion is
410 * required. However, we still need to verify that the input is valid for
411 * the encoding.
412 */
413 if (!cstate->need_transcoding)
414 {
415 /*
416 * When conversion is not required, input_buf and raw_buf are the
417 * same. raw_buf_len is the total number of bytes in the buffer, and
418 * input_buf_len tracks how many of those bytes have already been
419 * verified.
420 */
421 int preverifiedlen = cstate->input_buf_len;
422 int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
423 int nverified;
424
425 if (unverifiedlen == 0)
426 {
427 /*
428 * If no more raw data is coming, report the EOF to the caller.
429 */
430 if (cstate->raw_reached_eof)
431 cstate->input_reached_eof = true;
432 return;
433 }
434
435 /*
436 * Verify the new data, including any residual unverified bytes from
437 * previous round.
438 */
439 nverified = pg_encoding_verifymbstr(cstate->file_encoding,
440 cstate->raw_buf + preverifiedlen,
441 unverifiedlen);
442 if (nverified == 0)
443 {
444 /*
445 * Could not verify anything.
446 *
447 * If there is no more raw input data coming, it means that there
448 * was an incomplete multi-byte sequence at the end. Also, if
449 * there's "enough" input left, we should be able to verify at
450 * least one character, and a failure to do so means that we've
451 * hit an invalid byte sequence.
452 */
453 if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
454 cstate->input_reached_error = true;
455 return;
456 }
457 cstate->input_buf_len += nverified;
458 }
459 else
460 {
461 /*
462 * Encoding conversion is needed.
463 */
464 int nbytes;
465 unsigned char *src;
466 int srclen;
467 unsigned char *dst;
468 int dstlen;
469 int convertedlen;
470
471 if (RAW_BUF_BYTES(cstate) == 0)
472 {
473 /*
474 * If no more raw data is coming, report the EOF to the caller.
475 */
476 if (cstate->raw_reached_eof)
477 cstate->input_reached_eof = true;
478 return;
479 }
480
481 /*
482 * First, copy down any unprocessed data.
483 */
484 nbytes = INPUT_BUF_BYTES(cstate);
485 if (nbytes > 0 && cstate->input_buf_index > 0)
486 memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
487 nbytes);
488 cstate->input_buf_index = 0;
489 cstate->input_buf_len = nbytes;
490 cstate->input_buf[nbytes] = '\0';
491
492 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
493 srclen = cstate->raw_buf_len - cstate->raw_buf_index;
494 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
495 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
496
497 /*
498 * Do the conversion. This might stop short, if there is an invalid
499 * byte sequence in the input. We'll convert as much as we can in
500 * that case.
501 *
502 * Note: Even if we hit an invalid byte sequence, we don't report the
503 * error until all the valid bytes have been consumed. The input
504 * might contain an end-of-input marker (\.), and we don't want to
505 * report an error if the invalid byte sequence is after the
506 * end-of-input marker. We might unnecessarily convert some data
507 * after the end-of-input marker as long as it's valid for the
508 * encoding, but that's harmless.
509 */
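510 convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
511 cstate->file_encoding,
512 GetDatabaseEncoding(),
513 src, srclen,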
514 dst, dstlen,
515 true);
516 if (convertedlen == 0)
517 {
518 /*
519 * Could not convert anything. If there is no more raw input data
520 * coming, it means that there was an incomplete multi-byte
521 * sequence at the end. Also, if there is plenty of input left,
522 * we should be able to convert at least one character, so a
523 * failure to do so must mean that we've hit a byte sequence
524 * that's invalid.
525 */
526 if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
527 cstate->input_reached_error = true;
528 return;
529 }
530 cstate->raw_buf_index += convertedlen;
531 cstate->input_buf_len += strlen((char *) dst);
532 }
533}
#define RAW_BUF_BYTES(cstate)
#define INPUT_BUF_BYTES(cstate)
#define MAX_CONVERSION_INPUT_LENGTH
Definition pg_wchar.h:320
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
Definition wchar.c:2224
int pg_encoding_max_length(int encoding)
Definition wchar.c:2235

References CopyFromStateData::conversion_proc, fb(), CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, MAX_CONVERSION_INPUT_LENGTH, CopyFromStateData::need_transcoding, pg_do_encoding_conversion_buf(), pg_encoding_max_length(), pg_encoding_verifymbstr(), CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf().

◆ CopyFromBinaryOneRow()

bool CopyFromBinaryOneRow ( CopyFromState  cstate,
ExprContext econtext,
Datum values,
bool nulls 
)

Definition at line 1164 of file copyfromparse.c.

1166{
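1167 TupleDesc tupDesc;
1168 AttrNumber attr_count;
1169 FmgrInfo *in_functions = cstate->in_functions;
1170 Oid *typioparams = cstate->typioparams;
1171 int16 fld_count;
1172 ListCell *cur;
1173
1174 tupDesc = RelationGetDescr(cstate->rel);
1175 attr_count = list_length(cstate->attnumlist);
1176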
1177 cstate->cur_lineno++;
1178
1179 if (!CopyGetInt16(cstate, &fld_count))
1180 {
1181 /* EOF detected (end of file, or protocol-level EOF) */
1182 return false;
1183 }
1184
1185 if (fld_count == -1)
1186 {
1187 /*
1188 * Received EOF marker. Wait for the protocol-level EOF, and complain
1189 * if it doesn't come immediately. In COPY FROM STDIN, this ensures
1190 * that we correctly handle CopyFail, if client chooses to send that
1191 * now. When copying from file, we could ignore the rest of the file
1192 * like in text mode, but we choose to be consistent with the COPY
1193 * FROM STDIN case.
1194 */
1195 char dummy;
1196
1197 if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
1198 ereport(ERROR,
1199 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1200 errmsg("received copy data after EOF marker")));
1201 return false;
1202 }
1203
1204 if (fld_count != attr_count)
1205 ereport(ERROR,
1206 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1207 errmsg("row field count is %d, expected %d",
1208 (int) fld_count, attr_count)));
1209
1210 foreach(cur, cstate->attnumlist)
1211 {
1212 int attnum = lfirst_int(cur);
1213 int m = attnum - 1;
1214 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1215
1216 cstate->cur_attname = NameStr(att->attname);
1217 values[m] = CopyReadBinaryAttribute(cstate,
1218 &in_functions[m],
1219 typioparams[m],
1220 att->atttypmod,
1221 &nulls[m]);
1222 cstate->cur_attname = NULL;
1223 }
1224
1225 return true;
1226}
int16 AttrNumber
Definition attnum.h:21
static Datum values[MAXATTR]
Definition bootstrap.c:188
#define NameStr(name)
Definition c.h:837
int16_t int16
Definition c.h:613
static bool CopyGetInt16(CopyFromState cstate, int16 *val)
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
struct cursor * cur
Definition ecpg.c:29
int errcode(int sqlerrcode)
Definition elog.c:874
#define ereport(elevel,...)
Definition elog.h:150
static char * errmsg
int16 attnum
FormData_pg_attribute * Form_pg_attribute
static int list_length(const List *l)
Definition pg_list.h:152
#define lfirst_int(lc)
Definition pg_list.h:173
unsigned int Oid
#define RelationGetDescr(relation)
Definition rel.h:540
const char * cur_attname
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:178

References attnum, CopyFromStateData::attnumlist, CopyGetInt16(), CopyReadBinaryAttribute(), CopyReadBinaryData(), cur, CopyFromStateData::cur_attname, CopyFromStateData::cur_lineno, ereport, errcode(), errmsg, ERROR, fb(), CopyFromStateData::in_functions, lfirst_int, list_length(), NameStr, CopyFromStateData::rel, RelationGetDescr, TupleDescAttr(), CopyFromStateData::typioparams, and values.

◆ CopyFromCSVOneRow()

bool CopyFromCSVOneRow ( CopyFromState  cstate,
ExprContext econtext,
Datum values,
bool nulls 
)

Definition at line 940 of file copyfromparse.c.

942{
943 return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, true);
944}
static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls, bool is_csv)

References CopyFromTextLikeOneRow(), and values.

◆ CopyFromTextLikeOneRow()

static pg_attribute_always_inline bool CopyFromTextLikeOneRow ( CopyFromState  cstate,
ExprContext econtext,
Datum values,
bool nulls,
bool  is_csv 
)
static

Definition at line 953 of file copyfromparse.c.

955{
956 TupleDesc tupDesc;
958 FmgrInfo *in_functions = cstate->in_functions;
959 Oid *typioparams = cstate->typioparams;
960 ExprState **defexprs = cstate->defexprs;
961 char **field_strings;
962 ListCell *cur;
963 int fldct;
964 int fieldno;
965 char *string;
966 bool current_row_erroneous = false;
967
968 tupDesc = RelationGetDescr(cstate->rel);
970
971 /* read raw fields in the next line */
972 if (!NextCopyFromRawFieldsInternal(cstate, &field_strings, &fldct, is_csv))
973 return false;
974
975 /* check for overflowing fields */
976 if (attr_count > 0 && fldct > attr_count)
977 ereport(ERROR,
978 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
979 errmsg("extra data after last expected column")));
980
981 fieldno = 0;
982
983 /* Loop to read the user attributes on the line. */
984 foreach(cur, cstate->attnumlist)
985 {
986 int attnum = lfirst_int(cur);
987 int m = attnum - 1;
988 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
989
990 if (fieldno >= fldct)
991 ereport(ERROR,
992 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
993 errmsg("missing data for column \"%s\"",
994 NameStr(att->attname))));
995 string = field_strings[fieldno++];
996
997 if (cstate->convert_select_flags &&
998 !cstate->convert_select_flags[m])
999 {
1000 /* ignore input field, leaving column as NULL */
1001 continue;
1002 }
1003
1004 if (is_csv)
1005 {
1006 if (string == NULL &&
1007 cstate->opts.force_notnull_flags[m])
1008 {
1009 /*
1010 * FORCE_NOT_NULL option is set and column is NULL - convert
1011 * it to the NULL string.
1012 */
1013 string = cstate->opts.null_print;
1014 }
1015 else if (string != NULL && cstate->opts.force_null_flags[m]
1016 && strcmp(string, cstate->opts.null_print) == 0)
1017 {
1018 /*
1019 * FORCE_NULL option is set and column matches the NULL
1020 * string. It must have been quoted, or otherwise the string
1021 * would already have been set to NULL. Convert it to NULL as
1022 * specified.
1023 */
1024 string = NULL;
1025 }
1026 }
1027
1028 cstate->cur_attname = NameStr(att->attname);
1029 cstate->cur_attval = string;
1030
1031 if (string != NULL)
1032 nulls[m] = false;
1033
1034 if (cstate->defaults[m])
1035 {
1036 /* We must have switched into the per-tuple memory context */
1037 Assert(econtext != NULL);
1039
1040 values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
1041 }
1042
1043 /*
1044 * If ON_ERROR is specified, handle the different options
1045 */
1046 else if (!InputFunctionCallSafe(&in_functions[m],
1047 string,
1048 typioparams[m],
1049 att->atttypmod,
1050 (Node *) cstate->escontext,
1051 &values[m]))
1052 {
1054
1055 if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
1056 cstate->num_errors++;
1057 else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
1058 {
1059 /*
1060 * Reset error state so the subsequent InputFunctionCallSafe
1061 * call (for domain constraint check) can properly report
1062 * whether it succeeded or failed.
1063 */
1064 cstate->escontext->error_occurred = false;
1065
1067
1068 /*
1069 * For constrained domains, we need an additional
1070 * InputFunctionCallSafe() to ensure that an error is thrown
1071 * if the domain constraint rejects null values.
1072 */
1073 if (!cstate->domain_with_constraint[m] ||
1074 InputFunctionCallSafe(&in_functions[m],
1075 NULL,
1076 typioparams[m],
1077 att->atttypmod,
1078 (Node *) cstate->escontext,
1079 &values[m]))
1080 {
1081 nulls[m] = true;
1082 values[m] = (Datum) 0;
1083 }
1084 else
1085 ereport(ERROR,
1087 errmsg("domain %s does not allow null values",
1088 format_type_be(typioparams[m])),
1089 errdetail("ON_ERROR SET_NULL cannot be applied because column \"%s\" (domain %s) does not accept null values.",
1090 cstate->cur_attname,
1091 format_type_be(typioparams[m])),
1092 errdatatype(typioparams[m]));
1093
1094 /*
1095 * We count only the number of rows (not fields) where
1096 * ON_ERROR SET_NULL was applied.
1097 */
1098 if (!current_row_erroneous)
1099 {
1100 current_row_erroneous = true;
1101 cstate->num_errors++;
1102 }
1103 }
1104
1105 if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
1106 {
1107 /*
1108 * Since we emit line number and column info in the below
1109 * notice message, we suppress error context information other
1110 * than the relation name.
1111 */
1112 Assert(!cstate->relname_only);
1113 cstate->relname_only = true;
1114
1115 if (cstate->cur_attval)
1116 {
1117 char *attval;
1118
1120
1121 if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
1123 errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
1124 cstate->cur_lineno,
1125 cstate->cur_attname,
1126 attval));
1127 else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
1129 errmsg("setting to null due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
1130 cstate->cur_lineno,
1131 cstate->cur_attname,
1132 attval));
1133 pfree(attval);
1134 }
1135 else
1136 {
1137 if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
1139 errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",
1140 cstate->cur_lineno,
1141 cstate->cur_attname));
1142 }
1143 /* reset relname_only */
1144 cstate->relname_only = false;
1145 }
1146
1147 if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
1148 return true;
1149 else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
1150 continue;
1151 }
1152
1153 cstate->cur_attname = NULL;
1154 cstate->cur_attval = NULL;
1155 }
1156
1158
1159 return true;
1160}
char * CopyLimitPrintoutLength(const char *str)
Definition copyfrom.c:335
static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
int errdatatype(Oid datatypeOid)
Definition domains.c:407
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define NOTICE
Definition elog.h:35
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition executor.h:396
bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod, Node *escontext, Datum *result)
Definition fmgr.c:1586
char * format_type_be(Oid type_oid)
@ COPY_ON_ERROR_IGNORE
Definition copy.h:37
@ COPY_ON_ERROR_SET_NULL
Definition copy.h:38
@ COPY_ON_ERROR_STOP
Definition copy.h:36
@ COPY_LOG_VERBOSITY_VERBOSE
Definition copy.h:49
void pfree(void *pointer)
Definition mcxt.c:1616
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
uint64_t Datum
Definition postgres.h:70
char string[11]
CopyLogVerbosityChoice log_verbosity
Definition copy.h:97
CopyOnErrorChoice on_error
Definition copy.h:96
char * null_print
Definition copy.h:77
bool * force_notnull_flags
Definition copy.h:90
bool * force_null_flags
Definition copy.h:94
CopyFormatOptions opts
const char * cur_attval
ErrorSaveContext * escontext
MemoryContext ecxt_per_tuple_memory
Definition execnodes.h:292
Definition nodes.h:135

References Assert, attnum, CopyFromStateData::attnumlist, CopyFromStateData::convert_select_flags, COPY_LOG_VERBOSITY_VERBOSE, COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_SET_NULL, COPY_ON_ERROR_STOP, CopyLimitPrintoutLength(), cur, CopyFromStateData::cur_attname, CopyFromStateData::cur_attval, CopyFromStateData::cur_lineno, CurrentMemoryContext, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFromStateData::domain_with_constraint, ExprContext::ecxt_per_tuple_memory, ereport, errcode(), errdatatype(), errdetail(), errmsg, ERROR, ErrorSaveContext::error_occurred, CopyFromStateData::escontext, ExecEvalExpr(), fb(), CopyFormatOptions::force_notnull_flags, CopyFormatOptions::force_null_flags, format_type_be(), CopyFromStateData::in_functions, InputFunctionCallSafe(), lfirst_int, list_length(), CopyFormatOptions::log_verbosity, NameStr, NextCopyFromRawFieldsInternal(), NOTICE, CopyFormatOptions::null_print, CopyFromStateData::num_errors, CopyFormatOptions::on_error, CopyFromStateData::opts, pfree(), CopyFromStateData::rel, RelationGetDescr, CopyFromStateData::relname_only, TupleDescAttr(), CopyFromStateData::typioparams, and values.

Referenced by CopyFromCSVOneRow(), and CopyFromTextOneRow().

◆ CopyFromTextOneRow()

bool CopyFromTextOneRow ( CopyFromState  cstate,
ExprContext econtext,
Datum values,
bool nulls 
)

Definition at line 932 of file copyfromparse.c.

934{
935 return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, false);
936}

References CopyFromTextLikeOneRow(), and values.

◆ CopyGetData()

static int CopyGetData ( CopyFromState  cstate,
void databuf,
int  minread,
int  maxread 
)
static

Definition at line 249 of file copyfromparse.c.

250{
251 int bytesread = 0;
252
253 switch (cstate->copy_src)
254 {
255 case COPY_FILE:
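256 pgstat_report_wait_start(WAIT_EVENT_COPY_FROM_READ);
257 bytesread = fread(databuf, 1, maxread, cstate->copy_file);
258 pgstat_report_wait_end();
259 if (ferror(cstate->copy_file))
260 ereport(ERROR,
261 (errcode_for_file_access(),
262 errmsg("could not read from COPY file: %m")));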
263 if (bytesread == 0)
264 cstate->raw_reached_eof = true;
265 break;
266 case COPY_FRONTEND:
267 while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
268 {
269 int avail;
270
271 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
272 {
273 /* Try to receive another message */
274 int mtype;
275 int maxmsglen;
276
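277 readmessage:
278 HOLD_CANCEL_INTERRUPTS();
279 pq_startmsgread();
280 mtype = pq_getbyte();
281 if (mtype == EOF)
282 ereport(ERROR,
283 (errcode(ERRCODE_CONNECTION_FAILURE),
284 errmsg("unexpected EOF on client connection with an open transaction")));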
285 /* Validate message type and set packet size limit */
286 switch (mtype)
287 {
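288 case PqMsg_CopyData:
289 maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
290 break;
291 case PqMsg_CopyDone:
292 case PqMsg_CopyFail:
293 case PqMsg_Flush:
294 case PqMsg_Sync:
295 maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
296 break;
297 default:
298 ereport(ERROR,
299 (errcode(ERRCODE_PROTOCOL_VIOLATION),
300 errmsg("unexpected message type 0x%02X during COPY from stdin",
301 mtype)));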
302 maxmsglen = 0; /* keep compiler quiet */
303 break;
304 }
305 /* Now collect the message body */
306 if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
307 ereport(ERROR,
308 (errcode(ERRCODE_CONNECTION_FAILURE),
309 errmsg("unexpected EOF on client connection with an open transaction")));
310 RESUME_CANCEL_INTERRUPTS();
311 /* ... and process it */
312 switch (mtype)
313 {
314 case PqMsg_CopyData:
315 break;
316 case PqMsg_CopyDone:
317 /* COPY IN correctly terminated by frontend */
318 cstate->raw_reached_eof = true;
319 return bytesread;
320 case PqMsg_CopyFail:
321 ereport(ERROR,
322 (errcode(ERRCODE_QUERY_CANCELED),
323 errmsg("COPY from stdin failed: %s",
324 pq_getmsgstring(cstate->fe_msgbuf))));
325 break;
326 case PqMsg_Flush:
327 case PqMsg_Sync:
328
329 /*
330 * Ignore Flush/Sync for the convenience of client
331 * libraries (such as libpq) that may send those
332 * without noticing that the command they just
333 * sent was COPY.
334 */
335 goto readmessage;
336 default:
337 Assert(false); /* NOT REACHED */
338 }
339 }
340 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
341 if (avail > maxread)
342 avail = maxread;
343 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
344 databuf = (char *) databuf + avail;
345 maxread -= avail;
346 bytesread += avail;
347 }
348 break;
349 case COPY_CALLBACK:
350 bytesread = cstate->data_source_cb(databuf, minread, maxread);
351 break;
352 }
353
354 return bytesread;
355}
@ COPY_FILE
Definition copyto.c:51
@ COPY_CALLBACK
Definition copyto.c:53
@ COPY_FRONTEND
Definition copyto.c:52
int errcode_for_file_access(void)
Definition elog.c:897
#define ERRCODE_PROTOCOL_VIOLATION
Definition fe-connect.c:96
#define PQ_SMALL_MESSAGE_LIMIT
Definition libpq.h:33
#define PQ_LARGE_MESSAGE_LIMIT
Definition libpq.h:34
#define HOLD_CANCEL_INTERRUPTS()
Definition miscadmin.h:142
#define RESUME_CANCEL_INTERRUPTS()
Definition miscadmin.h:144
int pq_getmessage(StringInfo s, int maxlen)
Definition pqcomm.c:1204
int pq_getbyte(void)
Definition pqcomm.c:964
void pq_startmsgread(void)
Definition pqcomm.c:1142
const char * pq_getmsgstring(StringInfo msg)
Definition pqformat.c:578
void pq_copymsgbytes(StringInfo msg, void *buf, int datalen)
Definition pqformat.c:527
#define PqMsg_CopyDone
Definition protocol.h:64
#define PqMsg_CopyData
Definition protocol.h:65
#define PqMsg_Sync
Definition protocol.h:27
#define PqMsg_CopyFail
Definition protocol.h:29
#define PqMsg_Flush
Definition protocol.h:24
copy_data_source_cb data_source_cb
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85

References Assert, COPY_CALLBACK, COPY_FILE, CopyFromStateData::copy_file, COPY_FRONTEND, CopyFromStateData::copy_src, StringInfoData::cursor, CopyFromStateData::data_source_cb, ereport, errcode(), errcode_for_file_access(), ERRCODE_PROTOCOL_VIOLATION, errmsg, ERROR, fb(), CopyFromStateData::fe_msgbuf, HOLD_CANCEL_INTERRUPTS, StringInfoData::len, pgstat_report_wait_end(), pgstat_report_wait_start(), pq_copymsgbytes(), pq_getbyte(), pq_getmessage(), pq_getmsgstring(), PQ_LARGE_MESSAGE_LIMIT, PQ_SMALL_MESSAGE_LIMIT, pq_startmsgread(), PqMsg_CopyData, PqMsg_CopyDone, PqMsg_CopyFail, PqMsg_Flush, PqMsg_Sync, CopyFromStateData::raw_reached_eof, and RESUME_CANCEL_INTERRUPTS.

Referenced by CopyLoadRawBuf(), and CopyReadLine().

◆ CopyGetInt16()

static bool CopyGetInt16 ( CopyFromState  cstate,
int16 val 
)
inlinestatic

Definition at line 385 of file copyfromparse.c.

386{
387 uint16 buf;
388
389 if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
390 {
391 *val = 0; /* suppress compiler warning */
392 return false;
393 }
394 *val = (int16) pg_ntoh16(buf);
395 return true;
396}
uint16_t uint16
Definition c.h:617
long val
Definition informix.c:689
#define pg_ntoh16(x)
Definition pg_bswap.h:124
static char buf[DEFAULT_XLOG_SEG_SIZE]

References buf, CopyReadBinaryData(), pg_ntoh16, and val.

Referenced by CopyFromBinaryOneRow().

◆ CopyGetInt32()

static bool CopyGetInt32 ( CopyFromState  cstate,
int32 val 
)
inlinestatic

Definition at line 368 of file copyfromparse.c.

369{
370 uint32 buf;
371
372 if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
373 {
374 *val = 0; /* suppress compiler warning */
375 return false;
376 }
377 *val = (int32) pg_ntoh32(buf);
378 return true;
379}
int32_t int32
Definition c.h:614
uint32_t uint32
Definition c.h:618
#define pg_ntoh32(x)
Definition pg_bswap.h:125

References buf, CopyReadBinaryData(), pg_ntoh32, and val.

Referenced by CopyReadBinaryAttribute(), and ReceiveCopyBinaryHeader().

◆ CopyLoadInputBuf()

static void CopyLoadInputBuf ( CopyFromState  cstate)
static

Definition at line 656 of file copyfromparse.c.

657{
658 int nbytes = INPUT_BUF_BYTES(cstate);
659
660 /*
661 * The caller has updated input_buf_index to indicate how much of the
662 * input has been consumed and isn't needed anymore. If input_buf is the
663 * same physical area as raw_buf, update raw_buf_index accordingly.
664 */
665 if (cstate->raw_buf == cstate->input_buf)
666 {
667 Assert(!cstate->need_transcoding);
668 Assert(cstate->input_buf_index >= cstate->raw_buf_index);
669 cstate->raw_buf_index = cstate->input_buf_index;
670 }
671
672 for (;;)
673 {
674 /* If we now have some unconverted data, try to convert it */
675 CopyConvertBuf(cstate);
676
677 /* If we now have some more input bytes ready, return them */
678 if (INPUT_BUF_BYTES(cstate) > nbytes)
679 return;
680
681 /*
682 * If we reached an invalid byte sequence, or we're at an incomplete
683 * multi-byte character but there is no more raw input data, report
684 * conversion error.
685 */
686 if (cstate->input_reached_error)
687 CopyConversionError(cstate);
688
689 /* no more input, and everything has been converted */
690 if (cstate->input_reached_eof)
691 break;
692
693 /* Try to load more raw data */
694 Assert(!cstate->raw_reached_eof);
695 CopyLoadRawBuf(cstate);
696 }
697}
static void CopyConversionError(CopyFromState cstate)
static void CopyLoadRawBuf(CopyFromState cstate)
static void CopyConvertBuf(CopyFromState cstate)

References Assert, CopyConversionError(), CopyConvertBuf(), CopyLoadRawBuf(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyReadLineText().

◆ CopyLoadRawBuf()

static void CopyLoadRawBuf ( CopyFromState  cstate)
static

Definition at line 596 of file copyfromparse.c.

597{
598 int nbytes;
599 int inbytes;
600
601 /*
602 * In text mode, if encoding conversion is not required, raw_buf and
603 * input_buf point to the same buffer. Their len/index better agree, too.
604 */
605 if (cstate->raw_buf == cstate->input_buf)
606 {
607 Assert(!cstate->need_transcoding);
608 Assert(cstate->raw_buf_index == cstate->input_buf_index);
609 Assert(cstate->input_buf_len <= cstate->raw_buf_len);
610 }
611
612 /*
613 * Copy down the unprocessed data if any.
614 */
615 nbytes = RAW_BUF_BYTES(cstate);
616 if (nbytes > 0 && cstate->raw_buf_index > 0)
617 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
618 nbytes);
619 cstate->raw_buf_len -= cstate->raw_buf_index;
620 cstate->raw_buf_index = 0;
621
622 /*
623 * If raw_buf and input_buf are in fact the same buffer, adjust the
624 * input_buf variables, too.
625 */
626 if (cstate->raw_buf == cstate->input_buf)
627 {
628 cstate->input_buf_len -= cstate->input_buf_index;
629 cstate->input_buf_index = 0;
630 }
631
632 /* Load more data */
633 inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
634 1, RAW_BUF_SIZE - cstate->raw_buf_len);
635 nbytes += inbytes;
636 cstate->raw_buf[nbytes] = '\0';
637 cstate->raw_buf_len = nbytes;
638
639 cstate->bytes_processed += inbytes;
641
642 if (inbytes == 0)
643 cstate->raw_reached_eof = true;
644}
void pgstat_progress_update_param(int index, int64 val)
#define RAW_BUF_SIZE
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
#define PROGRESS_COPY_BYTES_PROCESSED
Definition progress.h:170

References Assert, CopyFromStateData::bytes_processed, CopyGetData(), fb(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::need_transcoding, pgstat_progress_update_param(), PROGRESS_COPY_BYTES_PROCESSED, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, RAW_BUF_SIZE, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf(), and CopyReadBinaryData().

◆ CopyReadAttributesCSV()

static int CopyReadAttributesCSV ( CopyFromState  cstate)
static

Definition at line 2083 of file copyfromparse.c.

2084{
2085 char delimc = cstate->opts.delim[0];
2086 char quotec = cstate->opts.quote[0];
2087 char escapec = cstate->opts.escape[0];
2088 int fieldno;
2089 char *output_ptr;
2090 char *cur_ptr;
2091 char *line_end_ptr;
2092
2093 /*
2094 * We need a special case for zero-column tables: check that the input
2095 * line is empty, and return.
2096 */
2097 if (cstate->max_fields <= 0)
2098 {
2099 if (cstate->line_buf.len != 0)
2100 ereport(ERROR,
2102 errmsg("extra data after last expected column")));
2103 return 0;
2104 }
2105
2107
2108 /*
2109 * The de-escaped attributes will certainly not be longer than the input
2110 * data line, so we can just force attribute_buf to be large enough and
2111 * then transfer data without any checks for enough space. We need to do
2112 * it this way because enlarging attribute_buf mid-stream would invalidate
2113 * pointers already stored into cstate->raw_fields[].
2114 */
2115 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
2116 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
2117 output_ptr = cstate->attribute_buf.data;
2118
2119 /* set pointer variables for loop */
2120 cur_ptr = cstate->line_buf.data;
2121 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
2122
2123 /* Outer loop iterates over fields */
2124 fieldno = 0;
2125 for (;;)
2126 {
2127 bool found_delim = false;
2128 bool saw_quote = false;
2129 char *start_ptr;
2130 char *end_ptr;
2131 int input_len;
2132
2133 /* Make sure there is enough space for the next value */
2134 if (fieldno >= cstate->max_fields)
2135 {
2136 cstate->max_fields *= 2;
2137 cstate->raw_fields =
2138 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
2139 }
2140
2141 /* Remember start of field on both input and output sides */
2143 cstate->raw_fields[fieldno] = output_ptr;
2144
2145 /*
2146 * Scan data for field.
2147 *
2148 * The loop starts in "not quote" mode and then toggles between that
2149 * and "in quote" mode. The loop exits normally if it is in "not
2150 * quote" mode and a delimiter or line end is seen.
2151 */
2152 for (;;)
2153 {
2154 char c;
2155
2156 /* Not in quote */
2157 for (;;)
2158 {
2159 end_ptr = cur_ptr;
2160 if (cur_ptr >= line_end_ptr)
2161 goto endfield;
2162 c = *cur_ptr++;
2163 /* unquoted field delimiter */
2164 if (c == delimc)
2165 {
2166 found_delim = true;
2167 goto endfield;
2168 }
2169 /* start of quoted field (or part of field) */
2170 if (c == quotec)
2171 {
2172 saw_quote = true;
2173 break;
2174 }
2175 /* Add c to output string */
2176 *output_ptr++ = c;
2177 }
2178
2179 /* In quote */
2180 for (;;)
2181 {
2182 end_ptr = cur_ptr;
2183 if (cur_ptr >= line_end_ptr)
2184 ereport(ERROR,
2186 errmsg("unterminated CSV quoted field")));
2187
2188 c = *cur_ptr++;
2189
2190 /* escape within a quoted field */
2191 if (c == escapec)
2192 {
2193 /*
2194 * peek at the next char if available, and escape it if it
2195 * is an escape char or a quote char
2196 */
2197 if (cur_ptr < line_end_ptr)
2198 {
2199 char nextc = *cur_ptr;
2200
2201 if (nextc == escapec || nextc == quotec)
2202 {
2203 *output_ptr++ = nextc;
2204 cur_ptr++;
2205 continue;
2206 }
2207 }
2208 }
2209
2210 /*
2211 * end of quoted field. Must do this test after testing for
2212 * escape in case quote char and escape char are the same
2213 * (which is the common case).
2214 */
2215 if (c == quotec)
2216 break;
2217
2218 /* Add c to output string */
2219 *output_ptr++ = c;
2220 }
2221 }
2222endfield:
2223
2224 /* Terminate attribute value in output area */
2225 *output_ptr++ = '\0';
2226
2227 /* Check whether raw input matched null marker */
2228 input_len = end_ptr - start_ptr;
2229 if (!saw_quote && input_len == cstate->opts.null_print_len &&
2230 strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
2231 cstate->raw_fields[fieldno] = NULL;
2232 /* Check whether raw input matched default marker */
2233 else if (fieldno < list_length(cstate->attnumlist) &&
2234 cstate->opts.default_print &&
2235 input_len == cstate->opts.default_print_len &&
2236 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
2237 {
2238 /* fieldno is 0-indexed and attnum is 1-indexed */
2239 int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
2240
2241 if (cstate->defexprs[m] != NULL)
2242 {
2243 /* defaults contain entries for all physical attributes */
2244 cstate->defaults[m] = true;
2245 }
2246 else
2247 {
2248 TupleDesc tupDesc = RelationGetDescr(cstate->rel);
2249 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
2250
2251 ereport(ERROR,
2253 errmsg("unexpected default marker in COPY data"),
2254 errdetail("Column \"%s\" has no default value.",
2255 NameStr(att->attname))));
2256 }
2257 }
2258
2259 fieldno++;
2260 /* Done if we hit EOL instead of a delim */
2261 if (!found_delim)
2262 break;
2263 }
2264
2265 /* Clean up state of attribute_buf */
2266 output_ptr--;
2267 Assert(*output_ptr == '\0');
2268 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
2269
2270 return fieldno;
2271}
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
static int list_nth_int(const List *list, int n)
Definition pg_list.h:310
char * c
void resetStringInfo(StringInfo str)
Definition stringinfo.c:126
void enlargeStringInfo(StringInfo str, int needed)
Definition stringinfo.c:337
int default_print_len
Definition copy.h:81
int null_print_len
Definition copy.h:78
char * quote
Definition copy.h:83
char * escape
Definition copy.h:84
char * delim
Definition copy.h:82
char * default_print
Definition copy.h:80
StringInfoData line_buf
StringInfoData attribute_buf

References Assert, CopyFromStateData::attnumlist, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::default_print, CopyFormatOptions::default_print_len, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errdetail(), errmsg, ERROR, CopyFormatOptions::escape, fb(), StringInfoData::len, CopyFromStateData::line_buf, list_length(), list_nth_int(), CopyFromStateData::max_fields, StringInfoData::maxlen, NameStr, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, CopyFromStateData::opts, CopyFormatOptions::quote, CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, repalloc(), resetStringInfo(), and TupleDescAttr().

Referenced by NextCopyFromRawFieldsInternal().

◆ CopyReadAttributesText()

static int CopyReadAttributesText ( CopyFromState  cstate)
static

Definition at line 1829 of file copyfromparse.c.

1830{
1831 char delimc = cstate->opts.delim[0];
1832 int fieldno;
1833 char *output_ptr;
1834 char *cur_ptr;
1835 char *line_end_ptr;
1836
1837 /*
1838 * We need a special case for zero-column tables: check that the input
1839 * line is empty, and return.
1840 */
1841 if (cstate->max_fields <= 0)
1842 {
1843 if (cstate->line_buf.len != 0)
1844 ereport(ERROR,
1846 errmsg("extra data after last expected column")));
1847 return 0;
1848 }
1849
1851
1852 /*
1853 * The de-escaped attributes will certainly not be longer than the input
1854 * data line, so we can just force attribute_buf to be large enough and
1855 * then transfer data without any checks for enough space. We need to do
1856 * it this way because enlarging attribute_buf mid-stream would invalidate
1857 * pointers already stored into cstate->raw_fields[].
1858 */
1859 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1860 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1861 output_ptr = cstate->attribute_buf.data;
1862
1863 /* set pointer variables for loop */
1864 cur_ptr = cstate->line_buf.data;
1865 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1866
1867 /* Outer loop iterates over fields */
1868 fieldno = 0;
1869 for (;;)
1870 {
1871 bool found_delim = false;
1872 char *start_ptr;
1873 char *end_ptr;
1874 int input_len;
1875 bool saw_non_ascii = false;
1876
1877 /* Make sure there is enough space for the next value */
1878 if (fieldno >= cstate->max_fields)
1879 {
1880 cstate->max_fields *= 2;
1881 cstate->raw_fields =
1882 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1883 }
1884
1885 /* Remember start of field on both input and output sides */
1887 cstate->raw_fields[fieldno] = output_ptr;
1888
1889 /*
1890 * Scan data for field.
1891 *
1892 * Note that in this loop, we are scanning to locate the end of field
1893 * and also speculatively performing de-escaping. Once we find the
1894 * end-of-field, we can match the raw field contents against the null
1895 * marker string. Only after that comparison fails do we know that
1896 * de-escaping is actually the right thing to do; therefore we *must
1897 * not* throw any syntax errors before we've done the null-marker
1898 * check.
1899 */
1900 for (;;)
1901 {
1902 char c;
1903
1904 end_ptr = cur_ptr;
1905 if (cur_ptr >= line_end_ptr)
1906 break;
1907 c = *cur_ptr++;
1908 if (c == delimc)
1909 {
1910 found_delim = true;
1911 break;
1912 }
1913 if (c == '\\')
1914 {
1915 if (cur_ptr >= line_end_ptr)
1916 break;
1917 c = *cur_ptr++;
1918 switch (c)
1919 {
1920 case '0':
1921 case '1':
1922 case '2':
1923 case '3':
1924 case '4':
1925 case '5':
1926 case '6':
1927 case '7':
1928 {
1929 /* handle \013 */
1930 int val;
1931
1932 val = OCTVALUE(c);
1933 if (cur_ptr < line_end_ptr)
1934 {
1935 c = *cur_ptr;
1936 if (ISOCTAL(c))
1937 {
1938 cur_ptr++;
1939 val = (val << 3) + OCTVALUE(c);
1940 if (cur_ptr < line_end_ptr)
1941 {
1942 c = *cur_ptr;
1943 if (ISOCTAL(c))
1944 {
1945 cur_ptr++;
1946 val = (val << 3) + OCTVALUE(c);
1947 }
1948 }
1949 }
1950 }
1951 c = val & 0377;
1952 if (c == '\0' || IS_HIGHBIT_SET(c))
1953 saw_non_ascii = true;
1954 }
1955 break;
1956 case 'x':
1957 /* Handle \x3F */
1958 if (cur_ptr < line_end_ptr)
1959 {
1960 char hexchar = *cur_ptr;
1961
1962 if (isxdigit((unsigned char) hexchar))
1963 {
1965
1966 cur_ptr++;
1967 if (cur_ptr < line_end_ptr)
1968 {
1969 hexchar = *cur_ptr;
1970 if (isxdigit((unsigned char) hexchar))
1971 {
1972 cur_ptr++;
1973 val = (val << 4) + GetDecimalFromHex(hexchar);
1974 }
1975 }
1976 c = val & 0xff;
1977 if (c == '\0' || IS_HIGHBIT_SET(c))
1978 saw_non_ascii = true;
1979 }
1980 }
1981 break;
1982 case 'b':
1983 c = '\b';
1984 break;
1985 case 'f':
1986 c = '\f';
1987 break;
1988 case 'n':
1989 c = '\n';
1990 break;
1991 case 'r':
1992 c = '\r';
1993 break;
1994 case 't':
1995 c = '\t';
1996 break;
1997 case 'v':
1998 c = '\v';
1999 break;
2000
2001 /*
2002 * in all other cases, take the char after '\'
2003 * literally
2004 */
2005 }
2006 }
2007
2008 /* Add c to output string */
2009 *output_ptr++ = c;
2010 }
2011
2012 /* Check whether raw input matched null marker */
2013 input_len = end_ptr - start_ptr;
2014 if (input_len == cstate->opts.null_print_len &&
2015 strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
2016 cstate->raw_fields[fieldno] = NULL;
2017 /* Check whether raw input matched default marker */
2018 else if (fieldno < list_length(cstate->attnumlist) &&
2019 cstate->opts.default_print &&
2020 input_len == cstate->opts.default_print_len &&
2021 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
2022 {
2023 /* fieldno is 0-indexed and attnum is 1-indexed */
2024 int m = list_nth_int(cstate->attnumlist, fieldno) - 1;
2025
2026 if (cstate->defexprs[m] != NULL)
2027 {
2028 /* defaults contain entries for all physical attributes */
2029 cstate->defaults[m] = true;
2030 }
2031 else
2032 {
2033 TupleDesc tupDesc = RelationGetDescr(cstate->rel);
2034 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
2035
2036 ereport(ERROR,
2038 errmsg("unexpected default marker in COPY data"),
2039 errdetail("Column \"%s\" has no default value.",
2040 NameStr(att->attname))));
2041 }
2042 }
2043 else
2044 {
2045 /*
2046 * At this point we know the field is supposed to contain data.
2047 *
2048 * If we de-escaped any non-7-bit-ASCII chars, make sure the
2049 * resulting string is valid data for the db encoding.
2050 */
2051 if (saw_non_ascii)
2052 {
2053 char *fld = cstate->raw_fields[fieldno];
2054
2055 pg_verifymbstr(fld, output_ptr - fld, false);
2056 }
2057 }
2058
2059 /* Terminate attribute value in output area */
2060 *output_ptr++ = '\0';
2061
2062 fieldno++;
2063 /* Done if we hit EOL instead of a delim */
2064 if (!found_delim)
2065 break;
2066 }
2067
2068 /* Clean up state of attribute_buf */
2069 output_ptr--;
2070 Assert(*output_ptr == '\0');
2071 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
2072
2073 return fieldno;
2074}
#define IS_HIGHBIT_SET(ch)
Definition c.h:1246
#define OCTVALUE(c)
#define ISOCTAL(c)
static int GetDecimalFromHex(char hex)
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition mbutils.c:1684

References Assert, CopyFromStateData::attnumlist, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::default_print, CopyFormatOptions::default_print_len, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errdetail(), errmsg, ERROR, fb(), GetDecimalFromHex(), IS_HIGHBIT_SET, ISOCTAL, StringInfoData::len, CopyFromStateData::line_buf, list_length(), list_nth_int(), CopyFromStateData::max_fields, StringInfoData::maxlen, NameStr, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, OCTVALUE, CopyFromStateData::opts, pg_verifymbstr(), CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, repalloc(), resetStringInfo(), TupleDescAttr(), and val.

Referenced by NextCopyFromRawFieldsInternal().

◆ CopyReadBinaryAttribute()

static Datum CopyReadBinaryAttribute ( CopyFromState  cstate,
FmgrInfo *flinfo,
Oid  typioparam,
int32  typmod,
bool *isnull 
)
static

Definition at line 2278 of file copyfromparse.c.

2281{
2283 Datum result;
2284
2285 if (!CopyGetInt32(cstate, &fld_size))
2286 ereport(ERROR,
2288 errmsg("unexpected EOF in COPY data")));
2289 if (fld_size == -1)
2290 {
2291 *isnull = true;
2292 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
2293 }
2294 if (fld_size < 0)
2295 ereport(ERROR,
2297 errmsg("invalid field size")));
2298
2299 /* reset attribute_buf to empty, and load raw data in it */
2301
2303 if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
2304 fld_size) != fld_size)
2305 ereport(ERROR,
2307 errmsg("unexpected EOF in COPY data")));
2308
2309 cstate->attribute_buf.len = fld_size;
2310 cstate->attribute_buf.data[fld_size] = '\0';
2311
2312 /* Call the column type's binary input converter */
2313 result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
2314 typioparam, typmod);
2315
2316 /* Trouble if it didn't eat the whole buffer */
2317 if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
2318 ereport(ERROR,
2320 errmsg("incorrect binary data format")));
2321
2322 *isnull = false;
2323 return result;
2324}
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition fmgr.c:1698

References CopyFromStateData::attribute_buf, CopyGetInt32(), CopyReadBinaryData(), StringInfoData::cursor, StringInfoData::data, enlargeStringInfo(), ereport, errcode(), errmsg, ERROR, fb(), StringInfoData::len, ReceiveFunctionCall(), and resetStringInfo().

Referenced by CopyFromBinaryOneRow().

◆ CopyReadBinaryData()

static int CopyReadBinaryData ( CopyFromState  cstate,
char *dest,
int  nbytes 
)
static

Definition at line 707 of file copyfromparse.c.

708{
709 int copied_bytes = 0;
710
711 if (RAW_BUF_BYTES(cstate) >= nbytes)
712 {
713 /* Enough bytes are present in the buffer. */
714 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
715 cstate->raw_buf_index += nbytes;
716 copied_bytes = nbytes;
717 }
718 else
719 {
720 /*
721 * Not enough bytes in the buffer, so must read from the file. Need
722 * to loop since 'nbytes' could be larger than the buffer size.
723 */
724 do
725 {
726 int copy_bytes;
727
728 /* Load more data if buffer is empty. */
729 if (RAW_BUF_BYTES(cstate) == 0)
730 {
731 CopyLoadRawBuf(cstate);
732 if (cstate->raw_reached_eof)
733 break; /* EOF */
734 }
735
736 /* Transfer some bytes. */
737 copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
738 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
739 cstate->raw_buf_index += copy_bytes;
740 dest += copy_bytes;
742 } while (copied_bytes < nbytes);
743 }
744
745 return copied_bytes;
746}
#define Min(x, y)
Definition c.h:1093

References CopyLoadRawBuf(), fb(), Min, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyFromBinaryOneRow(), CopyGetInt16(), CopyGetInt32(), CopyReadBinaryAttribute(), and ReceiveCopyBinaryHeader().

◆ CopyReadLine()

static bool CopyReadLine ( CopyFromState  cstate,
bool  is_csv 
)
static

Definition at line 1236 of file copyfromparse.c.

/*
 * CopyReadLine: read the next input line into cstate->line_buf.
 *
 * is_csv selects which specialization of CopyReadLineText() is used.
 *
 * Returns the value produced by CopyReadLineText(): true if EOF was
 * reached, false if a line ending in an EOL marker was read.  In the
 * latter case the EOL marker is stripped from line_buf before returning.
 *
 * line_buf_valid is cleared while line_buf is being filled and set again
 * just before returning.
 */
1237{
1238 bool result;
1239
1240 resetStringInfo(&cstate->line_buf);
1241 cstate->line_buf_valid = false;
1242
1243 /*
1244 * Parse data and transfer into line_buf.
1245 *
1246 * Because this is performance critical, we inline CopyReadLineText() and
1247 * pass the boolean parameters as constants to allow the compiler to emit
1248 * specialized code with fewer branches.
1249 */
1250 if (is_csv)
1251 result = CopyReadLineText(cstate, true);
1252 else
1253 result = CopyReadLineText(cstate, false);
1254
1255 if (result)
1256 {
1257 /*
1258 * Reached EOF. In protocol version 3, we should ignore anything
1259 * after \. up to the protocol end of copy data. (XXX maybe better
1260 * not to treat \. as special?)
1261 */
1262 if (cstate->copy_src == COPY_FRONTEND)
1263 {
1264 int inbytes;
1265
1266 do
1267 {
1268 inbytes = CopyGetData(cstate, cstate->input_buf,
1269 1, INPUT_BUF_SIZE);
1270 } while (inbytes > 0); /* keep reading and discarding until no more data arrives */
1271 cstate->input_buf_index = 0; /* the discarded bytes are not kept: mark both buffers empty */
1272 cstate->input_buf_len = 0;
1273 cstate->raw_buf_index = 0;
1274 cstate->raw_buf_len = 0;
1275 }
1276 }
1277 else
1278 {
1279 /*
1280 * If we didn't hit EOF, then we must have transferred the EOL marker
1281 * to line_buf along with the data. Get rid of it.
1282 */
1283 switch (cstate->eol_type)
1284 {
1285 case EOL_NL:
1286 Assert(cstate->line_buf.len >= 1);
1287 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1288 cstate->line_buf.len--;
1289 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1290 break;
1291 case EOL_CR:
1292 Assert(cstate->line_buf.len >= 1);
1293 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
1294 cstate->line_buf.len--;
1295 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1296 break;
1297 case EOL_CRNL:
1298 Assert(cstate->line_buf.len >= 2);
1299 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
1300 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1301 cstate->line_buf.len -= 2;
1302 cstate->line_buf.data[cstate->line_buf.len] = '\0';
1303 break;
1304 case EOL_UNKNOWN:
1305 /* shouldn't get here */
1306 Assert(false);
1307 break;
1308 }
1309 }
1310
1311 /* Now it's safe to use the buffer in error messages */
1312 cstate->line_buf_valid = true;
1313
1314 return result;
1315}
@ EOL_CR
@ EOL_CRNL
@ EOL_UNKNOWN
@ EOL_NL
static pg_attribute_always_inline bool CopyReadLineText(CopyFromState cstate, bool is_csv)

References Assert, COPY_FRONTEND, CopyFromStateData::copy_src, CopyGetData(), CopyReadLineText(), StringInfoData::data, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, fb(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::line_buf_valid, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and resetStringInfo().

Referenced by NextCopyFromRawFieldsInternal().

◆ CopyReadLineText()

static pg_attribute_always_inline bool CopyReadLineText ( CopyFromState  cstate,
bool  is_csv 
)
static

Definition at line 1467 of file copyfromparse.c.

1468{
1469 char *copy_input_buf;
1470 int input_buf_ptr;
1471 int copy_buf_len;
1472 bool need_data = false;
1473 bool hit_eof = false;
1474 bool result = false;
1475
1476 /* CSV variables */
1477 bool in_quote = false,
1478 last_was_esc = false;
1479 char quotec = '\0';
1480 char escapec = '\0';
1481
1482 if (is_csv)
1483 {
1484 quotec = cstate->opts.quote[0];
1485 escapec = cstate->opts.escape[0];
1486 /* ignore special escape processing if it's the same as quotec */
1487 if (quotec == escapec)
1488 escapec = '\0';
1489 }
1490
1491 /*
1492 * The objective of this loop is to transfer the entire next input line
1493 * into line_buf. Hence, we only care for detecting newlines (\r and/or
1494 * \n) and the end-of-copy marker (\.).
1495 *
1496 * In CSV mode, \r and \n inside a quoted field are just part of the data
1497 * value and are put in line_buf. We keep just enough state to know if we
1498 * are currently in a quoted field or not.
1499 *
1500 * The input has already been converted to the database encoding. All
1501 * supported server encodings have the property that all bytes in a
1502 * multi-byte sequence have the high bit set, so a multibyte character
1503 * cannot contain any newline or escape characters embedded in the
1504 * multibyte sequence. Therefore, we can process the input byte-by-byte,
1505 * regardless of the encoding.
1506 *
1507 * For speed, we try to move data from input_buf to line_buf in chunks
1508 * rather than one character at a time. input_buf_ptr points to the next
1509 * character to examine; any characters from input_buf_index to
1510 * input_buf_ptr have been determined to be part of the line, but not yet
1511 * transferred to line_buf.
1512 *
1513 * For a little extra speed within the loop, we copy some state
1514 * information into local variables. input_buf_ptr could be changed in
1515 * the SIMD path, so we must set that one before it. The others are set
1516 * afterwards.
1517 */
1519
1520 /*
1521 * We first try to use SIMD for the task described above, falling back to
1522 * the scalar path (i.e., the loop below) if needed.
1523 */
1524#ifndef USE_NO_SIMD
1525 if (cstate->simd_enabled)
1526 {
1527 /*
1528 * Using temporary variables seems to encourage the compiler to keep
1529 * them in a register, which is beneficial for performance.
1530 */
1531 bool tmp_hit_eof = false;
1532 int tmp_input_buf_ptr = 0; /* silence compiler warning */
1533
1538
1539 if (result)
1540 {
1541 /* Transfer any still-uncopied data to line_buf. */
1543
1544 return result;
1545 }
1546 }
1547#endif /* ! USE_NO_SIMD */
1548
1549 copy_input_buf = cstate->input_buf;
1550 copy_buf_len = cstate->input_buf_len;
1551
1552 for (;;)
1553 {
1554 int prev_raw_ptr;
1555 char c;
1556
1557 /*
1558 * Load more data if needed.
1559 *
1560 * TODO: We could just force four bytes of read-ahead and avoid the
1561 * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was
1562 * unsafe with the old v2 COPY protocol, but we don't support that
1563 * anymore.
1564 */
1566 {
1568
1569 CopyLoadInputBuf(cstate);
1570 /* update our local variables */
1571 hit_eof = cstate->input_reached_eof;
1573 copy_buf_len = cstate->input_buf_len;
1574
1575 /*
1576 * If we are completely out of data, break out of the loop,
1577 * reporting EOF.
1578 */
1579 if (INPUT_BUF_BYTES(cstate) <= 0)
1580 {
1581 result = true;
1582 break;
1583 }
1584 need_data = false;
1585 }
1586
1587 /* OK to fetch a character */
1590
1591 if (is_csv)
1592 {
1593 /*
1594 * If character is '\r', we may need to look ahead below. Force
1595 * fetch of the next character if we don't already have it. We
1596 * need to do this before changing CSV state, in case '\r' is also
1597 * the quote or escape character.
1598 */
1599 if (c == '\r')
1600 {
1602 }
1603
1604 /*
1605 * Dealing with quotes and escapes here is mildly tricky. If the
1606 * quote char is also the escape char, there's no problem - we
1607 * just use the char as a toggle. If they are different, we need
1608 * to ensure that we only take account of an escape inside a
1609 * quoted field and immediately preceding a quote char, and not
1610 * the second in an escape-escape sequence.
1611 */
1612 if (in_quote && c == escapec)
1614 if (c == quotec && !last_was_esc)
1615 in_quote = !in_quote;
1616 if (c != escapec)
1617 last_was_esc = false;
1618
1619 /*
1620 * Updating the line count for embedded CR and/or LF chars is
1621 * necessarily a little fragile - this test is probably about the
1622 * best we can do. (XXX it's arguable whether we should do this
1623 * at all --- is cur_lineno a physical or logical count?)
1624 */
1625 if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
1626 cstate->cur_lineno++;
1627 }
1628
1629 /* Process \r */
1630 if (c == '\r' && (!is_csv || !in_quote))
1631 {
1632 /* Check for \r\n on first line, _and_ handle \r\n. */
1633 if (cstate->eol_type == EOL_UNKNOWN ||
1634 cstate->eol_type == EOL_CRNL)
1635 {
1636 /*
1637 * If need more data, go back to loop top to load it.
1638 *
1639 * Note that if we are at EOF, c will wind up as '\0' because
1640 * of the guaranteed pad of input_buf.
1641 */
1643
1644 /* get next char */
1646
1647 if (c == '\n')
1648 {
1649 input_buf_ptr++; /* eat newline */
1650 cstate->eol_type = EOL_CRNL; /* in case not set yet */
1651 }
1652 else
1653 {
1654 /* found \r, but no \n */
1655 if (cstate->eol_type == EOL_CRNL)
1656 ereport(ERROR,
1658 !is_csv ?
1659 errmsg("literal carriage return found in data") :
1660 errmsg("unquoted carriage return found in data"),
1661 !is_csv ?
1662 errhint("Use \"\\r\" to represent carriage return.") :
1663 errhint("Use quoted CSV field to represent carriage return.")));
1664
1665 /*
1666 * if we got here, it is the first line and we didn't find
1667 * \n, so don't consume the peeked character
1668 */
1669 cstate->eol_type = EOL_CR;
1670 }
1671 }
1672 else if (cstate->eol_type == EOL_NL)
1673 ereport(ERROR,
1675 !is_csv ?
1676 errmsg("literal carriage return found in data") :
1677 errmsg("unquoted carriage return found in data"),
1678 !is_csv ?
1679 errhint("Use \"\\r\" to represent carriage return.") :
1680 errhint("Use quoted CSV field to represent carriage return.")));
1681 /* If reach here, we have found the line terminator */
1682 break;
1683 }
1684
1685 /* Process \n */
1686 if (c == '\n' && (!is_csv || !in_quote))
1687 {
1688 if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
1689 ereport(ERROR,
1691 !is_csv ?
1692 errmsg("literal newline found in data") :
1693 errmsg("unquoted newline found in data"),
1694 !is_csv ?
1695 errhint("Use \"\\n\" to represent newline.") :
1696 errhint("Use quoted CSV field to represent newline.")));
1697 cstate->eol_type = EOL_NL; /* in case not set yet */
1698 /* If reach here, we have found the line terminator */
1699 break;
1700 }
1701
1702 /*
1703 * Process backslash, except in CSV mode where backslash is a normal
1704 * character.
1705 */
1706 if (c == '\\' && !is_csv)
1707 {
1708 char c2;
1709
1712
1713 /* -----
1714 * get next character
1715 * Note: we do not change c so if it isn't \., we can fall
1716 * through and continue processing.
1717 * -----
1718 */
1720
1721 if (c2 == '.')
1722 {
1723 input_buf_ptr++; /* consume the '.' */
1724 if (cstate->eol_type == EOL_CRNL)
1725 {
1726 /* Get the next character */
1728 /* if hit_eof, c2 will become '\0' */
1730
1731 if (c2 == '\n')
1732 ereport(ERROR,
1734 errmsg("end-of-copy marker does not match previous newline style")));
1735 else if (c2 != '\r')
1736 ereport(ERROR,
1738 errmsg("end-of-copy marker is not alone on its line")));
1739 }
1740
1741 /* Get the next character */
1743 /* if hit_eof, c2 will become '\0' */
1745
1746 if (c2 != '\r' && c2 != '\n')
1747 ereport(ERROR,
1749 errmsg("end-of-copy marker is not alone on its line")));
1750
1751 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
1752 (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
1753 (cstate->eol_type == EOL_CR && c2 != '\r'))
1754 ereport(ERROR,
1756 errmsg("end-of-copy marker does not match previous newline style")));
1757
1758 /*
1759 * If there is any data on this line before the \., complain.
1760 */
1761 if (cstate->line_buf.len > 0 ||
1762 prev_raw_ptr > cstate->input_buf_index)
1763 ereport(ERROR,
1765 errmsg("end-of-copy marker is not alone on its line")));
1766
1767 /*
1768 * Discard the \. and newline, then report EOF.
1769 */
1771 result = true; /* report EOF */
1772 break;
1773 }
1774 else
1775 {
1776 /*
1777 * If we are here, it means we found a backslash followed by
1778 * something other than a period. In non-CSV mode, anything
1779 * after a backslash is special, so we skip over that second
1780 * character too. If we didn't do that \\. would be
1781 * considered an end-of-copy marker, while in non-CSV mode it is a
1782 * literal backslash followed by a period.
1783 */
1784 input_buf_ptr++;
1785 }
1786 }
1787 } /* end of outer loop */
1788
1789 /*
1790 * Transfer any still-uncopied data to line_buf.
1791 */
1793
1794 return result;
1795}
#define REFILL_LINEBUF
static void CopyLoadInputBuf(CopyFromState cstate)
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
int errhint(const char *fmt,...) pg_attribute_printf(1, 2)

References CopyLoadInputBuf(), CopyFromStateData::cur_lineno, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, ereport, errcode(), errhint(), errmsg, ERROR, CopyFormatOptions::escape, fb(), IF_NEED_REFILL_AND_EOF_BREAK, IF_NEED_REFILL_AND_NOT_EOF_CONTINUE, CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::input_reached_eof, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::opts, CopyFormatOptions::quote, REFILL_LINEBUF, and CopyFromStateData::simd_enabled.

Referenced by CopyReadLine().

◆ GetDecimalFromHex()

static int GetDecimalFromHex ( char  hex)
static

Definition at line 1801 of file copyfromparse.c.

/*
 * GetDecimalFromHex: return the numeric value of one hexadecimal digit.
 *
 * '0'..'9' yield 0..9.  Any other character is folded to lower case with
 * pg_ascii_tolower() and mapped as (ch - 'a' + 10), so 'a'/'A' yield 10.
 * No validation happens here: a character outside [0-9a-fA-F] produces a
 * meaningless value.  NOTE(review): presumably the caller verifies the
 * character is a hex digit first -- confirm in CopyReadAttributesText().
 */
1802{
1803 if (isdigit((unsigned char) hex)) /* cast avoids passing a negative char to isdigit() */
1804 return hex - '0';
1805 else
1806 return pg_ascii_tolower((unsigned char) hex) - 'a' + 10; /* 'a'..'f' / 'A'..'F' -> 10..15 */
1807}
static unsigned char pg_ascii_tolower(unsigned char ch)
Definition port.h:188

References fb(), and pg_ascii_tolower().

Referenced by CopyReadAttributesText().

◆ NextCopyFrom()

bool NextCopyFrom ( CopyFromState  cstate,
ExprContext econtext,
Datum values,
bool nulls 
)

Definition at line 887 of file copyfromparse.c.

/*
 * NextCopyFrom: fetch one input row into values[]/nulls[].
 *
 * Steps performed by the visible code:
 *   1. Reset values[], nulls[] and cstate->defaults[] for tupDesc->natts
 *      entries (so both caller arrays must have at least that many slots).
 *   2. Ask the format callback cstate->routine->CopyFromOneRow() for a row;
 *      if it returns false, return false immediately.
 *   3. Evaluate the cstate->num_defaults default expressions and store the
 *      results in the column slots named by defmap[].
 *
 * Returns true when a row was produced, false when the callback reported
 * that there is none.
 *
 * NOTE(review): this listing omits source lines 888, 891 and 921 (the end of
 * the signature, the declaration that introduces num_phys_attrs, and a
 * second Assert, presumably the memory-context check named in the
 * reference list) -- consult the real file for them.
 */
889{
890 TupleDesc tupDesc;
892 num_defaults = cstate->num_defaults;
893 int i;
894 int *defmap = cstate->defmap;
895 ExprState **defexprs = cstate->defexprs;
896
897 tupDesc = RelationGetDescr(cstate->rel);
898 num_phys_attrs = tupDesc->natts;
899
900 /* Initialize all values for row to NULL */
901 MemSet(values, 0, num_phys_attrs * sizeof(Datum));
902 MemSet(nulls, true, num_phys_attrs * sizeof(bool));
903 MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
904
905 /* Get one row from source */
906 if (!cstate->routine->CopyFromOneRow(cstate, econtext, values, nulls))
907 return false;
908
909 /*
910 * Now compute and insert any defaults available for the columns not
911 * provided by the input data. Anything not processed here or above will
912 * remain NULL.
913 */
914 for (i = 0; i < num_defaults; i++)
915 {
916 /*
917 * The caller must supply econtext and have switched into the
918 * per-tuple memory context in it.
919 */
920 Assert(econtext != NULL);
922
/* defmap[i] indexes values[], nulls[] and defexprs[] alike */
923 values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
924 &nulls[defmap[i]]);
925 }
926
927 return true;
928}
#define MemSet(start, val, len)
Definition c.h:1109
int i
Definition isn.c:77
bool(* CopyFromOneRow)(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
Definition copyapi.h:96
const struct CopyFromRoutine * routine

References Assert, CopyFromRoutine::CopyFromOneRow, CurrentMemoryContext, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFromStateData::defmap, ExprContext::ecxt_per_tuple_memory, ExecEvalExpr(), fb(), i, MemSet, TupleDescData::natts, CopyFromStateData::num_defaults, CopyFromStateData::rel, RelationGetDescr, CopyFromStateData::routine, and values.

Referenced by CopyFrom(), file_acquire_sample_rows(), and fileIterateForeignScan().

◆ NextCopyFromRawFields()

bool NextCopyFromRawFields ( CopyFromState  cstate,
char ***  fields,
int nfields 
)

Definition at line 753 of file copyfromparse.c.

/*
 * NextCopyFromRawFields: wrapper around NextCopyFromRawFieldsInternal().
 *
 * Forwards cstate, fields and nfields unchanged and derives the is_csv
 * argument from the COPY options: true exactly when opts.format is
 * COPY_FORMAT_CSV.  The return value is passed through as-is.
 */
754{
755 return NextCopyFromRawFieldsInternal(cstate, fields, nfields,
756 cstate->opts.format == COPY_FORMAT_CSV);
757}
@ COPY_FORMAT_CSV
Definition copy.h:59
CopyFormat format
Definition copy.h:73

References COPY_FORMAT_CSV, CopyFormatOptions::format, NextCopyFromRawFieldsInternal(), and CopyFromStateData::opts.

◆ NextCopyFromRawFieldsInternal()

static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal ( CopyFromState  cstate,
char ***  fields,
int nfields,
bool  is_csv 
)
static

Definition at line 777 of file copyfromparse.c.

/*
 * NextCopyFromRawFieldsInternal: read the next text/CSV line and expose its
 * fields as raw strings.
 *
 * On the very first call (cur_lineno == 0) with a header option other than
 * COPY_HEADER_FALSE, the header line(s) are consumed first; with
 * COPY_HEADER_MATCH the header's field count and column names are checked
 * against cstate->attnumlist and the relation's attribute names.
 *
 * On success, *fields is set to cstate->raw_fields, *nfields to the field
 * count, and true is returned.  false is returned when input ended while
 * skipping the header, or when CopyReadLine() reports end of input with an
 * empty line buffer.
 *
 * NOTE(review): this listing omits several source lines (811, 813, 816-817,
 * 832-833, 839-840, 866, 868).  Judging by the reference list they are
 * presumably the CopyReadAttributesCSV()/CopyReadAttributesText() calls
 * that assign fldct and the ereport(ERROR, errcode(...) openers of the
 * errmsg() calls shown below -- confirm against the real file.
 */
778{
779 int fldct;
780 bool done = false;
781
782 /* only available for text or csv input */
783 Assert(cstate->opts.format == COPY_FORMAT_TEXT ||
784 cstate->opts.format == COPY_FORMAT_CSV);
785
786 /* on input check that the header line is correct if needed */
787 if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
788 {
789 ListCell *cur;
790 TupleDesc tupDesc;
791 int lines_to_skip = cstate->opts.header_line; /* header_line value doubles as the skip count */
792
793 /* If set to "match", one header line is skipped */
794 if (cstate->opts.header_line == COPY_HEADER_MATCH)
795 lines_to_skip = 1;
796
797 tupDesc = RelationGetDescr(cstate->rel);
798
/* Consume header lines, counting each one; stop early if input ends */
799 for (int i = 0; i < lines_to_skip; i++)
800 {
801 cstate->cur_lineno++;
802 if ((done = CopyReadLine(cstate, is_csv)))
803 break;
804 }
805
806 if (cstate->opts.header_line == COPY_HEADER_MATCH)
807 {
808 int fldnum;
809
/* listing omits lines 811/813, the statements that set fldct */
810 if (is_csv)
812 else
814
/* header must have exactly one field per target column */
815 if (fldct != list_length(cstate->attnumlist))
818 errmsg("wrong number of fields in header line: got %d, expected %d",
819 fldct, list_length(cstate->attnumlist))));
820
821 fldnum = 0;
822 foreach(cur, cstate->attnumlist)
823 {
824 int attnum = lfirst_int(cur);
825 char *colName;
826 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); /* attnum is 1-based, TupleDescAttr index is 0-based */
827
828 Assert(fldnum < cstate->max_fields);
829
/* fldnum is post-incremented, so the messages below report a 1-based field number */
830 colName = cstate->raw_fields[fldnum++];
831 if (colName == NULL)
834 errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
835 fldnum, cstate->opts.null_print, NameStr(attr->attname))));
836
837 if (namestrcmp(&attr->attname, colName) != 0)
838 {
841 errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
842 fldnum, colName, NameStr(attr->attname))));
843 }
844 }
845 }
846
/* input ended while reading the header: nothing left to return */
847 if (done)
848 return false;
849 }
850
851 cstate->cur_lineno++;
852
853 /* Actually read the line into memory here */
854 done = CopyReadLine(cstate, is_csv);
855
856 /*
857 * EOF at start of line means we're done. If we see EOF after some
858 * characters, we act as though it was newline followed by EOF, ie,
859 * process the line and then exit loop on next iteration.
860 */
861 if (done && cstate->line_buf.len == 0)
862 return false;
863
864 /* Parse the line into de-escaped field values */
/* listing omits lines 866/868, the statements that set fldct */
865 if (is_csv)
867 else
869
870 *fields = cstate->raw_fields;
871 *nfields = fldct;
872 return true;
873}
static int CopyReadAttributesCSV(CopyFromState cstate)
static int CopyReadAttributesText(CopyFromState cstate)
static bool CopyReadLine(CopyFromState cstate, bool is_csv)
@ COPY_FORMAT_TEXT
Definition copy.h:57
#define COPY_HEADER_MATCH
Definition copy.h:26
#define COPY_HEADER_FALSE
Definition copy.h:27
int namestrcmp(Name name, const char *str)
Definition name.c:247
int header_line
Definition copy.h:75

References Assert, attnum, CopyFromStateData::attnumlist, COPY_FORMAT_CSV, COPY_FORMAT_TEXT, COPY_HEADER_FALSE, COPY_HEADER_MATCH, CopyReadAttributesCSV(), CopyReadAttributesText(), CopyReadLine(), cur, CopyFromStateData::cur_lineno, ereport, errcode(), errmsg, ERROR, fb(), CopyFormatOptions::format, CopyFormatOptions::header_line, i, StringInfoData::len, lfirst_int, CopyFromStateData::line_buf, list_length(), NameStr, namestrcmp(), CopyFormatOptions::null_print, CopyFromStateData::opts, CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, and TupleDescAttr().

Referenced by CopyFromTextLikeOneRow(), and NextCopyFromRawFields().

◆ ReceiveCopyBegin()

void ReceiveCopyBegin ( CopyFromState  cstate)

Definition at line 174 of file copyfromparse.c.

/*
 * ReceiveCopyBegin: announce to the frontend that COPY data may be sent.
 *
 * The visible code writes, into buf, one byte for the overall format
 * (1 when opts.format is COPY_FORMAT_BINARY, otherwise 0), the number of
 * target columns as an int16, and then the same format code once per
 * column.  Afterwards it marks the copy source as COPY_FRONTEND, allocates
 * cstate->fe_msgbuf with makeStringInfo(), and flushes the connection.
 *
 * NOTE(review): this listing omits source lines 176, 181 and 186.  Per the
 * reference list they presumably declare buf and wrap the sends in
 * pq_beginmessage(..., PqMsg_CopyInResponse) / pq_endmessage() -- confirm
 * against the real file.
 */
175{
177 int natts = list_length(cstate->attnumlist);
178 int16 format = (cstate->opts.format == COPY_FORMAT_BINARY ? 1 : 0);
179 int i;
180
182 pq_sendbyte(&buf, format); /* overall format */
183 pq_sendint16(&buf, natts);
184 for (i = 0; i < natts; i++)
185 pq_sendint16(&buf, format); /* per-column formats */
187 cstate->copy_src = COPY_FRONTEND;
188 cstate->fe_msgbuf = makeStringInfo();
189 /* We *must* flush here to ensure FE knows it can send. */
190 pq_flush();
191}
@ COPY_FORMAT_BINARY
Definition copy.h:58
#define pq_flush()
Definition libpq.h:49
static char format
void pq_endmessage(StringInfo buf)
Definition pqformat.c:296
void pq_beginmessage(StringInfo buf, char msgtype)
Definition pqformat.c:88
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition pqformat.h:160
static void pq_sendint16(StringInfo buf, uint16 i)
Definition pqformat.h:136
#define PqMsg_CopyInResponse
Definition protocol.h:45
StringInfo makeStringInfo(void)
Definition stringinfo.c:72

References CopyFromStateData::attnumlist, buf, COPY_FORMAT_BINARY, COPY_FRONTEND, CopyFromStateData::copy_src, CopyFromStateData::fe_msgbuf, format, CopyFormatOptions::format, i, list_length(), makeStringInfo(), CopyFromStateData::opts, pq_beginmessage(), pq_endmessage(), pq_flush, pq_sendbyte(), pq_sendint16(), and PqMsg_CopyInResponse.

Referenced by BeginCopyFrom().

◆ ReceiveCopyBinaryHeader()

void ReceiveCopyBinaryHeader ( CopyFromState  cstate)

Definition at line 194 of file copyfromparse.c.

/*
 * ReceiveCopyBinaryHeader: read and validate the header of binary COPY input.
 *
 * Checks performed, in order:
 *   - the first 11 bytes must equal BinarySignature;
 *   - a 32-bit flags word must be readable; bit 16 set is rejected with the
 *     "WITH OIDS" message, and any other bit at position 16 or above is
 *     rejected as an unrecognized critical flag (the low 16 bits are not
 *     examined here);
 *   - a 32-bit header extension length must be readable and non-negative;
 *   - that many extension bytes must be readable; they are discarded.
 *
 * NOTE(review): this listing omits the lines between each condition and its
 * errmsg() (presumably "ereport(ERROR," plus an errcode() call, both named
 * in the reference list) -- confirm against the real file.
 */
195{
196 char readSig[11];
197 int32 tmp;
198
199 /* Signature */
200 if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
201 memcmp(readSig, BinarySignature, 11) != 0)
204 errmsg("COPY file signature not recognized")));
205 /* Flags field */
206 if (!CopyGetInt32(cstate, &tmp))
209 errmsg("invalid COPY file header (missing flags)")));
210 if ((tmp & (1 << 16)) != 0)
213 errmsg("invalid COPY file header (WITH OIDS)")));
214 tmp &= ~(1 << 16); /* clear bit 16 before testing the remaining upper bits */
215 if ((tmp >> 16) != 0)
218 errmsg("unrecognized critical flags in COPY file header")));
219 /* Header extension length */
220 if (!CopyGetInt32(cstate, &tmp) ||
221 tmp < 0)
224 errmsg("invalid COPY file header (missing length)")));
225 /* Skip extension header, if present */
226 while (tmp-- > 0)
227 {
/* readSig is reused as a one-byte scratch buffer; the byte read is ignored */
228 if (CopyReadBinaryData(cstate, readSig, 1) != 1)
231 errmsg("invalid COPY file header (wrong length)")));
232 }
233}
static const char BinarySignature[11]

References BinarySignature, CopyGetInt32(), CopyReadBinaryData(), ereport, errcode(), errmsg, ERROR, and fb().

Referenced by CopyFromBinaryStart().

Variable Documentation

◆ BinarySignature

const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"
static

Definition at line 142 of file copyfromparse.c.

Referenced by ReceiveCopyBinaryHeader().