PostgreSQL Source Code  git master
copyfromparse.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include "commands/copy.h"
#include "commands/copyfrom_internal.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/memutils.h"
#include "utils/rel.h"
Include dependency graph for copyfromparse.c:

Go to the source code of this file.

Macros

#define ISOCTAL(c)   (((c) >= '0') && ((c) <= '7'))
 
#define OCTVALUE(c)   ((c) - '0')
 
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
 
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
 
#define REFILL_LINEBUF
 
#define NO_END_OF_COPY_GOTO
 

Functions

static bool CopyReadLine (CopyFromState cstate)
 
static bool CopyReadLineText (CopyFromState cstate)
 
static int CopyReadAttributesText (CopyFromState cstate)
 
static int CopyReadAttributesCSV (CopyFromState cstate)
 
static Datum CopyReadBinaryAttribute (CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
 
static int CopyGetData (CopyFromState cstate, void *databuf, int minread, int maxread)
 
static bool CopyGetInt32 (CopyFromState cstate, int32 *val)
 
static bool CopyGetInt16 (CopyFromState cstate, int16 *val)
 
static bool CopyLoadRawBuf (CopyFromState cstate)
 
static int CopyReadBinaryData (CopyFromState cstate, char *dest, int nbytes)
 
void ReceiveCopyBegin (CopyFromState cstate)
 
void ReceiveCopyBinaryHeader (CopyFromState cstate)
 
bool NextCopyFromRawFields (CopyFromState cstate, char ***fields, int *nfields)
 
bool NextCopyFrom (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)
 
static int GetDecimalFromHex (char hex)
 

Variables

static const char BinarySignature [11] = "PGCOPY\n\377\r\n\0"
 

Macro Definition Documentation

◆ IF_NEED_REFILL_AND_EOF_BREAK

#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
Value:
/*
 * Body of IF_NEED_REFILL_AND_EOF_BREAK(extralen).
 *
 * When raw_buf_ptr + extralen reaches or passes copy_buf_len and hit_eof is
 * set, the macro sets result = true and leaves the enclosing loop; if
 * extralen is nonzero it first moves raw_buf_ptr to copy_buf_len.
 *
 * It reads and writes the caller's locals raw_buf_ptr, copy_buf_len, hit_eof
 * and result, and it executes a bare "break", so it must be expanded inside
 * the loop it is meant to leave.  The "if (1) { ... } else ((void) 0)"
 * wrapper swallows the trailing semicolon without introducing a loop of its
 * own (a do { } while (0) wrapper would capture the break).
 */
if (1) \
{ \
if (raw_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
{ \
if (extralen) \
raw_buf_ptr = copy_buf_len; /* consume the partial character */ \
/* backslash just before EOF, treat as data char */ \
result = true; \
break; \
} \
} else ((void) 0)

Definition at line 65 of file copyfromparse.c.

Referenced by CopyReadLineText().

◆ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE

#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Value:
/*
 * Body of IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen).
 *
 * When raw_buf_ptr + extralen reaches or passes copy_buf_len and hit_eof is
 * not set, the macro rewinds raw_buf_ptr to prev_raw_ptr, sets need_data and
 * restarts the enclosing loop.
 *
 * It uses the caller's locals raw_buf_ptr, prev_raw_ptr, copy_buf_len,
 * hit_eof and need_data, and it executes a bare "continue", so it must be
 * expanded directly inside the loop that should be restarted.  The
 * "if (1) { ... } else ((void) 0)" wrapper swallows the trailing semicolon
 * without introducing a loop of its own.
 */
if (1) \
{ \
if (raw_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
{ \
raw_buf_ptr = prev_raw_ptr; /* undo fetch */ \
need_data = true; \
continue; \
} \
} else ((void) 0)

Definition at line 53 of file copyfromparse.c.

Referenced by CopyReadLineText().

◆ ISOCTAL

/*
 * ISOCTAL(c): nonzero when c is one of the characters '0' through '7'.
 * The argument is evaluated twice, so it must be free of side effects.
 */
#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))

Definition at line 34 of file copyfromparse.c.

Referenced by CopyReadAttributesText().

◆ NO_END_OF_COPY_GOTO

#define NO_END_OF_COPY_GOTO
Value:
/*
 * Body of NO_END_OF_COPY_GOTO.
 *
 * Sets raw_buf_ptr to one past prev_raw_ptr and jumps to the label
 * not_end_of_copy, which must exist in the function where the macro is
 * expanded.  Uses the caller's locals raw_buf_ptr and prev_raw_ptr.
 */
if (1) \
{ \
raw_buf_ptr = prev_raw_ptr + 1; \
goto not_end_of_copy; \
} else ((void) 0)

Definition at line 95 of file copyfromparse.c.

Referenced by CopyReadLineText().

◆ OCTVALUE

/*
 * OCTVALUE(c): numeric value of the digit character c (c - '0').
 * No range check is done here; callers pair it with ISOCTAL().
 */
#define OCTVALUE(c) ((c) - '0')

Definition at line 35 of file copyfromparse.c.

Referenced by CopyReadAttributesText().

◆ REFILL_LINEBUF

#define REFILL_LINEBUF
Value:
/*
 * Body of REFILL_LINEBUF.
 *
 * If raw_buf_ptr is ahead of cstate->raw_buf_index, appends the bytes
 * raw_buf[raw_buf_index .. raw_buf_ptr) to cstate->line_buf and advances
 * raw_buf_index to raw_buf_ptr; otherwise does nothing.  Uses the caller's
 * locals cstate and raw_buf_ptr.
 */
if (1) \
{ \
if (raw_buf_ptr > cstate->raw_buf_index) \
{ \
appendBinaryStringInfo(&cstate->line_buf, \
cstate->raw_buf + cstate->raw_buf_index, \
raw_buf_ptr - cstate->raw_buf_index); \
cstate->raw_buf_index = raw_buf_ptr; \
} \
} else ((void) 0)

Definition at line 82 of file copyfromparse.c.

Referenced by CopyReadLineText().

Function Documentation

◆ CopyGetData()

static int CopyGetData ( CopyFromState  cstate,
void *  databuf,
int  minread,
int  maxread 
)
static

Definition at line 216 of file copyfromparse.c.

References COPY_CALLBACK, COPY_FILE, CopyFromStateData::copy_file, COPY_NEW_FE, COPY_OLD_FE, CopyFromStateData::copy_src, StringInfoData::cursor, CopyFromStateData::data_source_cb, ereport, errcode(), errcode_for_file_access(), errmsg(), ERROR, CopyFromStateData::fe_msgbuf, HOLD_CANCEL_INTERRUPTS, StringInfoData::len, pq_copymsgbytes(), pq_getbyte(), pq_getbytes(), pq_getmessage(), pq_getmsgstring(), pq_startmsgread(), CopyFromStateData::reached_eof, and RESUME_CANCEL_INTERRUPTS.

Referenced by CopyLoadRawBuf().

217 {
218  int bytesread = 0;
219 
220  switch (cstate->copy_src)
221  {
222  case COPY_FILE:
223  bytesread = fread(databuf, 1, maxread, cstate->copy_file);
224  if (ferror(cstate->copy_file))
225  ereport(ERROR,
227  errmsg("could not read from COPY file: %m")));
228  if (bytesread == 0)
229  cstate->reached_eof = true;
230  break;
231  case COPY_OLD_FE:
232 
233  /*
234  * We cannot read more than minread bytes (which in practice is 1)
235  * because old protocol doesn't have any clear way of separating
236  * the COPY stream from following data. This is slow, but not any
237  * slower than the code path was originally, and we don't care
238  * much anymore about the performance of old protocol.
239  */
240  if (pq_getbytes((char *) databuf, minread))
241  {
242  /* Only a \. terminator is legal EOF in old protocol */
243  ereport(ERROR,
244  (errcode(ERRCODE_CONNECTION_FAILURE),
245  errmsg("unexpected EOF on client connection with an open transaction")));
246  }
247  bytesread = minread;
248  break;
249  case COPY_NEW_FE:
250  while (maxread > 0 && bytesread < minread && !cstate->reached_eof)
251  {
252  int avail;
253 
254  while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
255  {
256  /* Try to receive another message */
257  int mtype;
258 
259  readmessage:
261  pq_startmsgread();
262  mtype = pq_getbyte();
263  if (mtype == EOF)
264  ereport(ERROR,
265  (errcode(ERRCODE_CONNECTION_FAILURE),
266  errmsg("unexpected EOF on client connection with an open transaction")));
267  if (pq_getmessage(cstate->fe_msgbuf, 0))
268  ereport(ERROR,
269  (errcode(ERRCODE_CONNECTION_FAILURE),
270  errmsg("unexpected EOF on client connection with an open transaction")));
272  switch (mtype)
273  {
274  case 'd': /* CopyData */
275  break;
276  case 'c': /* CopyDone */
277  /* COPY IN correctly terminated by frontend */
278  cstate->reached_eof = true;
279  return bytesread;
280  case 'f': /* CopyFail */
281  ereport(ERROR,
282  (errcode(ERRCODE_QUERY_CANCELED),
283  errmsg("COPY from stdin failed: %s",
284  pq_getmsgstring(cstate->fe_msgbuf))));
285  break;
286  case 'H': /* Flush */
287  case 'S': /* Sync */
288 
289  /*
290  * Ignore Flush/Sync for the convenience of client
291  * libraries (such as libpq) that may send those
292  * without noticing that the command they just
293  * sent was COPY.
294  */
295  goto readmessage;
296  default:
297  ereport(ERROR,
298  (errcode(ERRCODE_PROTOCOL_VIOLATION),
299  errmsg("unexpected message type 0x%02X during COPY from stdin",
300  mtype)));
301  break;
302  }
303  }
304  avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
305  if (avail > maxread)
306  avail = maxread;
307  pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
308  databuf = (void *) ((char *) databuf + avail);
309  maxread -= avail;
310  bytesread += avail;
311  }
312  break;
313  case COPY_CALLBACK:
314  bytesread = cstate->data_source_cb(databuf, minread, maxread);
315  break;
316  }
317 
318  return bytesread;
319 }
copy_data_source_cb data_source_cb
#define HOLD_CANCEL_INTERRUPTS()
Definition: miscadmin.h:125
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:581
int errcode(int sqlerrcode)
Definition: elog.c:704
#define ERROR
Definition: elog.h:45
void pq_startmsgread(void)
Definition: pqcomm.c:1209
int pq_getbytes(char *s, size_t len)
Definition: pqcomm.c:1093
int errcode_for_file_access(void)
Definition: elog.c:727
int pq_getmessage(StringInfo s, int maxlen)
Definition: pqcomm.c:1271
int pq_getbyte(void)
Definition: pqcomm.c:999
#define ereport(elevel,...)
Definition: elog.h:155
void pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
Definition: pqformat.c:530
int errmsg(const char *fmt,...)
Definition: elog.c:915
#define RESUME_CANCEL_INTERRUPTS()
Definition: miscadmin.h:127

◆ CopyGetInt16()

static bool CopyGetInt16 ( CopyFromState  cstate,
int16 *  val 
)
inline static

Definition at line 349 of file copyfromparse.c.

References buf, CopyReadBinaryData(), and pg_ntoh16.

Referenced by NextCopyFrom().

350 {
351  uint16 buf;
352 
353  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
354  {
355  *val = 0; /* suppress compiler warning */
356  return false;
357  }
358  *val = (int16) pg_ntoh16(buf);
359  return true;
360 }
signed short int16
Definition: c.h:416
#define pg_ntoh16(x)
Definition: pg_bswap.h:124
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
unsigned short uint16
Definition: c.h:428
static char * buf
Definition: pg_test_fsync.c:68
long val
Definition: informix.c:664

◆ CopyGetInt32()

static bool CopyGetInt32 ( CopyFromState  cstate,
int32 *  val 
)
inline static

Definition at line 332 of file copyfromparse.c.

References buf, CopyReadBinaryData(), and pg_ntoh32.

Referenced by CopyReadBinaryAttribute(), and ReceiveCopyBinaryHeader().

333 {
334  uint32 buf;
335 
336  if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
337  {
338  *val = 0; /* suppress compiler warning */
339  return false;
340  }
341  *val = (int32) pg_ntoh32(buf);
342  return true;
343 }
signed int int32
Definition: c.h:417
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
#define pg_ntoh32(x)
Definition: pg_bswap.h:125
static char * buf
Definition: pg_test_fsync.c:68
unsigned int uint32
Definition: c.h:429
long val
Definition: informix.c:664

◆ CopyLoadRawBuf()

static bool CopyLoadRawBuf ( CopyFromState  cstate)
static

Definition at line 373 of file copyfromparse.c.

References CopyFromStateData::bytes_processed, CopyGetData(), pgstat_progress_update_param(), PROGRESS_COPY_BYTES_PROCESSED, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and RAW_BUF_SIZE.

Referenced by CopyReadBinaryData(), CopyReadLine(), and CopyReadLineText().

374 {
375  int nbytes = RAW_BUF_BYTES(cstate);
376  int inbytes;
377 
378  /* Copy down the unprocessed data if any. */
379  if (nbytes > 0)
380  memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
381  nbytes);
382 
383  inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
384  1, RAW_BUF_SIZE - nbytes);
385  nbytes += inbytes;
386  cstate->raw_buf[nbytes] = '\0';
387  cstate->raw_buf_index = 0;
388  cstate->raw_buf_len = nbytes;
389  cstate->bytes_processed += nbytes;
391  return (inbytes > 0);
392 }
#define RAW_BUF_BYTES(cstate)
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3478
#define RAW_BUF_SIZE
static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
#define PROGRESS_COPY_BYTES_PROCESSED
Definition: progress.h:137

◆ CopyReadAttributesCSV()

static int CopyReadAttributesCSV ( CopyFromState  cstate)
static

Definition at line 1404 of file copyfromparse.c.

References Assert, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, CopyFormatOptions::escape, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::max_fields, StringInfoData::maxlen, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, CopyFromStateData::opts, CopyFormatOptions::quote, CopyFromStateData::raw_fields, repalloc(), and resetStringInfo().

Referenced by NextCopyFromRawFields().

/*
 * CopyReadAttributesCSV
 *
 * Split the line held in cstate->line_buf into fields using the configured
 * delimiter, quote and escape characters (first byte of each option).
 *
 * De-quoted field text is written into cstate->attribute_buf, each field
 * NUL-terminated.  cstate->raw_fields[i] is set to the start of field i in
 * that buffer, or to NULL when the field was never quoted and its raw text
 * equals the null marker.  raw_fields[] is doubled with repalloc() whenever
 * it runs out of slots.
 *
 * Returns the number of fields found.  Raises ERROR when a zero-column
 * table gets a non-empty line, or when a quoted field is not closed before
 * the end of the line.
 */
1405 {
1406  char delimc = cstate->opts.delim[0];
1407  char quotec = cstate->opts.quote[0];
1408  char escapec = cstate->opts.escape[0];
1409  int fieldno;
1410  char *output_ptr;
1411  char *cur_ptr;
1412  char *line_end_ptr;
1413 
1414  /*
1415  * We need a special case for zero-column tables: check that the input
1416  * line is empty, and return.
1417  */
1418  if (cstate->max_fields <= 0)
1419  {
1420  if (cstate->line_buf.len != 0)
1421  ereport(ERROR,
1422  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1423  errmsg("extra data after last expected column")));
1424  return 0;
1425  }
1426 
1427  resetStringInfo(&cstate->attribute_buf);
1428 
1429  /*
1430  * The de-escaped attributes will certainly not be longer than the input
1431  * data line, so we can just force attribute_buf to be large enough and
1432  * then transfer data without any checks for enough space. We need to do
1433  * it this way because enlarging attribute_buf mid-stream would invalidate
1434  * pointers already stored into cstate->raw_fields[].
1435  */
1436  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1437  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1438  output_ptr = cstate->attribute_buf.data;
1439 
1440  /* set pointer variables for loop */
1441  cur_ptr = cstate->line_buf.data;
1442  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1443 
1444  /* Outer loop iterates over fields */
1445  fieldno = 0;
1446  for (;;)
1447  {
1448  bool found_delim = false;
1449  bool saw_quote = false;
1450  char *start_ptr;
1451  char *end_ptr;
1452  int input_len;
1453 
1454  /* Make sure there is enough space for the next value */
1455  if (fieldno >= cstate->max_fields)
1456  {
1457  cstate->max_fields *= 2;
1458  cstate->raw_fields =
1459  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1460  }
1461 
1462  /* Remember start of field on both input and output sides */
1463  start_ptr = cur_ptr;
1464  cstate->raw_fields[fieldno] = output_ptr;
1465 
1466  /*
1467  * Scan data for field,
1468  *
1469  * The loop starts in "not quote" mode and then toggles between that
1470  * and "in quote" mode. The loop exits normally if it is in "not
1471  * quote" mode and a delimiter or line end is seen.
1472  */
1473  for (;;)
1474  {
1475  char c;
1476 
1477  /* Not in quote */
1478  for (;;)
1479  {
/* end_ptr is taken before the fetch, so it never includes the delimiter */
1480  end_ptr = cur_ptr;
1481  if (cur_ptr >= line_end_ptr)
1482  goto endfield;
1483  c = *cur_ptr++;
1484  /* unquoted field delimiter */
1485  if (c == delimc)
1486  {
1487  found_delim = true;
1488  goto endfield;
1489  }
1490  /* start of quoted field (or part of field) */
1491  if (c == quotec)
1492  {
1493  saw_quote = true;
1494  break;
1495  }
1496  /* Add c to output string */
1497  *output_ptr++ = c;
1498  }
1499 
1500  /* In quote */
1501  for (;;)
1502  {
1503  end_ptr = cur_ptr;
1504  if (cur_ptr >= line_end_ptr)
1505  ereport(ERROR,
1506  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1507  errmsg("unterminated CSV quoted field")));
1508 
1509  c = *cur_ptr++;
1510 
1511  /* escape within a quoted field */
1512  if (c == escapec)
1513  {
1514  /*
1515  * peek at the next char if available, and escape it if it
1516  * is an escape char or a quote char
1517  */
1518  if (cur_ptr < line_end_ptr)
1519  {
1520  char nextc = *cur_ptr;
1521 
1522  if (nextc == escapec || nextc == quotec)
1523  {
1524  *output_ptr++ = nextc;
1525  cur_ptr++;
1526  continue;
1527  }
1528  }
/* otherwise fall through: c is handled by the quote test / plain copy below */
1529  }
1530 
1531  /*
1532  * end of quoted field. Must do this test after testing for
1533  * escape in case quote char and escape char are the same
1534  * (which is the common case).
1535  */
1536  if (c == quotec)
1537  break;
1538 
1539  /* Add c to output string */
1540  *output_ptr++ = c;
1541  }
1542  }
1543 endfield:
1544 
1545  /* Terminate attribute value in output area */
1546  *output_ptr++ = '\0';
1547 
1548  /* Check whether raw input matched null marker */
/* the raw (still quoted/escaped) input is compared, and only if no quote was seen */
1549  input_len = end_ptr - start_ptr;
1550  if (!saw_quote && input_len == cstate->opts.null_print_len &&
1551  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1552  cstate->raw_fields[fieldno] = NULL;
1553 
1554  fieldno++;
1555  /* Done if we hit EOL instead of a delim */
1556  if (!found_delim)
1557  break;
1558  }
1559 
1560  /* Clean up state of attribute_buf */
/* step back onto the last terminator so len excludes it */
1561  output_ptr--;
1562  Assert(*output_ptr == '\0');
1563  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1564 
1565  return fieldno;
1566 }
int null_print_len
Definition: copy.h:37
StringInfoData attribute_buf
StringInfoData line_buf
int errcode(int sqlerrcode)
Definition: elog.c:704
char * null_print
Definition: copy.h:36
char * quote
Definition: copy.h:40
#define ERROR
Definition: elog.h:45
char * c
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
char * delim
Definition: copy.h:39
#define ereport(elevel,...)
Definition: elog.h:155
#define Assert(condition)
Definition: c.h:792
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
CopyFormatOptions opts
char * escape
Definition: copy.h:41
int errmsg(const char *fmt,...)
Definition: elog.c:915

◆ CopyReadAttributesText()

static int CopyReadAttributesText ( CopyFromState  cstate)
static

Definition at line 1176 of file copyfromparse.c.

References Assert, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, GetDecimalFromHex(), IS_HIGHBIT_SET, ISOCTAL, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::max_fields, StringInfoData::maxlen, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, OCTVALUE, CopyFromStateData::opts, pg_verifymbstr(), CopyFromStateData::raw_fields, repalloc(), resetStringInfo(), and val.

Referenced by NextCopyFromRawFields().

/*
 * CopyReadAttributesText
 *
 * Split the line held in cstate->line_buf into fields at the configured
 * delimiter (first byte of opts.delim), de-escaping backslash sequences
 * along the way.
 *
 * De-escaped field text is written into cstate->attribute_buf, each field
 * NUL-terminated.  cstate->raw_fields[i] is set to the start of field i in
 * that buffer, or to NULL when the field's raw (not de-escaped) text equals
 * the null marker.  raw_fields[] is doubled with repalloc() whenever it runs
 * out of slots.
 *
 * Recognized escapes: \b \f \n \r \t \v, up to three octal digits, and \x
 * followed by one or two hex digits.  Any other character after a backslash
 * is taken literally.
 *
 * Returns the number of fields found.  Raises ERROR when a zero-column
 * table gets a non-empty line.
 */
1177 {
1178  char delimc = cstate->opts.delim[0];
1179  int fieldno;
1180  char *output_ptr;
1181  char *cur_ptr;
1182  char *line_end_ptr;
1183 
1184  /*
1185  * We need a special case for zero-column tables: check that the input
1186  * line is empty, and return.
1187  */
1188  if (cstate->max_fields <= 0)
1189  {
1190  if (cstate->line_buf.len != 0)
1191  ereport(ERROR,
1192  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1193  errmsg("extra data after last expected column")));
1194  return 0;
1195  }
1196 
1197  resetStringInfo(&cstate->attribute_buf);
1198 
1199  /*
1200  * The de-escaped attributes will certainly not be longer than the input
1201  * data line, so we can just force attribute_buf to be large enough and
1202  * then transfer data without any checks for enough space. We need to do
1203  * it this way because enlarging attribute_buf mid-stream would invalidate
1204  * pointers already stored into cstate->raw_fields[].
1205  */
1206  if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
1207  enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
1208  output_ptr = cstate->attribute_buf.data;
1209 
1210  /* set pointer variables for loop */
1211  cur_ptr = cstate->line_buf.data;
1212  line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
1213 
1214  /* Outer loop iterates over fields */
1215  fieldno = 0;
1216  for (;;)
1217  {
1218  bool found_delim = false;
1219  char *start_ptr;
1220  char *end_ptr;
1221  int input_len;
1222  bool saw_non_ascii = false;
1223 
1224  /* Make sure there is enough space for the next value */
1225  if (fieldno >= cstate->max_fields)
1226  {
1227  cstate->max_fields *= 2;
1228  cstate->raw_fields =
1229  repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
1230  }
1231 
1232  /* Remember start of field on both input and output sides */
1233  start_ptr = cur_ptr;
1234  cstate->raw_fields[fieldno] = output_ptr;
1235 
1236  /*
1237  * Scan data for field.
1238  *
1239  * Note that in this loop, we are scanning to locate the end of field
1240  * and also speculatively performing de-escaping. Once we find the
1241  * end-of-field, we can match the raw field contents against the null
1242  * marker string. Only after that comparison fails do we know that
1243  * de-escaping is actually the right thing to do; therefore we *must
1244  * not* throw any syntax errors before we've done the null-marker
1245  * check.
1246  */
1247  for (;;)
1248  {
1249  char c;
1250 
/* end_ptr is taken before the fetch, so it never includes the delimiter */
1251  end_ptr = cur_ptr;
1252  if (cur_ptr >= line_end_ptr)
1253  break;
1254  c = *cur_ptr++;
1255  if (c == delimc)
1256  {
1257  found_delim = true;
1258  break;
1259  }
1260  if (c == '\\')
1261  {
/*
 * Backslash as the last byte of the line: stop here.  It is not copied to
 * the output, and end_ptr still points at it, so it is not part of the raw
 * field length either.
 */
1262  if (cur_ptr >= line_end_ptr)
1263  break;
1264  c = *cur_ptr++;
1265  switch (c)
1266  {
1267  case '0':
1268  case '1':
1269  case '2':
1270  case '3':
1271  case '4':
1272  case '5':
1273  case '6':
1274  case '7':
1275  {
1276  /* handle \013 */
1277  int val;
1278 
1279  val = OCTVALUE(c);
1280  if (cur_ptr < line_end_ptr)
1281  {
1282  c = *cur_ptr;
1283  if (ISOCTAL(c))
1284  {
1285  cur_ptr++;
1286  val = (val << 3) + OCTVALUE(c);
1287  if (cur_ptr < line_end_ptr)
1288  {
1289  c = *cur_ptr;
1290  if (ISOCTAL(c))
1291  {
1292  cur_ptr++;
1293  val = (val << 3) + OCTVALUE(c);
1294  }
1295  }
1296  }
1297  }
/* keep only the low 8 bits of the octal value */
1298  c = val & 0377;
1299  if (c == '\0' || IS_HIGHBIT_SET(c))
1300  saw_non_ascii = true;
1301  }
1302  break;
1303  case 'x':
1304  /* Handle \x3F */
/* if no hex digit follows, c stays 'x' and is emitted literally */
1305  if (cur_ptr < line_end_ptr)
1306  {
1307  char hexchar = *cur_ptr;
1308 
1309  if (isxdigit((unsigned char) hexchar))
1310  {
1311  int val = GetDecimalFromHex(hexchar);
1312 
1313  cur_ptr++;
1314  if (cur_ptr < line_end_ptr)
1315  {
1316  hexchar = *cur_ptr;
1317  if (isxdigit((unsigned char) hexchar))
1318  {
1319  cur_ptr++;
1320  val = (val << 4) + GetDecimalFromHex(hexchar);
1321  }
1322  }
1323  c = val & 0xff;
1324  if (c == '\0' || IS_HIGHBIT_SET(c))
1325  saw_non_ascii = true;
1326  }
1327  }
1328  break;
1329  case 'b':
1330  c = '\b';
1331  break;
1332  case 'f':
1333  c = '\f';
1334  break;
1335  case 'n':
1336  c = '\n';
1337  break;
1338  case 'r':
1339  c = '\r';
1340  break;
1341  case 't':
1342  c = '\t';
1343  break;
1344  case 'v':
1345  c = '\v';
1346  break;
1347 
1348  /*
1349  * in all other cases, take the char after '\'
1350  * literally
1351  */
1352  }
1353  }
1354 
1355  /* Add c to output string */
1356  *output_ptr++ = c;
1357  }
1358 
1359  /* Check whether raw input matched null marker */
1360  input_len = end_ptr - start_ptr;
1361  if (input_len == cstate->opts.null_print_len &&
1362  strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
1363  cstate->raw_fields[fieldno] = NULL;
1364  else
1365  {
1366  /*
1367  * At this point we know the field is supposed to contain data.
1368  *
1369  * If we de-escaped any non-7-bit-ASCII chars, make sure the
1370  * resulting string is valid data for the db encoding.
1371  */
1372  if (saw_non_ascii)
1373  {
1374  char *fld = cstate->raw_fields[fieldno];
1375 
/* NOTE(review): called with noError = false; presumably reports invalid data itself - confirm in mbutils.c */
1376  pg_verifymbstr(fld, output_ptr - fld, false);
1377  }
1378  }
1379 
1380  /* Terminate attribute value in output area */
1381  *output_ptr++ = '\0';
1382 
1383  fieldno++;
1384  /* Done if we hit EOL instead of a delim */
1385  if (!found_delim)
1386  break;
1387  }
1388 
1389  /* Clean up state of attribute_buf */
/* step back onto the last terminator so len excludes it */
1390  output_ptr--;
1391  Assert(*output_ptr == '\0');
1392  cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
1393 
1394  return fieldno;
1395 }
int null_print_len
Definition: copy.h:37
StringInfoData attribute_buf
StringInfoData line_buf
int errcode(int sqlerrcode)
Definition: elog.c:704
char * null_print
Definition: copy.h:36
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1144
#define ERROR
Definition: elog.h:45
char * c
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
#define ISOCTAL(c)
Definition: copyfromparse.c:34
#define OCTVALUE(c)
Definition: copyfromparse.c:35
char * delim
Definition: copy.h:39
static int GetDecimalFromHex(char hex)
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition: mbutils.c:1446
#define ereport(elevel,...)
Definition: elog.h:155
#define Assert(condition)
Definition: c.h:792
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
CopyFormatOptions opts
int errmsg(const char *fmt,...)
Definition: elog.c:915
long val
Definition: informix.c:664

◆ CopyReadBinaryAttribute()

static Datum CopyReadBinaryAttribute ( CopyFromState  cstate,
FmgrInfo *  flinfo,
Oid  typioparam,
int32  typmod,
bool *  isnull 
)
static

Definition at line 1573 of file copyfromparse.c.

References CopyFromStateData::attribute_buf, CopyGetInt32(), CopyReadBinaryData(), StringInfoData::cursor, StringInfoData::data, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, StringInfoData::len, ReceiveFunctionCall(), and resetStringInfo().

Referenced by NextCopyFrom().

1576 {
1577  int32 fld_size;
1578  Datum result;
1579 
1580  if (!CopyGetInt32(cstate, &fld_size))
1581  ereport(ERROR,
1582  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1583  errmsg("unexpected EOF in COPY data")));
1584  if (fld_size == -1)
1585  {
1586  *isnull = true;
1587  return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
1588  }
1589  if (fld_size < 0)
1590  ereport(ERROR,
1591  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1592  errmsg("invalid field size")));
1593 
1594  /* reset attribute_buf to empty, and load raw data in it */
1595  resetStringInfo(&cstate->attribute_buf);
1596 
1597  enlargeStringInfo(&cstate->attribute_buf, fld_size);
1598  if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
1599  fld_size) != fld_size)
1600  ereport(ERROR,
1601  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1602  errmsg("unexpected EOF in COPY data")));
1603 
1604  cstate->attribute_buf.len = fld_size;
1605  cstate->attribute_buf.data[fld_size] = '\0';
1606 
1607  /* Call the column type's binary input converter */
1608  result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
1609  typioparam, typmod);
1610 
1611  /* Trouble if it didn't eat the whole buffer */
1612  if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
1613  ereport(ERROR,
1614  (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1615  errmsg("incorrect binary data format")));
1616 
1617  *isnull = false;
1618  return result;
1619 }
StringInfoData attribute_buf
int errcode(int sqlerrcode)
Definition: elog.c:704
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
signed int int32
Definition: c.h:417
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
#define ERROR
Definition: elog.h:45
Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)
Definition: fmgr.c:1590
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
uintptr_t Datum
Definition: postgres.h:367
#define ereport(elevel,...)
Definition: elog.h:155
int errmsg(const char *fmt,...)
Definition: elog.c:915

◆ CopyReadBinaryData()

static int CopyReadBinaryData ( CopyFromState  cstate,
char *  dest,
int  nbytes 
)
static

Definition at line 402 of file copyfromparse.c.

References CopyLoadRawBuf(), Min, CopyFromStateData::raw_buf, RAW_BUF_BYTES, and CopyFromStateData::raw_buf_index.

Referenced by CopyGetInt16(), CopyGetInt32(), CopyReadBinaryAttribute(), NextCopyFrom(), and ReceiveCopyBinaryHeader().

403 {
404  int copied_bytes = 0;
405 
406  if (RAW_BUF_BYTES(cstate) >= nbytes)
407  {
408  /* Enough bytes are present in the buffer. */
409  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
410  cstate->raw_buf_index += nbytes;
411  copied_bytes = nbytes;
412  }
413  else
414  {
415  /*
416  * Not enough bytes in the buffer, so must read from the file. Need
417  * to loop since 'nbytes' could be larger than the buffer size.
418  */
419  do
420  {
421  int copy_bytes;
422 
423  /* Load more data if buffer is empty. */
424  if (RAW_BUF_BYTES(cstate) == 0)
425  {
426  if (!CopyLoadRawBuf(cstate))
427  break; /* EOF */
428  }
429 
430  /* Transfer some bytes. */
431  copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
432  memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
433  cstate->raw_buf_index += copy_bytes;
434  dest += copy_bytes;
435  copied_bytes += copy_bytes;
436  } while (copied_bytes < nbytes);
437  }
438 
439  return copied_bytes;
440 }
#define RAW_BUF_BYTES(cstate)
#define Min(x, y)
Definition: c.h:974
static bool CopyLoadRawBuf(CopyFromState cstate)

◆ CopyReadLine()

static bool CopyReadLine ( CopyFromState  cstate)
static

Definition at line 695 of file copyfromparse.c.

References appendBinaryStringInfo(), Assert, COPY_NEW_FE, CopyFromStateData::copy_src, CopyLoadRawBuf(), CopyReadLineText(), StringInfoData::data, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, CopyFromStateData::file_encoding, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::line_buf_converted, CopyFromStateData::line_buf_valid, CopyFromStateData::need_transcoding, pfree(), pg_any_to_server(), CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and resetStringInfo().

Referenced by NextCopyFromRawFields().

696 {
697  bool result;
698 
699  resetStringInfo(&cstate->line_buf);
700  cstate->line_buf_valid = true;
701 
702  /* Mark that encoding conversion hasn't occurred yet */
703  cstate->line_buf_converted = false;
704 
705  /* Parse data and transfer into line_buf */
706  result = CopyReadLineText(cstate);
707 
708  if (result)
709  {
710  /*
711  * Reached EOF. In protocol version 3, we should ignore anything
712  * after \. up to the protocol end of copy data. (XXX maybe better
713  * not to treat \. as special?)
714  */
715  if (cstate->copy_src == COPY_NEW_FE)
716  {
717  do
718  {
719  cstate->raw_buf_index = cstate->raw_buf_len;
720  } while (CopyLoadRawBuf(cstate));
721  }
722  }
723  else
724  {
725  /*
726  * If we didn't hit EOF, then we must have transferred the EOL marker
727  * to line_buf along with the data. Get rid of it.
728  */
729  switch (cstate->eol_type)
730  {
731  case EOL_NL:
732  Assert(cstate->line_buf.len >= 1);
733  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
734  cstate->line_buf.len--;
735  cstate->line_buf.data[cstate->line_buf.len] = '\0';
736  break;
737  case EOL_CR:
738  Assert(cstate->line_buf.len >= 1);
739  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
740  cstate->line_buf.len--;
741  cstate->line_buf.data[cstate->line_buf.len] = '\0';
742  break;
743  case EOL_CRNL:
744  Assert(cstate->line_buf.len >= 2);
745  Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
746  Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
747  cstate->line_buf.len -= 2;
748  cstate->line_buf.data[cstate->line_buf.len] = '\0';
749  break;
750  case EOL_UNKNOWN:
751  /* shouldn't get here */
752  Assert(false);
753  break;
754  }
755  }
756 
757  /* Done reading the line. Convert it to server encoding. */
758  if (cstate->need_transcoding)
759  {
760  char *cvt;
761 
762  cvt = pg_any_to_server(cstate->line_buf.data,
763  cstate->line_buf.len,
764  cstate->file_encoding);
765  if (cvt != cstate->line_buf.data)
766  {
767  /* transfer converted data back to line_buf */
768  resetStringInfo(&cstate->line_buf);
769  appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
770  pfree(cvt);
771  }
772  }
773 
774  /* Now it's safe to use the buffer in error messages */
775  cstate->line_buf_converted = true;
776 
777  return result;
778 }
StringInfoData line_buf
void pfree(void *pointer)
Definition: mcxt.c:1057
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
static bool CopyLoadRawBuf(CopyFromState cstate)
#define Assert(condition)
Definition: c.h:792
static bool CopyReadLineText(CopyFromState cstate)
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:619
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227

◆ CopyReadLineText()

static bool CopyReadLineText ( CopyFromState  cstate)
static

Definition at line 784 of file copyfromparse.c.

References appendBinaryStringInfo(), CopyLoadRawBuf(), CopyFormatOptions::csv_mode, CopyFromStateData::cur_lineno, CopyFromStateData::encoding_embeds_ascii, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, ereport, errcode(), errhint(), errmsg(), ERROR, CopyFormatOptions::escape, CopyFromStateData::file_encoding, IF_NEED_REFILL_AND_EOF_BREAK, IF_NEED_REFILL_AND_NOT_EOF_CONTINUE, IS_HIGHBIT_SET, CopyFromStateData::line_buf, NO_END_OF_COPY_GOTO, CopyFromStateData::opts, pg_encoding_mblen(), CopyFormatOptions::quote, CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and REFILL_LINEBUF.

Referenced by CopyReadLine().

785 {
786  char *copy_raw_buf;
787  int raw_buf_ptr;
788  int copy_buf_len;
789  bool need_data = false;
790  bool hit_eof = false;
791  bool result = false;
792  char mblen_str[2];
793 
794  /* CSV variables */
795  bool first_char_in_line = true;
796  bool in_quote = false,
797  last_was_esc = false;
798  char quotec = '\0';
799  char escapec = '\0';
800 
801  if (cstate->opts.csv_mode)
802  {
803  quotec = cstate->opts.quote[0];
804  escapec = cstate->opts.escape[0];
805  /* ignore special escape processing if it's the same as quotec */
806  if (quotec == escapec)
807  escapec = '\0';
808  }
809 
810  mblen_str[1] = '\0';
811 
812  /*
813  * The objective of this loop is to transfer the entire next input line
814  * into line_buf. Hence, we only care for detecting newlines (\r and/or
815  * \n) and the end-of-copy marker (\.).
816  *
817  * In CSV mode, \r and \n inside a quoted field are just part of the data
818  * value and are put in line_buf. We keep just enough state to know if we
819  * are currently in a quoted field or not.
820  *
821  * These four characters, and the CSV escape and quote characters, are
822  * assumed the same in frontend and backend encodings.
823  *
824  * For speed, we try to move data from raw_buf to line_buf in chunks
825  * rather than one character at a time. raw_buf_ptr points to the next
826  * character to examine; any characters from raw_buf_index to raw_buf_ptr
827  * have been determined to be part of the line, but not yet transferred to
828  * line_buf.
829  *
830  * For a little extra speed within the loop, we copy raw_buf and
831  * raw_buf_len into local variables.
832  */
833  copy_raw_buf = cstate->raw_buf;
834  raw_buf_ptr = cstate->raw_buf_index;
835  copy_buf_len = cstate->raw_buf_len;
836 
837  for (;;)
838  {
839  int prev_raw_ptr;
840  char c;
841 
842  /*
843  * Load more data if needed. Ideally we would just force four bytes
844  * of read-ahead and avoid the many calls to
845  * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
846  * does not allow us to read too far ahead or we might read into the
847  * next data, so we read-ahead only as far we know we can. One
848  * optimization would be to read-ahead four byte here if
849  * cstate->copy_src != COPY_OLD_FE, but it hardly seems worth it,
850  * considering the size of the buffer.
851  */
852  if (raw_buf_ptr >= copy_buf_len || need_data)
853  {
854  REFILL_LINEBUF;
855 
856  /*
857  * Try to read some more data. This will certainly reset
858  * raw_buf_index to zero, and raw_buf_ptr must go with it.
859  */
860  if (!CopyLoadRawBuf(cstate))
861  hit_eof = true;
862  raw_buf_ptr = 0;
863  copy_buf_len = cstate->raw_buf_len;
864 
865  /*
866  * If we are completely out of data, break out of the loop,
867  * reporting EOF.
868  */
869  if (copy_buf_len <= 0)
870  {
871  result = true;
872  break;
873  }
874  need_data = false;
875  }
876 
877  /* OK to fetch a character */
878  prev_raw_ptr = raw_buf_ptr;
879  c = copy_raw_buf[raw_buf_ptr++];
880 
881  if (cstate->opts.csv_mode)
882  {
883  /*
884  * If character is '\\' or '\r', we may need to look ahead below.
885  * Force fetch of the next character if we don't already have it.
886  * We need to do this before changing CSV state, in case one of
887  * these characters is also the quote or escape character.
888  *
889  * Note: old-protocol does not like forced prefetch, but it's OK
890  * here since we cannot validly be at EOF.
891  */
892  if (c == '\\' || c == '\r')
893  {
894  IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
895  }
896 
897  /*
898  * Dealing with quotes and escapes here is mildly tricky. If the
899  * quote char is also the escape char, there's no problem - we
900  * just use the char as a toggle. If they are different, we need
901  * to ensure that we only take account of an escape inside a
902  * quoted field and immediately preceding a quote char, and not
903  * the second in an escape-escape sequence.
904  */
905  if (in_quote && c == escapec)
906  last_was_esc = !last_was_esc;
907  if (c == quotec && !last_was_esc)
908  in_quote = !in_quote;
909  if (c != escapec)
910  last_was_esc = false;
911 
912  /*
913  * Updating the line count for embedded CR and/or LF chars is
914  * necessarily a little fragile - this test is probably about the
915  * best we can do. (XXX it's arguable whether we should do this
916  * at all --- is cur_lineno a physical or logical count?)
917  */
918  if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
919  cstate->cur_lineno++;
920  }
921 
922  /* Process \r */
923  if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
924  {
925  /* Check for \r\n on first line, _and_ handle \r\n. */
926  if (cstate->eol_type == EOL_UNKNOWN ||
927  cstate->eol_type == EOL_CRNL)
928  {
929  /*
930  * If need more data, go back to loop top to load it.
931  *
932  * Note that if we are at EOF, c will wind up as '\0' because
933  * of the guaranteed pad of raw_buf.
934  */
935  IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
936 
937  /* get next char */
938  c = copy_raw_buf[raw_buf_ptr];
939 
940  if (c == '\n')
941  {
942  raw_buf_ptr++; /* eat newline */
943  cstate->eol_type = EOL_CRNL; /* in case not set yet */
944  }
945  else
946  {
947  /* found \r, but no \n */
948  if (cstate->eol_type == EOL_CRNL)
949  ereport(ERROR,
950  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
951  !cstate->opts.csv_mode ?
952  errmsg("literal carriage return found in data") :
953  errmsg("unquoted carriage return found in data"),
954  !cstate->opts.csv_mode ?
955  errhint("Use \"\\r\" to represent carriage return.") :
956  errhint("Use quoted CSV field to represent carriage return.")));
957 
958  /*
959  * if we got here, it is the first line and we didn't find
960  * \n, so don't consume the peeked character
961  */
962  cstate->eol_type = EOL_CR;
963  }
964  }
965  else if (cstate->eol_type == EOL_NL)
966  ereport(ERROR,
967  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
968  !cstate->opts.csv_mode ?
969  errmsg("literal carriage return found in data") :
970  errmsg("unquoted carriage return found in data"),
971  !cstate->opts.csv_mode ?
972  errhint("Use \"\\r\" to represent carriage return.") :
973  errhint("Use quoted CSV field to represent carriage return.")));
974  /* If reach here, we have found the line terminator */
975  break;
976  }
977 
978  /* Process \n */
979  if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
980  {
981  if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
982  ereport(ERROR,
983  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
984  !cstate->opts.csv_mode ?
985  errmsg("literal newline found in data") :
986  errmsg("unquoted newline found in data"),
987  !cstate->opts.csv_mode ?
988  errhint("Use \"\\n\" to represent newline.") :
989  errhint("Use quoted CSV field to represent newline.")));
990  cstate->eol_type = EOL_NL; /* in case not set yet */
991  /* If reach here, we have found the line terminator */
992  break;
993  }
994 
995  /*
996  * In CSV mode, we only recognize \. alone on a line. This is because
997  * \. is a valid CSV data value.
998  */
999  if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
1000  {
1001  char c2;
1002 
1003  IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
1004  IF_NEED_REFILL_AND_EOF_BREAK(0);
1005 
1006  /* -----
1007  * get next character
1008  * Note: we do not change c so if it isn't \., we can fall
1009  * through and continue processing for file encoding.
1010  * -----
1011  */
1012  c2 = copy_raw_buf[raw_buf_ptr];
1013 
1014  if (c2 == '.')
1015  {
1016  raw_buf_ptr++; /* consume the '.' */
1017 
1018  /*
1019  * Note: if we loop back for more data here, it does not
1020  * matter that the CSV state change checks are re-executed; we
1021  * will come back here with no important state changed.
1022  */
1023  if (cstate->eol_type == EOL_CRNL)
1024  {
1025  /* Get the next character */
1026  IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
1027  /* if hit_eof, c2 will become '\0' */
1028  c2 = copy_raw_buf[raw_buf_ptr++];
1029 
1030  if (c2 == '\n')
1031  {
1032  if (!cstate->opts.csv_mode)
1033  ereport(ERROR,
1034  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1035  errmsg("end-of-copy marker does not match previous newline style")));
1036  else
1037  NO_END_OF_COPY_GOTO;
1038  }
1039  else if (c2 != '\r')
1040  {
1041  if (!cstate->opts.csv_mode)
1042  ereport(ERROR,
1043  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1044  errmsg("end-of-copy marker corrupt")));
1045  else
1046  NO_END_OF_COPY_GOTO;
1047  }
1048  }
1049 
1050  /* Get the next character */
1051  IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
1052  /* if hit_eof, c2 will become '\0' */
1053  c2 = copy_raw_buf[raw_buf_ptr++];
1054 
1055  if (c2 != '\r' && c2 != '\n')
1056  {
1057  if (!cstate->opts.csv_mode)
1058  ereport(ERROR,
1059  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1060  errmsg("end-of-copy marker corrupt")));
1061  else
1062  NO_END_OF_COPY_GOTO;
1063  }
1064 
1065  if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
1066  (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
1067  (cstate->eol_type == EOL_CR && c2 != '\r'))
1068  {
1069  ereport(ERROR,
1070  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1071  errmsg("end-of-copy marker does not match previous newline style")));
1072  }
1073 
1074  /*
1075  * Transfer only the data before the \. into line_buf, then
1076  * discard the data and the \. sequence.
1077  */
1078  if (prev_raw_ptr > cstate->raw_buf_index)
1079  appendBinaryStringInfo(&cstate->line_buf,
1080  cstate->raw_buf + cstate->raw_buf_index,
1081  prev_raw_ptr - cstate->raw_buf_index);
1082  cstate->raw_buf_index = raw_buf_ptr;
1083  result = true; /* report EOF */
1084  break;
1085  }
1086  else if (!cstate->opts.csv_mode)
1087 
1088  /*
1089  * If we are here, it means we found a backslash followed by
1090  * something other than a period. In non-CSV mode, anything
1091  * after a backslash is special, so we skip over that second
1092  * character too. If we didn't do that \\. would be
1093  * considered an eof-of copy, while in non-CSV mode it is a
1094  * literal backslash followed by a period. In CSV mode,
1095  * backslashes are not special, so we want to process the
1096  * character after the backslash just like a normal character,
1097  * so we don't increment in those cases.
1098  */
1099  raw_buf_ptr++;
1100  }
1101 
1102  /*
1103  * This label is for CSV cases where \. appears at the start of a
1104  * line, but there is more text after it, meaning it was a data value.
1105  * We are more strict for \. in CSV mode because \. could be a data
1106  * value, while in non-CSV mode, \. cannot be a data value.
1107  */
1108 not_end_of_copy:
1109 
1110  /*
1111  * Process all bytes of a multi-byte character as a group.
1112  *
1113  * We only support multi-byte sequences where the first byte has the
1114  * high-bit set, so as an optimization we can avoid this block
1115  * entirely if it is not set.
1116  */
1117  if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
1118  {
1119  int mblen;
1120 
1121  /*
1122  * It is enough to look at the first byte in all our encodings, to
1123  * get the length. (GB18030 is a bit special, but still works for
1124  * our purposes; see comment in pg_gb18030_mblen())
1125  */
1126  mblen_str[0] = c;
1127  mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
1128 
1129  IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
1130  IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
1131  raw_buf_ptr += mblen - 1;
1132  }
1133  first_char_in_line = false;
1134  } /* end of outer loop */
1135 
1136  /*
1137  * Transfer any still-uncopied data to line_buf.
1138  */
1139  REFILL_LINEBUF;
1140 
1141  return result;
1142 }
int errhint(const char *fmt,...)
Definition: elog.c:1162
StringInfoData line_buf
int errcode(int sqlerrcode)
Definition: elog.c:704
char * quote
Definition: copy.h:40
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1144
#define ERROR
Definition: elog.h:45
char * c
bool csv_mode
Definition: copy.h:34
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:1554
#define NO_END_OF_COPY_GOTO
Definition: copyfromparse.c:95
static bool CopyLoadRawBuf(CopyFromState cstate)
#define REFILL_LINEBUF
Definition: copyfromparse.c:82
#define ereport(elevel,...)
Definition: elog.h:155
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)
Definition: copyfromparse.c:65
CopyFormatOptions opts
char * escape
Definition: copy.h:41
int errmsg(const char *fmt,...)
Definition: elog.c:915
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)
Definition: copyfromparse.c:53
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227

◆ GetDecimalFromHex()

static int GetDecimalFromHex ( char  hex)
static

Definition at line 1148 of file copyfromparse.c.

Referenced by CopyReadAttributesText().

/*
 * Return the numeric value of one hexadecimal digit character.
 *
 * A decimal digit yields 0-9.  Any other character is lower-cased and
 * measured from 'a', so 'a'..'f' and 'A'..'F' yield 10-15.  No validation is
 * performed: a character that is not a hex digit produces a meaningless
 * value, so the caller presumably checks the character first (not visible in
 * this listing -- confirm against CopyReadAttributesText()).
 */
1149 {
 /* cast to unsigned char so a negative char is never passed to <ctype.h> */
1150  if (isdigit((unsigned char) hex))
1151  return hex - '0';
1152  else
 /* letter digit: offset from 'a', then add 10 */
1153  return tolower((unsigned char) hex) - 'a' + 10;
1154 }

◆ NextCopyFrom()

bool NextCopyFrom ( CopyFromState  cstate,
ExprContext econtext,
Datum values,
bool nulls 
)

Definition at line 505 of file copyfromparse.c.

References Assert, attnum, CopyFromStateData::attnumlist, CopyFormatOptions::binary, CopyFromStateData::convert_select_flags, COPY_OLD_FE, CopyFromStateData::copy_src, CopyGetInt16(), CopyReadBinaryAttribute(), CopyReadBinaryData(), CopyFormatOptions::csv_mode, cur, CopyFromStateData::cur_attname, CopyFromStateData::cur_attval, CopyFromStateData::cur_lineno, CurrentMemoryContext, CopyFromStateData::defexprs, CopyFromStateData::defmap, ExprContext::ecxt_per_tuple_memory, ereport, errcode(), errmsg(), ERROR, ExecEvalExpr(), CopyFormatOptions::force_notnull_flags, CopyFormatOptions::force_null_flags, i, CopyFromStateData::in_functions, InputFunctionCall(), lfirst_int, list_length(), MemSet, NameStr, TupleDescData::natts, NextCopyFromRawFields(), CopyFormatOptions::null_print, CopyFromStateData::num_defaults, CopyFromStateData::opts, CopyFromStateData::rel, RelationGetDescr, TupleDescAttr, and CopyFromStateData::typioparams.

Referenced by CopyFrom(), file_acquire_sample_rows(), and fileIterateForeignScan().

507 {
508  TupleDesc tupDesc;
509  AttrNumber num_phys_attrs,
510  attr_count,
511  num_defaults = cstate->num_defaults;
512  FmgrInfo *in_functions = cstate->in_functions;
513  Oid *typioparams = cstate->typioparams;
514  int i;
515  int *defmap = cstate->defmap;
516  ExprState **defexprs = cstate->defexprs;
517 
518  tupDesc = RelationGetDescr(cstate->rel);
519  num_phys_attrs = tupDesc->natts;
520  attr_count = list_length(cstate->attnumlist);
521 
522  /* Initialize all values for row to NULL */
523  MemSet(values, 0, num_phys_attrs * sizeof(Datum));
524  MemSet(nulls, true, num_phys_attrs * sizeof(bool));
525 
526  if (!cstate->opts.binary)
527  {
528  char **field_strings;
529  ListCell *cur;
530  int fldct;
531  int fieldno;
532  char *string;
533 
534  /* read raw fields in the next line */
535  if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
536  return false;
537 
538  /* check for overflowing fields */
539  if (attr_count > 0 && fldct > attr_count)
540  ereport(ERROR,
541  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
542  errmsg("extra data after last expected column")));
543 
544  fieldno = 0;
545 
546  /* Loop to read the user attributes on the line. */
547  foreach(cur, cstate->attnumlist)
548  {
549  int attnum = lfirst_int(cur);
550  int m = attnum - 1;
551  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
552 
553  if (fieldno >= fldct)
554  ereport(ERROR,
555  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
556  errmsg("missing data for column \"%s\"",
557  NameStr(att->attname))));
558  string = field_strings[fieldno++];
559 
560  if (cstate->convert_select_flags &&
561  !cstate->convert_select_flags[m])
562  {
563  /* ignore input field, leaving column as NULL */
564  continue;
565  }
566 
567  if (cstate->opts.csv_mode)
568  {
569  if (string == NULL &&
570  cstate->opts.force_notnull_flags[m])
571  {
572  /*
573  * FORCE_NOT_NULL option is set and column is NULL -
574  * convert it to the NULL string.
575  */
576  string = cstate->opts.null_print;
577  }
578  else if (string != NULL && cstate->opts.force_null_flags[m]
579  && strcmp(string, cstate->opts.null_print) == 0)
580  {
581  /*
582  * FORCE_NULL option is set and column matches the NULL
583  * string. It must have been quoted, or otherwise the
584  * string would already have been set to NULL. Convert it
585  * to NULL as specified.
586  */
587  string = NULL;
588  }
589  }
590 
591  cstate->cur_attname = NameStr(att->attname);
592  cstate->cur_attval = string;
593  values[m] = InputFunctionCall(&in_functions[m],
594  string,
595  typioparams[m],
596  att->atttypmod);
597  if (string != NULL)
598  nulls[m] = false;
599  cstate->cur_attname = NULL;
600  cstate->cur_attval = NULL;
601  }
602 
603  Assert(fieldno == attr_count);
604  }
605  else
606  {
607  /* binary */
608  int16 fld_count;
609  ListCell *cur;
610 
611  cstate->cur_lineno++;
612 
613  if (!CopyGetInt16(cstate, &fld_count))
614  {
615  /* EOF detected (end of file, or protocol-level EOF) */
616  return false;
617  }
618 
619  if (fld_count == -1)
620  {
621  /*
622  * Received EOF marker. In a V3-protocol copy, wait for the
623  * protocol-level EOF, and complain if it doesn't come
624  * immediately. This ensures that we correctly handle CopyFail,
625  * if client chooses to send that now.
626  *
627  * Note that we MUST NOT try to read more data in an old-protocol
628  * copy, since there is no protocol-level EOF marker then. We
629  * could go either way for copy from file, but choose to throw
630  * error if there's data after the EOF marker, for consistency
631  * with the new-protocol case.
632  */
633  char dummy;
634 
635  if (cstate->copy_src != COPY_OLD_FE &&
636  CopyReadBinaryData(cstate, &dummy, 1) > 0)
637  ereport(ERROR,
638  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
639  errmsg("received copy data after EOF marker")));
640  return false;
641  }
642 
643  if (fld_count != attr_count)
644  ereport(ERROR,
645  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
646  errmsg("row field count is %d, expected %d",
647  (int) fld_count, attr_count)));
648 
649  foreach(cur, cstate->attnumlist)
650  {
651  int attnum = lfirst_int(cur);
652  int m = attnum - 1;
653  Form_pg_attribute att = TupleDescAttr(tupDesc, m);
654 
655  cstate->cur_attname = NameStr(att->attname);
656  values[m] = CopyReadBinaryAttribute(cstate,
657  &in_functions[m],
658  typioparams[m],
659  att->atttypmod,
660  &nulls[m]);
661  cstate->cur_attname = NULL;
662  }
663  }
664 
665  /*
666  * Now compute and insert any defaults available for the columns not
667  * provided by the input data. Anything not processed here or above will
668  * remain NULL.
669  */
670  for (i = 0; i < num_defaults; i++)
671  {
672  /*
673  * The caller must supply econtext and have switched into the
674  * per-tuple memory context in it.
675  */
676  Assert(econtext != NULL);
677  Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
678 
679  values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
680  &nulls[defmap[i]]);
681  }
682 
683  return true;
684 }
signed short int16
Definition: c.h:416
Definition: fmgr.h:56
bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
const char * cur_attname
#define RelationGetDescr(relation)
Definition: rel.h:483
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:233
struct cursor * cur
Definition: ecpg.c:28
int errcode(int sqlerrcode)
Definition: elog.c:704
static bool CopyGetInt16(CopyFromState cstate, int16 *val)
#define MemSet(start, val, len)
Definition: c.h:996
const char * cur_attval
unsigned int Oid
Definition: postgres_ext.h:31
char * null_print
Definition: copy.h:36
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
#define ERROR
Definition: elog.h:45
#define lfirst_int(lc)
Definition: pg_list.h:170
bool binary
Definition: copy.h:32
bool csv_mode
Definition: copy.h:34
static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:292
char string[11]
Definition: preproc-type.c:46
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:193
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
uintptr_t Datum
Definition: postgres.h:367
Datum InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
Definition: fmgr.c:1532
int16 attnum
Definition: pg_attribute.h:79
#define ereport(elevel,...)
Definition: elog.h:155
#define Assert(condition)
Definition: c.h:792
bool * force_null_flags
Definition: copy.h:48
static int list_length(const List *l)
Definition: pg_list.h:149
CopyFormatOptions opts
static Datum values[MAXATTR]
Definition: bootstrap.c:165
int errmsg(const char *fmt,...)
Definition: elog.c:915
int i
#define NameStr(name)
Definition: c.h:669
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)
int16 AttrNumber
Definition: attnum.h:21
bool * force_notnull_flags
Definition: copy.h:46

◆ NextCopyFromRawFields()

bool NextCopyFromRawFields ( CopyFromState  cstate,
char ***  fields,
int *  nfields 
)

Definition at line 454 of file copyfromparse.c.

References Assert, CopyFormatOptions::binary, CopyReadAttributesCSV(), CopyReadAttributesText(), CopyReadLine(), CopyFormatOptions::csv_mode, CopyFromStateData::cur_lineno, CopyFormatOptions::header_line, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::opts, and CopyFromStateData::raw_fields.

Referenced by NextCopyFrom().

/*
 * Read the next text/CSV input line and split it into raw field strings.
 *
 * Returns false when input is exhausted.  Otherwise returns true with
 * *fields set to cstate->raw_fields (the state's own array, not a copy) and
 * *nfields set to the field count reported by the attribute parser.
 * cstate->cur_lineno is advanced once for every line read, including a
 * skipped header line.
 */
455 {
456  int fldct;
457  bool done;
458 
459  /* only available for text or csv input */
460  Assert(!cstate->opts.binary);
461 
462  /* on input just throw the header line away */
 /* cur_lineno == 0 means nothing has been read yet, so this runs only once */
463  if (cstate->cur_lineno == 0 && cstate->opts.header_line)
464  {
465  cstate->cur_lineno++;
 /* a true result from CopyReadLine() is treated as end of input */
466  if (CopyReadLine(cstate))
467  return false; /* done */
468  }
469 
 /* count the data line before reading it */
470  cstate->cur_lineno++;
471 
472  /* Actually read the line into memory here */
473  done = CopyReadLine(cstate);
474 
475  /*
476  * EOF at start of line means we're done. If we see EOF after some
477  * characters, we act as though it was newline followed by EOF, ie,
478  * process the line and then exit loop on next iteration.
479  */
480  if (done && cstate->line_buf.len == 0)
481  return false;
482 
483  /* Parse the line into de-escaped field values */
484  if (cstate->opts.csv_mode)
485  fldct = CopyReadAttributesCSV(cstate);
486  else
487  fldct = CopyReadAttributesText(cstate);
488 
 /* hand back the state's field array and the number of fields parsed */
489  *fields = cstate->raw_fields;
490  *nfields = fldct;
491  return true;
492 }
StringInfoData line_buf
static bool CopyReadLine(CopyFromState cstate)
bool binary
Definition: copy.h:32
bool csv_mode
Definition: copy.h:34
bool header_line
Definition: copy.h:35
static int CopyReadAttributesText(CopyFromState cstate)
#define Assert(condition)
Definition: c.h:792
CopyFormatOptions opts
static int CopyReadAttributesCSV(CopyFromState cstate)

◆ ReceiveCopyBegin()

void ReceiveCopyBegin ( CopyFromState  cstate)

Definition at line 125 of file copyfromparse.c.

References CopyFromStateData::attnumlist, CopyFormatOptions::binary, buf, COPY_NEW_FE, COPY_OLD_FE, CopyFromStateData::copy_src, ereport, errcode(), errmsg(), ERROR, CopyFromStateData::fe_msgbuf, format, FrontendProtocol, i, list_length(), makeStringInfo(), CopyFromStateData::opts, PG_PROTOCOL_MAJOR, pq_beginmessage(), pq_endmessage(), pq_flush, pq_putemptymessage(), pq_sendbyte(), pq_sendint16(), and pq_startmsgread().

Referenced by BeginCopyFrom().

126 {
127  if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
128  {
129  /* new way */
130  StringInfoData buf;
131  int natts = list_length(cstate->attnumlist);
132  int16 format = (cstate->opts.binary ? 1 : 0);
133  int i;
134 
135  pq_beginmessage(&buf, 'G');
136  pq_sendbyte(&buf, format); /* overall format */
137  pq_sendint16(&buf, natts);
138  for (i = 0; i < natts; i++)
139  pq_sendint16(&buf, format); /* per-column formats */
140  pq_endmessage(&buf);
141  cstate->copy_src = COPY_NEW_FE;
142  cstate->fe_msgbuf = makeStringInfo();
143  }
144  else
145  {
146  /* old way */
147  if (cstate->opts.binary)
148  ereport(ERROR,
149  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
150  errmsg("COPY BINARY is not supported to stdout or from stdin")));
151  pq_putemptymessage('G');
152  /* any error in old protocol will make us lose sync */
153  pq_startmsgread();
154  cstate->copy_src = COPY_OLD_FE;
155  }
156  /* We *must* flush here to ensure FE knows it can send. */
157  pq_flush();
158 }
signed short int16
Definition: c.h:416
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
#define pq_flush()
Definition: libpq.h:39
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
int errcode(int sqlerrcode)
Definition: elog.c:704
void pq_putemptymessage(char msgtype)
Definition: pqformat.c:390
#define PG_PROTOCOL_MAJOR(v)
Definition: pqcomm.h:113
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
#define ERROR
Definition: elog.h:45
void pq_startmsgread(void)
Definition: pqcomm.c:1209
bool binary
Definition: copy.h:32
static char * buf
Definition: pg_test_fsync.c:68
#define ereport(elevel,...)
Definition: elog.h:155
static int list_length(const List *l)
Definition: pg_list.h:149
CopyFormatOptions opts
int errmsg(const char *fmt,...)
Definition: elog.c:915
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
int i
static char format
ProtocolVersion FrontendProtocol
Definition: globals.c:28

◆ ReceiveCopyBinaryHeader()

void ReceiveCopyBinaryHeader ( CopyFromState  cstate)

Definition at line 161 of file copyfromparse.c.

References BinarySignature, CopyGetInt32(), CopyReadBinaryData(), ereport, errcode(), errmsg(), and ERROR.

Referenced by BeginCopyFrom().

/*
 * Read and validate the header of a binary-format COPY input stream.
 *
 * Consumes, in order: the 11-byte signature, a 32-bit flags word, a 32-bit
 * header-extension length, and that many extension bytes (which are
 * discarded).  Any mismatch or short read is reported via ereport(ERROR)
 * with ERRCODE_BAD_COPY_FILE_FORMAT.
 */
162 {
 /* holds the signature, and is reused later as a 1-byte scratch buffer */
163  char readSig[11];
164  int32 tmp;
165 
166  /* Signature */
 /* a short read and a content mismatch are reported identically */
167  if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
168  memcmp(readSig, BinarySignature, 11) != 0)
169  ereport(ERROR,
170  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
171  errmsg("COPY file signature not recognized")));
172  /* Flags field */
173  if (!CopyGetInt32(cstate, &tmp))
174  ereport(ERROR,
175  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
176  errmsg("invalid COPY file header (missing flags)")));
 /* bit 16 is rejected with the WITH OIDS message */
177  if ((tmp & (1 << 16)) != 0)
178  ereport(ERROR,
179  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
180  errmsg("invalid COPY file header (WITH OIDS)")));
 /*
  * Assuming ereport(ERROR) does not return, bit 16 is already clear here
  * and this mask changes nothing; any other bit set in the upper half is
  * rejected below.
  */
181  tmp &= ~(1 << 16);
182  if ((tmp >> 16) != 0)
183  ereport(ERROR,
184  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
185  errmsg("unrecognized critical flags in COPY file header")));
186  /* Header extension length */
 /* a negative length is rejected along with a failed read */
187  if (!CopyGetInt32(cstate, &tmp) ||
188  tmp < 0)
189  ereport(ERROR,
190  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
191  errmsg("invalid COPY file header (missing length)")));
192  /* Skip extension header, if present */
 /* read and discard one byte at a time, tmp bytes in total */
193  while (tmp-- > 0)
194  {
195  if (CopyReadBinaryData(cstate, readSig, 1) != 1)
196  ereport(ERROR,
197  (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
198  errmsg("invalid COPY file header (wrong length)")));
199  }
200 }
static const char BinarySignature[11]
int errcode(int sqlerrcode)
Definition: elog.c:704
static bool CopyGetInt32(CopyFromState cstate, int32 *val)
signed int int32
Definition: c.h:417
static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
#define ERROR
Definition: elog.h:45
#define ereport(elevel,...)
Definition: elog.h:155
int errmsg(const char *fmt,...)
Definition: elog.c:915

Variable Documentation

◆ BinarySignature

const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"
static

Definition at line 103 of file copyfromparse.c.

Referenced by ReceiveCopyBinaryHeader().