#include "postgres.h"
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include "commands/copyapi.h"
#include "commands/copyfrom_internal.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
#include "utils/rel.h"

Include dependency graph for copyfromparse.c:

Macros
#define	ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))

#define	OCTVALUE(c) ((c) - '0')

#define	IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)

#define	IF_NEED_REFILL_AND_EOF_BREAK(extralen)

#define	REFILL_LINEBUF

Functions
static bool	CopyReadLine (CopyFromState cstate, bool is_csv)

static bool	CopyReadLineText (CopyFromState cstate, bool is_csv)

static int	CopyReadAttributesText (CopyFromState cstate)

static int	CopyReadAttributesCSV (CopyFromState cstate)

static Datum	CopyReadBinaryAttribute (CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)

static pg_attribute_always_inline bool	CopyFromTextLikeOneRow (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls, bool is_csv)

static pg_attribute_always_inline bool	NextCopyFromRawFieldsInternal (CopyFromState cstate, char ***fields, int *nfields, bool is_csv)

static int	CopyGetData (CopyFromState cstate, void *databuf, int minread, int maxread)

static bool	CopyGetInt32 (CopyFromState cstate, int32 *val)

static bool	CopyGetInt16 (CopyFromState cstate, int16 *val)

static void	CopyLoadInputBuf (CopyFromState cstate)

static int	CopyReadBinaryData (CopyFromState cstate, char *dest, int nbytes)

void	ReceiveCopyBegin (CopyFromState cstate)

void	ReceiveCopyBinaryHeader (CopyFromState cstate)

static void	CopyConvertBuf (CopyFromState cstate)

static void	CopyConversionError (CopyFromState cstate)

static void	CopyLoadRawBuf (CopyFromState cstate)

bool	NextCopyFromRawFields (CopyFromState cstate, char ***fields, int *nfields)

bool	NextCopyFrom (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

bool	CopyFromTextOneRow (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

bool	CopyFromCSVOneRow (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

bool	CopyFromBinaryOneRow (CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

static int	GetDecimalFromHex (char hex)

Variables
static const char	BinarySignature [11] = "PGCOPY\n\377\r\n\0"

Macro Definition Documentation

◆ IF_NEED_REFILL_AND_EOF_BREAK

#define IF_NEED_REFILL_AND_EOF_BREAK ( extralen )

Value:

if (1) \
{ \
    /* Expects input_buf_ptr, copy_buf_len, hit_eof and result to be in scope at the expansion site. */ \
    /* Fires only when input_buf_ptr + extralen reaches or passes copy_buf_len and hit_eof is set. */ \
    /* The if (1) ... else ((void) 0) wrapper accepts a trailing ';' without introducing a loop of */ \
    /* its own, so the break below leaves the enclosing construct at the expansion site. */ \
    if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
    { \
        if (extralen) \
            input_buf_ptr = copy_buf_len; /* consume the partial character */ \
        /* backslash just before EOF, treat as data char */ \
        result = true; \
        break; \
    } \
} else ((void) 0)

Definition at line 109 of file copyfromparse.c.

◆ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE

#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE ( extralen )

Value:

if (1) \
{ \
    /* Expects input_buf_ptr, copy_buf_len, hit_eof, prev_raw_ptr and need_data at the expansion site. */ \
    /* Fires when input_buf_ptr + extralen reaches or passes copy_buf_len while hit_eof is still false: */ \
    /* the read position is rewound to prev_raw_ptr, need_data is raised, and the enclosing loop is */ \
    /* restarted via continue.  The if (1) ... else ((void) 0) wrapper adds no loop of its own, so */ \
    /* the continue applies to the loop at the expansion site. */ \
    if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
    { \
        input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
        need_data = true; \
        continue; \
    } \
} else ((void) 0)

Definition at line 97 of file copyfromparse.c.

◆ ISOCTAL

#define ISOCTAL ( c ) (((c) >= '0') && ((c) <= '7'))

Definition at line 78 of file copyfromparse.c.

◆ OCTVALUE

#define OCTVALUE ( c ) ((c) - '0')

Definition at line 79 of file copyfromparse.c.

◆ REFILL_LINEBUF

#define REFILL_LINEBUF

Value:

if (1) \
{ \
    /* Expects cstate and a local input_buf_ptr to be in scope at the expansion site. */ \
    /* Appends the bytes of input_buf lying between cstate->input_buf_index and input_buf_ptr */ \
    /* to cstate->line_buf, then advances input_buf_index to input_buf_ptr.  Does nothing when */ \
    /* input_buf_ptr has not moved past input_buf_index. */ \
    if (input_buf_ptr > cstate->input_buf_index) \
    { \
        appendBinaryStringInfo(&cstate->line_buf, \
                             cstate->input_buf + cstate->input_buf_index, \
                               input_buf_ptr - cstate->input_buf_index); \
        cstate->input_buf_index = input_buf_ptr; \
    } \
} else ((void) 0)

Definition at line 126 of file copyfromparse.c.

Function Documentation

◆ CopyConversionError()

static void CopyConversionError ( CopyFromState cstate )

static

Definition at line 533 of file copyfromparse.c.

{
    /* Only reached with raw bytes pending and an error already flagged. */
    Assert(cstate->raw_buf_len > 0);
    Assert(cstate->input_reached_error);

    if (cstate->need_transcoding)
    {
        /*
         * The offending character sits at raw_buf_index.  Run the conversion
         * once more over the same source/destination ranges, this time with
         * the final (noError) argument false, so that the conversion routine
         * itself raises the error; it can produce a more specific message
         * than we could.
         */
        unsigned char *from = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
        int         fromlen = cstate->raw_buf_len - cstate->raw_buf_index;
        unsigned char *to = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
        int         tolen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

        (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
                                             cstate->file_encoding,
                                             GetDatabaseEncoding(),
                                             from, fromlen,
                                             to, tolen,
                                             false);

        /* The call above is expected to throw; complain if it came back. */
        elog(ERROR, "encoding conversion failed without error");
    }
    else
    {
        /*
         * No transcoding: bytes before input_buf_len were verified, so the
         * invalid or incomplete character starts at input_buf_len.
         */
        report_invalid_encoding(cstate->file_encoding,
                                cstate->raw_buf + cstate->input_buf_len,
                                cstate->raw_buf_len - cstate->input_buf_len);
    }
}

References Assert(), CopyFromStateData::conversion_proc, elog, ERROR, CopyFromStateData::file_encoding, GetDatabaseEncoding(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, pg_do_encoding_conversion_buf(), CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and report_invalid_encoding().

Referenced by CopyLoadInputBuf().

◆ CopyConvertBuf()

static void CopyConvertBuf ( CopyFromState cstate )

static

Definition at line 400 of file copyfromparse.c.

{
    int         kept;
    unsigned char *from;
    int         fromlen;
    unsigned char *to;
    int         tolen;
    int         consumed;

    /*
     * Same encoding on both sides: nothing to translate, but the bytes must
     * still be checked for validity.  In this mode input_buf and raw_buf are
     * one buffer; raw_buf_len counts all bytes present and input_buf_len
     * counts the prefix that has already been verified.
     */
    if (!cstate->need_transcoding)
    {
        const int   checked = cstate->input_buf_len;
        const int   pending = cstate->raw_buf_len - cstate->input_buf_len;
        int         newly_checked;

        if (pending == 0)
        {
            /* Nothing to verify; pass raw EOF on to the caller if seen. */
            if (cstate->raw_reached_eof)
                cstate->input_reached_eof = true;
            return;
        }

        /* Verify everything not yet checked, leftovers included. */
        newly_checked = pg_encoding_verifymbstr(cstate->file_encoding,
                                                cstate->raw_buf + checked,
                                                pending);
        if (newly_checked != 0)
        {
            cstate->input_buf_len += newly_checked;
            return;
        }

        /*
         * Not a single character verified.  That is an error if no more raw
         * data can arrive (truncated multi-byte sequence), or if we already
         * hold at least one maximum-length character's worth of bytes
         * (invalid sequence).  Otherwise just wait for more data.
         */
        if (cstate->raw_reached_eof || pending >= pg_encoding_max_length(cstate->file_encoding))
            cstate->input_reached_error = true;
        return;
    }

    /* From here on: encoding conversion is required. */

    if (RAW_BUF_BYTES(cstate) == 0)
    {
        /* No raw bytes left to convert; pass raw EOF on if seen. */
        if (cstate->raw_reached_eof)
            cstate->input_reached_eof = true;
        return;
    }

    /* Slide any still-unconsumed converted data to the buffer start. */
    kept = INPUT_BUF_BYTES(cstate);
    if (cstate->input_buf_index > 0 && kept > 0)
        memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
                kept);
    cstate->input_buf_len = kept;
    cstate->input_buf_index = 0;
    cstate->input_buf[kept] = '\0';

    from = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
    fromlen = cstate->raw_buf_len - cstate->raw_buf_index;
    to = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
    tolen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

    /*
     * Convert as much as possible; the routine may stop early at an invalid
     * byte sequence.  Errors are deliberately not raised here: the bad bytes
     * might lie beyond an end-of-input marker (\.), in which case they must
     * not be reported.  Converting some valid data past the marker is
     * harmless.
     */
    consumed = pg_do_encoding_conversion_buf(cstate->conversion_proc,
                                             cstate->file_encoding,
                                             GetDatabaseEncoding(),
                                             from, fromlen,
                                             to, tolen,
                                             true);
    if (consumed == 0)
    {
        /*
         * Nothing converted.  Flag an error if no more raw data is coming
         * (incomplete multi-byte sequence at the end) or if there was ample
         * input to convert at least one character (invalid sequence).
         */
        if (cstate->raw_reached_eof || fromlen >= MAX_CONVERSION_INPUT_LENGTH)
            cstate->input_reached_error = true;
        return;
    }

    cstate->raw_buf_index += consumed;
    cstate->input_buf_len += strlen((char *) to);
}

Referenced by CopyLoadInputBuf().

◆ CopyFromBinaryOneRow()

bool CopyFromBinaryOneRow	(	CopyFromState	cstate,
		ExprContext *	econtext,
		Datum *	values,
		bool *	nulls
	)

Definition at line 1086 of file copyfromparse.c.

{
    TupleDesc   rowdesc = RelationGetDescr(cstate->rel);
    AttrNumber  expected = list_length(cstate->attnumlist);
    FmgrInfo   *infuncs = cstate->in_functions;
    Oid        *ioparams = cstate->typioparams;
    int16       nfields;
    ListCell   *lc;

    cstate->cur_lineno++;

    /* Failing to read the field count means EOF (file or protocol level). */
    if (!CopyGetInt16(cstate, &nfields))
        return false;

    if (nfields == -1)
    {
        /*
         * A field count of -1 is the EOF marker.  Insist that the
         * protocol-level EOF follows immediately.  For COPY FROM STDIN this
         * gives a CopyFail sent at this point the chance to be handled; for
         * files the trailing data could be ignored as in text mode, but the
         * behavior is kept consistent with the STDIN case.
         */
        char        extra;

        if (CopyReadBinaryData(cstate, &extra, 1) > 0)
            ereport(ERROR,
                    (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                     errmsg("received copy data after EOF marker")));
        return false;
    }

    if (nfields != expected)
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("row field count is %d, expected %d",
                        (int) nfields, expected)));

    /* Read each target column; cur_attname is set only while reading it. */
    foreach(lc, cstate->attnumlist)
    {
        int         col = lfirst_int(lc) - 1;
        Form_pg_attribute attr = TupleDescAttr(rowdesc, col);

        cstate->cur_attname = NameStr(attr->attname);
        values[col] = CopyReadBinaryAttribute(cstate,
                                              &infuncs[col],
                                              ioparams[col],
                                              attr->atttypmod,
                                              &nulls[col]);
        cstate->cur_attname = NULL;
    }

    return true;
}

References attnum, CopyFromStateData::attnumlist, CopyGetInt16(), CopyReadBinaryAttribute(), CopyReadBinaryData(), cur, CopyFromStateData::cur_attname, CopyFromStateData::cur_lineno, ereport, errcode(), errmsg(), ERROR, CopyFromStateData::in_functions, lfirst_int, list_length(), NameStr, CopyFromStateData::rel, RelationGetDescr, TupleDescAttr(), CopyFromStateData::typioparams, and values.

◆ CopyFromCSVOneRow()

bool CopyFromCSVOneRow	(	CopyFromState	cstate,
		ExprContext *	econtext,
		Datum *	values,
		bool *	nulls
	)

Definition at line 924 of file copyfromparse.c.

{
    /* CSV flavour of the shared text/CSV row reader. */
    const bool  csv_mode = true;

    return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, csv_mode);
}

References CopyFromTextLikeOneRow(), and values.

◆ CopyFromTextLikeOneRow()

static pg_attribute_always_inline bool CopyFromTextLikeOneRow	(	CopyFromState	cstate,
		ExprContext *	econtext,
		Datum *	values,
		bool *	nulls,
		bool	is_csv
	)

static

Definition at line 937 of file copyfromparse.c.

{
    /*
     * Shared row reader for the text and CSV formats.
     *
     * Fetches the raw fields of the next input line and fills values[] /
     * nulls[] for each column listed in cstate->attnumlist, either by
     * evaluating the column's default expression or by running its input
     * function on the field string.
     *
     * Returns false when NextCopyFromRawFieldsInternal() reports no further
     * line; returns true otherwise, including when a row was abandoned
     * because of a soft input error (in which case cstate->num_errors has
     * been incremented).
     */
    TupleDesc   tupDesc;
    AttrNumber  attr_count;
    FmgrInfo   *in_functions = cstate->in_functions;
    Oid        *typioparams = cstate->typioparams;
    ExprState **defexprs = cstate->defexprs;
    char      **field_strings;
    ListCell   *cur;
    int         fldct;
    int         fieldno;
    char       *string;

    tupDesc = RelationGetDescr(cstate->rel);
    attr_count = list_length(cstate->attnumlist);

    /* read raw fields in the next line */
    if (!NextCopyFromRawFieldsInternal(cstate, &field_strings, &fldct, is_csv))
        return false;

    /* check for overflowing fields */
    /* (not applied when attr_count is zero) */
    if (attr_count > 0 && fldct > attr_count)
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("extra data after last expected column")));

    fieldno = 0;

    /* Loop to read the user attributes on the line. */
    foreach(cur, cstate->attnumlist)
    {
        int         attnum = lfirst_int(cur);
        int         m = attnum - 1; /* zero-based index into per-column arrays */
        Form_pg_attribute att = TupleDescAttr(tupDesc, m);

        /* Fewer fields on the line than target columns. */
        if (fieldno >= fldct)
            ereport(ERROR,
                    (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                     errmsg("missing data for column \"%s\"",
                            NameStr(att->attname))));
        /* The field is consumed even if the column is skipped below. */
        string = field_strings[fieldno++];

        if (cstate->convert_select_flags &&
            !cstate->convert_select_flags[m])
        {
            /* ignore input field, leaving column as NULL */
            continue;
        }

        if (is_csv)
        {
            if (string == NULL &&
                cstate->opts.force_notnull_flags[m])
            {
                /*
                 * FORCE_NOT_NULL option is set and column is NULL - convert
                 * it to the NULL string.
                 */
                string = cstate->opts.null_print;
            }
            else if (string != NULL && cstate->opts.force_null_flags[m]
                     && strcmp(string, cstate->opts.null_print) == 0)
            {
                /*
                 * FORCE_NULL option is set and column matches the NULL
                 * string. It must have been quoted, or otherwise the string
                 * would already have been set to NULL. Convert it to NULL as
                 * specified.
                 */
                string = NULL;
            }
        }

        /* Record what is being parsed; the NOTICE below reads these back. */
        cstate->cur_attname = NameStr(att->attname);
        cstate->cur_attval = string;

        /*
         * nulls[m] is only ever cleared here, never set.
         * NOTE(review): presumably the caller pre-initializes nulls[] to
         * true -- confirm against the caller.
         */
        if (string != NULL)
            nulls[m] = false;

        if (cstate->defaults[m])
        {
            /* We must have switched into the per-tuple memory context */
            Assert(econtext != NULL);
            Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);

            values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
        }

        /*
         * If ON_ERROR is specified with IGNORE, skip rows with soft errors
         */
        else if (!InputFunctionCallSafe(&in_functions[m],
                                        string,
                                        typioparams[m],
                                        att->atttypmod,
                                        (Node *) cstate->escontext,
                                        &values[m]))
        {
            Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);

            cstate->num_errors++;

            if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
            {
                /*
                 * Since we emit line number and column info in the below
                 * notice message, we suppress error context information other
                 * than the relation name.
                 */
                Assert(!cstate->relname_only);
                cstate->relname_only = true;

                if (cstate->cur_attval)
                {
                    char       *attval;

                    /* attval is freed right after the NOTICE is emitted */
                    attval = CopyLimitPrintoutLength(cstate->cur_attval);
                    ereport(NOTICE,
                            errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
                                   cstate->cur_lineno,
                                   cstate->cur_attname,
                                   attval));
                    pfree(attval);
                }
                else
                    ereport(NOTICE,
                            errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",
                                   cstate->cur_lineno,
                                   cstate->cur_attname));

                /* reset relname_only */
                cstate->relname_only = false;
            }

            /*
             * Stop processing this row.  cur_attname and cur_attval are left
             * as set above, and the remaining columns are not filled in.
             * NOTE(review): the caller presumably detects the skipped row via
             * num_errors/escontext -- confirm.
             */
            return true;
        }

        cstate->cur_attname = NULL;
        cstate->cur_attval = NULL;
    }

    /* Every listed column consumed exactly one field. */
    Assert(fieldno == attr_count);

    return true;
}

Referenced by CopyFromCSVOneRow(), and CopyFromTextOneRow().

◆ CopyFromTextOneRow()

bool CopyFromTextOneRow	(	CopyFromState	cstate,
		ExprContext *	econtext,
		Datum *	values,
		bool *	nulls
	)

Definition at line 916 of file copyfromparse.c.

{
    /* Plain-text flavour of the shared text/CSV row reader. */
    const bool  csv_mode = false;

    return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, csv_mode);
}

References CopyFromTextLikeOneRow(), and values.

◆ CopyGetData()

static int CopyGetData	(	CopyFromState	cstate,
		void *	databuf,
		int	minread,
		int	maxread
	)

static

Definition at line 245 of file copyfromparse.c.

{
    /*
     * Read raw bytes from the COPY source selected by cstate->copy_src into
     * databuf and return the number of bytes stored.
     *
     * COPY_FILE: a single fread() of up to maxread bytes; minread is not
     * consulted.  A zero-byte read sets raw_reached_eof.
     *
     * COPY_FRONTEND: keeps pulling protocol messages until at least minread
     * bytes have been gathered, maxread is exhausted, or CopyDone arrives
     * (which sets raw_reached_eof).
     *
     * COPY_CALLBACK: delegates to cstate->data_source_cb.
     */
    int         bytesread = 0;

    switch (cstate->copy_src)
    {
        case COPY_FILE:
            bytesread = fread(databuf, 1, maxread, cstate->copy_file);
            if (ferror(cstate->copy_file))
                ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not read from COPY file: %m")));
            if (bytesread == 0)
                cstate->raw_reached_eof = true;
            break;
        case COPY_FRONTEND:
            while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
            {
                int         avail;

                /* Current message fully consumed: fetch the next one. */
                while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
                {
                    /* Try to receive another message */
                    int         mtype;
                    int         maxmsglen;

                    /*
                     * Re-entry point for ignored Flush/Sync messages; the
                     * goto below jumps here without re-testing the loop
                     * condition.
                     */
            readmessage:
                    HOLD_CANCEL_INTERRUPTS();
                    pq_startmsgread();
                    mtype = pq_getbyte();
                    if (mtype == EOF)
                        ereport(ERROR,
                                (errcode(ERRCODE_CONNECTION_FAILURE),
                                 errmsg("unexpected EOF on client connection with an open transaction")));
                    /* Validate message type and set packet size limit */
                    switch (mtype)
                    {
                        case PqMsg_CopyData:
                            maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
                            break;
                        case PqMsg_CopyDone:
                        case PqMsg_CopyFail:
                        case PqMsg_Flush:
                        case PqMsg_Sync:
                            maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
                            break;
                        default:
                            ereport(ERROR,
                                    (errcode(ERRCODE_PROTOCOL_VIOLATION),
                                     errmsg("unexpected message type 0x%02X during COPY from stdin",
                                            mtype)));
                            maxmsglen = 0;  /* keep compiler quiet */
                            break;
                    }
                    /* Now collect the message body */
                    if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
                        ereport(ERROR,
                                (errcode(ERRCODE_CONNECTION_FAILURE),
                                 errmsg("unexpected EOF on client connection with an open transaction")));
                    /* Pairs with HOLD_CANCEL_INTERRUPTS() above. */
                    RESUME_CANCEL_INTERRUPTS();
                    /* ... and process it */
                    switch (mtype)
                    {
                        case PqMsg_CopyData:
                            break;
                        case PqMsg_CopyDone:
                            /* COPY IN correctly terminated by frontend */
                            cstate->raw_reached_eof = true;
                            /* hand back whatever was gathered before CopyDone */
                            return bytesread;
                        case PqMsg_CopyFail:
                            /* the message body is the client's failure text */
                            ereport(ERROR,
                                    (errcode(ERRCODE_QUERY_CANCELED),
                                     errmsg("COPY from stdin failed: %s",
                                            pq_getmsgstring(cstate->fe_msgbuf))));
                            break;
                        case PqMsg_Flush:
                        case PqMsg_Sync:

                            /*
                             * Ignore Flush/Sync for the convenience of client
                             * libraries (such as libpq) that may send those
                             * without noticing that the command they just
                             * sent was COPY.
                             */
                            goto readmessage;
                        default:
                            Assert(false);  /* NOT REACHED */
                    }
                }
                /* Copy out as much of the current message as still fits. */
                avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
                if (avail > maxread)
                    avail = maxread;
                pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
                /* advance the output position past the bytes just stored */
                databuf = (void *) ((char *) databuf + avail);
                maxread -= avail;
                bytesread += avail;
            }
            break;
        case COPY_CALLBACK:
            bytesread = cstate->data_source_cb(databuf, minread, maxread);
            break;
    }

    return bytesread;
}

References Assert(), COPY_CALLBACK, COPY_FILE, CopyFromStateData::copy_file, COPY_FRONTEND, CopyFromStateData::copy_src, StringInfoData::cursor, CopyFromStateData::data_source_cb, ereport, errcode(), errcode_for_file_access(), errmsg(), ERROR, CopyFromStateData::fe_msgbuf, HOLD_CANCEL_INTERRUPTS, StringInfoData::len, pq_copymsgbytes(), pq_getbyte(), pq_getmessage(), pq_getmsgstring(), PQ_LARGE_MESSAGE_LIMIT, PQ_SMALL_MESSAGE_LIMIT, pq_startmsgread(), PqMsg_CopyData, PqMsg_CopyDone, PqMsg_CopyFail, PqMsg_Flush, PqMsg_Sync, CopyFromStateData::raw_reached_eof, and RESUME_CANCEL_INTERRUPTS.

Referenced by CopyLoadRawBuf(), and CopyReadLine().

◆ CopyGetInt16()

static bool CopyGetInt16	(	CopyFromState	cstate,
		int16 *	val
	)

inline static

Definition at line 379 of file copyfromparse.c.

{
    uint16      wire;

    /* Success requires the full two bytes; convert from network order. */
    if (CopyReadBinaryData(cstate, (char *) &wire, sizeof(wire)) == sizeof(wire))
    {
        *val = (int16) pg_ntoh16(wire);
        return true;
    }

    /* Short read: still store something so compilers don't warn. */
    *val = 0;
    return false;
}

References buf, CopyReadBinaryData(), pg_ntoh16, and val.

Referenced by CopyFromBinaryOneRow().

◆ CopyGetInt32()

static bool CopyGetInt32	(	CopyFromState	cstate,
		int32 *	val
	)

inline static

Definition at line 362 of file copyfromparse.c.

{
    uint32      wire;

    /* Success requires the full four bytes; convert from network order. */
    if (CopyReadBinaryData(cstate, (char *) &wire, sizeof(wire)) == sizeof(wire))
    {
        *val = (int32) pg_ntoh32(wire);
        return true;
    }

    /* Short read: still store something so compilers don't warn. */
    *val = 0;
    return false;
}

References buf, CopyReadBinaryData(), pg_ntoh32, and val.

Referenced by CopyReadBinaryAttribute(), and ReceiveCopyBinaryHeader().

◆ CopyLoadInputBuf()

static void CopyLoadInputBuf ( CopyFromState cstate )

static

Definition at line 650 of file copyfromparse.c.

{
    /* Amount of ready input on entry; we return once there is more. */
    const int   ready_on_entry = INPUT_BUF_BYTES(cstate);

    /*
     * input_buf_index tells us how much input the caller has finished with.
     * When input_buf and raw_buf are physically the same buffer, bring
     * raw_buf_index up to date with it.
     */
    if (cstate->raw_buf == cstate->input_buf)
    {
        Assert(!cstate->need_transcoding);
        Assert(cstate->input_buf_index >= cstate->raw_buf_index);
        cstate->raw_buf_index = cstate->input_buf_index;
    }

    while (true)
    {
        /* Convert (or verify) whatever raw data is currently on hand. */
        CopyConvertBuf(cstate);

        /* Done as soon as that produced additional input bytes. */
        if (INPUT_BUF_BYTES(cstate) > ready_on_entry)
            break;

        /*
         * Invalid byte sequence, or an incomplete multi-byte character with
         * no more raw data to come: report the conversion error.
         */
        if (cstate->input_reached_error)
            CopyConversionError(cstate);

        /* All input consumed and converted; nothing more to deliver. */
        if (cstate->input_reached_eof)
            break;

        /* Otherwise go fetch more raw bytes and try again. */
        Assert(!cstate->raw_reached_eof);
        CopyLoadRawBuf(cstate);
    }
}

References Assert(), CopyConversionError(), CopyConvertBuf(), CopyLoadRawBuf(), CopyFromStateData::input_buf, INPUT_BUF_BYTES, CopyFromStateData::input_buf_index, CopyFromStateData::input_reached_eof, CopyFromStateData::input_reached_error, CopyFromStateData::need_transcoding, CopyFromStateData::raw_buf, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyReadLineText().

◆ CopyLoadRawBuf()

static void CopyLoadRawBuf ( CopyFromState cstate )

static

Definition at line 590 of file copyfromparse.c.

{
    int         carried;
    int         fetched;

    /*
     * Without encoding conversion (text mode), raw_buf and input_buf are the
     * same buffer, so their positions have to be consistent.
     */
    if (cstate->raw_buf == cstate->input_buf)
    {
        Assert(!cstate->need_transcoding);
        Assert(cstate->raw_buf_index == cstate->input_buf_index);
        Assert(cstate->input_buf_len <= cstate->raw_buf_len);
    }

    /* Move any not-yet-processed bytes to the front of the buffer. */
    carried = RAW_BUF_BYTES(cstate);
    if (cstate->raw_buf_index > 0 && carried > 0)
        memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
                carried);
    cstate->raw_buf_len -= cstate->raw_buf_index;
    cstate->raw_buf_index = 0;

    /* Shared buffer: shift the input_buf bookkeeping the same way. */
    if (cstate->raw_buf == cstate->input_buf)
    {
        cstate->input_buf_len -= cstate->input_buf_index;
        cstate->input_buf_index = 0;
    }

    /* Fill the free tail of the buffer, asking for at least one byte. */
    fetched = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
                          1, RAW_BUF_SIZE - cstate->raw_buf_len);
    carried += fetched;
    cstate->raw_buf_len = carried;
    cstate->raw_buf[carried] = '\0';

    /* Account for the new bytes in the progress report. */
    cstate->bytes_processed += fetched;
    pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);

    /* Getting nothing back means the source is exhausted. */
    if (fetched == 0)
        cstate->raw_reached_eof = true;
}

References Assert(), CopyFromStateData::bytes_processed, CopyGetData(), CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, CopyFromStateData::need_transcoding, pgstat_progress_update_param(), PROGRESS_COPY_BYTES_PROCESSED, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, RAW_BUF_SIZE, and CopyFromStateData::raw_reached_eof.

Referenced by CopyLoadInputBuf(), and CopyReadBinaryData().

◆ CopyReadAttributesCSV()

static int CopyReadAttributesCSV ( CopyFromState cstate )

static

Definition at line 1818 of file copyfromparse.c.

{
    /*
     * Split the current CSV input line into fields.
     *
     * Input is cstate->line_buf.  The de-quoted field values are written
     * back-to-back, each '\0'-terminated, into cstate->attribute_buf, and a
     * pointer to each value is stored in cstate->raw_fields[].  A field whose
     * raw, unquoted text equals the null marker gets a NULL pointer instead;
     * a field whose raw text equals the default marker sets the matching
     * entry of cstate->defaults[].
     *
     * Returns the number of fields found (0 in the zero-column special case).
     * Reports an error for a non-empty line when max_fields <= 0, for an
     * unterminated quoted field, and for a default marker on a column that
     * has no default expression.
     */
    char        delimc = cstate->opts.delim[0];
    char        quotec = cstate->opts.quote[0];
    char        escapec = cstate->opts.escape[0];
    int         fieldno;
    char       *output_ptr;     /* next write position in attribute_buf */
    char       *cur_ptr;        /* next read position in line_buf */
    char       *line_end_ptr;   /* one past the last byte of line_buf */
 
    /*
     * We need a special case for zero-column tables: check that the input
     * line is empty, and return.
     */
    if (cstate->max_fields <= 0)
    {
        if (cstate->line_buf.len != 0)
            ereport(ERROR,
                    (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                     errmsg("extra data after last expected column")));
        return 0;
    }
 
    resetStringInfo(&cstate->attribute_buf);
 
    /*
     * The de-escaped attributes will certainly not be longer than the input
     * data line, so we can just force attribute_buf to be large enough and
     * then transfer data without any checks for enough space.  We need to do
     * it this way because enlarging attribute_buf mid-stream would invalidate
     * pointers already stored into cstate->raw_fields[].
     */
    if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
        enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    output_ptr = cstate->attribute_buf.data;
 
    /* set pointer variables for loop */
    cur_ptr = cstate->line_buf.data;
    line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
 
    /* Outer loop iterates over fields */
    fieldno = 0;
    for (;;)
    {
        bool        found_delim = false;    /* field ended at a delimiter? */
        bool        saw_quote = false;  /* any quote char seen in field? */
        char       *start_ptr;  /* start of the raw field in line_buf */
        char       *end_ptr;    /* end of the raw field, excluding delimiter */
        int         input_len;
 
        /* Make sure there is enough space for the next value */
        if (fieldno >= cstate->max_fields)
        {
            /* grow the pointer array geometrically */
            cstate->max_fields *= 2;
            cstate->raw_fields =
                repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
        }
 
        /* Remember start of field on both input and output sides */
        start_ptr = cur_ptr;
        cstate->raw_fields[fieldno] = output_ptr;
 
        /*
         * Scan data for field,
         *
         * The loop starts in "not quote" mode and then toggles between that
         * and "in quote" mode. The loop exits normally if it is in "not
         * quote" mode and a delimiter or line end is seen.
         */
        for (;;)
        {
            char        c;
 
            /* Not in quote */
            for (;;)
            {
                /*
                 * end_ptr is recorded before the character is consumed, so
                 * when we leave via endfield it does not include the
                 * delimiter.
                 */
                end_ptr = cur_ptr;
                if (cur_ptr >= line_end_ptr)
                    goto endfield;
                c = *cur_ptr++;
                /* unquoted field delimiter */
                if (c == delimc)
                {
                    found_delim = true;
                    goto endfield;
                }
                /* start of quoted field (or part of field) */
                if (c == quotec)
                {
                    saw_quote = true;
                    break;
                }
                /* Add c to output string */
                *output_ptr++ = c;
            }
 
            /* In quote */
            for (;;)
            {
                end_ptr = cur_ptr;
                /* running off the end of the line while quoted is an error */
                if (cur_ptr >= line_end_ptr)
                    ereport(ERROR,
                            (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                             errmsg("unterminated CSV quoted field")));
 
                c = *cur_ptr++;
 
                /* escape within a quoted field */
                if (c == escapec)
                {
                    /*
                     * peek at the next char if available, and escape it if it
                     * is an escape char or a quote char
                     */
                    if (cur_ptr < line_end_ptr)
                    {
                        char        nextc = *cur_ptr;
 
                        if (nextc == escapec || nextc == quotec)
                        {
                            /* emit the escaped char and skip over it */
                            *output_ptr++ = nextc;
                            cur_ptr++;
                            continue;
                        }
                    }
                    /*
                     * Otherwise fall through: the escape char is handled like
                     * any other character by the tests below.
                     */
                }
 
                /*
                 * end of quoted field. Must do this test after testing for
                 * escape in case quote char and escape char are the same
                 * (which is the common case).
                 */
                if (c == quotec)
                    break;
 
                /* Add c to output string */
                *output_ptr++ = c;
            }
        }
endfield:
 
        /* Terminate attribute value in output area */
        *output_ptr++ = '\0';
 
        /*
         * Check whether raw input matched null marker.  The comparison is
         * made against the raw text in line_buf, and only for fields in
         * which no quote character was seen.
         */
        input_len = end_ptr - start_ptr;
        if (!saw_quote && input_len == cstate->opts.null_print_len &&
            strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
            cstate->raw_fields[fieldno] = NULL;
        /*
         * Check whether raw input matched default marker.  Unlike the
         * null-marker test above, this condition does not consult saw_quote.
         */
        else if (fieldno < list_length(cstate->attnumlist) &&
                 cstate->opts.default_print &&
                 input_len == cstate->opts.default_print_len &&
                 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
        {
            /* fieldno is 0-indexed and attnum is 1-indexed */
            int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
 
            if (cstate->defexprs[m] != NULL)
            {
                /* defaults contain entries for all physical attributes */
                cstate->defaults[m] = true;
            }
            else
            {
                /* look up the column name only to build the error detail */
                TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
                Form_pg_attribute att = TupleDescAttr(tupDesc, m);
 
                ereport(ERROR,
                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                         errmsg("unexpected default marker in COPY data"),
                         errdetail("Column \"%s\" has no default value.",
                                   NameStr(att->attname))));
            }
        }
 
        fieldno++;
        /* Done if we hit EOL instead of a delim */
        if (!found_delim)
            break;
    }
 
    /*
     * Clean up state of attribute_buf: step back over the last field's
     * terminator so that len does not count it.
     */
    output_ptr--;
    Assert(*output_ptr == '\0');
    cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
 
    return fieldno;
}

References Assert(), CopyFromStateData::attnumlist, CopyFromStateData::attribute_buf, StringInfoData::data, CopyFormatOptions::default_print, CopyFormatOptions::default_print_len, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFormatOptions::delim, enlargeStringInfo(), ereport, errcode(), errdetail(), errmsg(), ERROR, CopyFormatOptions::escape, StringInfoData::len, CopyFromStateData::line_buf, list_length(), list_nth_int(), CopyFromStateData::max_fields, StringInfoData::maxlen, NameStr, CopyFormatOptions::null_print, CopyFormatOptions::null_print_len, CopyFromStateData::opts, CopyFormatOptions::quote, CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, repalloc(), resetStringInfo(), and TupleDescAttr().

Referenced by NextCopyFromRawFieldsInternal().

◆ CopyReadAttributesText()

static int CopyReadAttributesText ( CopyFromState cstate )

static

Definition at line 1564 of file copyfromparse.c.

{
    /*
     * Split the current text-format input line into fields.
     *
     * Input is cstate->line_buf.  The de-escaped field values are written
     * back-to-back, each '\0'-terminated, into cstate->attribute_buf, and a
     * pointer to each value is stored in cstate->raw_fields[].  A field whose
     * raw text equals the null marker gets a NULL pointer instead; a field
     * whose raw text equals the default marker sets the matching entry of
     * cstate->defaults[].
     *
     * Returns the number of fields found (0 in the zero-column special case).
     * Reports an error for a non-empty line when max_fields <= 0 and for a
     * default marker on a column that has no default expression.
     */
    char        delimc = cstate->opts.delim[0];
    int         fieldno;
    char       *output_ptr;     /* next write position in attribute_buf */
    char       *cur_ptr;        /* next read position in line_buf */
    char       *line_end_ptr;   /* one past the last byte of line_buf */
 
    /*
     * We need a special case for zero-column tables: check that the input
     * line is empty, and return.
     */
    if (cstate->max_fields <= 0)
    {
        if (cstate->line_buf.len != 0)
            ereport(ERROR,
                    (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                     errmsg("extra data after last expected column")));
        return 0;
    }
 
    resetStringInfo(&cstate->attribute_buf);
 
    /*
     * The de-escaped attributes will certainly not be longer than the input
     * data line, so we can just force attribute_buf to be large enough and
     * then transfer data without any checks for enough space.  We need to do
     * it this way because enlarging attribute_buf mid-stream would invalidate
     * pointers already stored into cstate->raw_fields[].
     */
    if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
        enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    output_ptr = cstate->attribute_buf.data;
 
    /* set pointer variables for loop */
    cur_ptr = cstate->line_buf.data;
    line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
 
    /* Outer loop iterates over fields */
    fieldno = 0;
    for (;;)
    {
        bool        found_delim = false;    /* field ended at a delimiter? */
        char       *start_ptr;  /* start of the raw field in line_buf */
        char       *end_ptr;    /* end of the raw field, excluding delimiter */
        int         input_len;
        bool        saw_non_ascii = false;  /* de-escaped NUL/high-bit byte? */
 
        /* Make sure there is enough space for the next value */
        if (fieldno >= cstate->max_fields)
        {
            /* grow the pointer array geometrically */
            cstate->max_fields *= 2;
            cstate->raw_fields =
                repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
        }
 
        /* Remember start of field on both input and output sides */
        start_ptr = cur_ptr;
        cstate->raw_fields[fieldno] = output_ptr;
 
        /*
         * Scan data for field.
         *
         * Note that in this loop, we are scanning to locate the end of field
         * and also speculatively performing de-escaping.  Once we find the
         * end-of-field, we can match the raw field contents against the null
         * marker string.  Only after that comparison fails do we know that
         * de-escaping is actually the right thing to do; therefore we *must
         * not* throw any syntax errors before we've done the null-marker
         * check.
         */
        for (;;)
        {
            char        c;
 
            /*
             * end_ptr is recorded before the character is consumed, so it
             * never includes the terminating delimiter.
             */
            end_ptr = cur_ptr;
            if (cur_ptr >= line_end_ptr)
                break;
            c = *cur_ptr++;
            if (c == delimc)
            {
                found_delim = true;
                break;
            }
            if (c == '\\')
            {
                /*
                 * A backslash that is the last byte of the line: stop here.
                 * The backslash is not copied to the output, and end_ptr
                 * (set above) still points at it.
                 */
                if (cur_ptr >= line_end_ptr)
                    break;
                c = *cur_ptr++;
                switch (c)
                {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                        {
                            /* handle \013: one to three octal digits */
                            int         val;
 
                            val = OCTVALUE(c);
                            if (cur_ptr < line_end_ptr)
                            {
                                c = *cur_ptr;
                                if (ISOCTAL(c))
                                {
                                    cur_ptr++;
                                    val = (val << 3) + OCTVALUE(c);
                                    if (cur_ptr < line_end_ptr)
                                    {
                                        c = *cur_ptr;
                                        if (ISOCTAL(c))
                                        {
                                            cur_ptr++;
                                            val = (val << 3) + OCTVALUE(c);
                                        }
                                    }
                                }
                            }
                            /* keep only the low 8 bits of the value */
                            c = val & 0377;
                            if (c == '\0' || IS_HIGHBIT_SET(c))
                                saw_non_ascii = true;
                        }
                        break;
                    case 'x':
                        /*
                         * Handle \x3F: one or two hex digits.  If no hex
                         * digit follows, c is left as 'x' and is emitted
                         * literally below.
                         */
                        if (cur_ptr < line_end_ptr)
                        {
                            char        hexchar = *cur_ptr;
 
                            if (isxdigit((unsigned char) hexchar))
                            {
                                int         val = GetDecimalFromHex(hexchar);
 
                                cur_ptr++;
                                if (cur_ptr < line_end_ptr)
                                {
                                    hexchar = *cur_ptr;
                                    if (isxdigit((unsigned char) hexchar))
                                    {
                                        cur_ptr++;
                                        val = (val << 4) + GetDecimalFromHex(hexchar);
                                    }
                                }
                                c = val & 0xff;
                                if (c == '\0' || IS_HIGHBIT_SET(c))
                                    saw_non_ascii = true;
                            }
                        }
                        break;
                    case 'b':
                        c = '\b';
                        break;
                    case 'f':
                        c = '\f';
                        break;
                    case 'n':
                        c = '\n';
                        break;
                    case 'r':
                        c = '\r';
                        break;
                    case 't':
                        c = '\t';
                        break;
                    case 'v':
                        c = '\v';
                        break;
 
                        /*
                         * in all other cases, take the char after '\'
                         * literally
                         */
                }
            }
 
            /* Add c to output string */
            *output_ptr++ = c;
        }
 
        /*
         * Check whether raw input matched null marker.  The comparison uses
         * the raw (not de-escaped) text in line_buf.
         */
        input_len = end_ptr - start_ptr;
        if (input_len == cstate->opts.null_print_len &&
            strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
            cstate->raw_fields[fieldno] = NULL;
        /* Check whether raw input matched default marker */
        else if (fieldno < list_length(cstate->attnumlist) &&
                 cstate->opts.default_print &&
                 input_len == cstate->opts.default_print_len &&
                 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
        {
            /* fieldno is 0-indexed and attnum is 1-indexed */
            int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
 
            if (cstate->defexprs[m] != NULL)
            {
                /* defaults contain entries for all physical attributes */
                cstate->defaults[m] = true;
            }
            else
            {
                /* look up the column name only to build the error detail */
                TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
                Form_pg_attribute att = TupleDescAttr(tupDesc, m);
 
                ereport(ERROR,
                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                         errmsg("unexpected default marker in COPY data"),
                         errdetail("Column \"%s\" has no default value.",
                                   NameStr(att->attname))));
            }
        }
        else
        {
            /*
             * At this point we know the field is supposed to contain data.
             *
             * If we de-escaped any non-7-bit-ASCII chars, make sure the
             * resulting string is valid data for the db encoding.
             */
            if (saw_non_ascii)
            {
                char       *fld = cstate->raw_fields[fieldno];
 
                /*
                 * NOTE(review): the final "false" argument presumably makes
                 * pg_verifymbstr() raise an error on invalid input rather
                 * than return a status -- confirm against its declaration.
                 */
                pg_verifymbstr(fld, output_ptr - fld, false);
            }
        }
 
        /* Terminate attribute value in output area */
        *output_ptr++ = '\0';
 
        fieldno++;
        /* Done if we hit EOL instead of a delim */
        if (!found_delim)
            break;
    }
 
    /*
     * Clean up state of attribute_buf: step back over the last field's
     * terminator so that len does not count it.
     */
    output_ptr--;
    Assert(*output_ptr == '\0');
    cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
 
    return fieldno;
}

Referenced by NextCopyFromRawFieldsInternal().

◆ CopyReadBinaryAttribute()

static Datum CopyReadBinaryAttribute	(	CopyFromState	cstate,
		FmgrInfo *	flinfo,
		Oid	typioparam,
		int32	typmod,
		bool *	isnull
	)

static

Definition at line 2013 of file copyfromparse.c.

{
    /*
     * Read one binary-format field and convert it with the column type's
     * receive function.
     *
     * The field starts with a 32-bit length word.  A length of -1 denotes a
     * NULL field: *isnull is set to true and the receive function is invoked
     * with a NULL buffer.  Any other negative length is an error.  Otherwise
     * exactly that many bytes are loaded into cstate->attribute_buf and
     * handed to the receive function, which must consume all of them.
     *
     * Returns the converted Datum; *isnull reports whether it was NULL.
     */
    Datum       value;
    int32       field_len;

    /* Fetch the length word that precedes every field. */
    if (!CopyGetInt32(cstate, &field_len))
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("unexpected EOF in COPY data")));

    if (field_len < 0)
    {
        /* -1 is the only legal negative length; it marks a NULL field. */
        if (field_len != -1)
            ereport(ERROR,
                    (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                     errmsg("invalid field size")));

        *isnull = true;
        return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
    }

    /* Empty attribute_buf and make room for the field's raw bytes. */
    resetStringInfo(&cstate->attribute_buf);
    enlargeStringInfo(&cstate->attribute_buf, field_len);

    /* A short read means the input ended in the middle of the field. */
    if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
                           field_len) != field_len)
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("unexpected EOF in COPY data")));

    /* Keep the buffer terminated and record how much data it holds. */
    cstate->attribute_buf.data[field_len] = '\0';
    cstate->attribute_buf.len = field_len;

    /* Hand the bytes to the type's binary input converter. */
    value = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
                                typioparam, typmod);

    /* The converter is required to have consumed the whole buffer. */
    if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
                 errmsg("incorrect binary data format")));

    *isnull = false;
    return value;
}

References CopyFromStateData::attribute_buf, CopyGetInt32(), CopyReadBinaryData(), StringInfoData::cursor, StringInfoData::data, enlargeStringInfo(), ereport, errcode(), errmsg(), ERROR, StringInfoData::len, ReceiveFunctionCall(), and resetStringInfo().

Referenced by CopyFromBinaryOneRow().

◆ CopyReadBinaryData()

static int CopyReadBinaryData	(	CopyFromState	cstate,
		char *	dest,
		int	nbytes
	)

static

Definition at line 701 of file copyfromparse.c.

{
    /*
     * Copy up to nbytes of raw input into dest, reloading raw_buf as needed.
     *
     * Returns the number of bytes actually copied, which is less than nbytes
     * only when the input reached EOF first.
     */
    int         total = 0;

    /* Fast path: the request can be served entirely from the buffer. */
    if (RAW_BUF_BYTES(cstate) >= nbytes)
    {
        memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
        cstate->raw_buf_index += nbytes;
        return nbytes;
    }

    /*
     * Slow path: drain what is buffered and refill from the source, going
     * around as often as necessary because nbytes may exceed the buffer
     * size.
     */
    for (;;)
    {
        int         chunk;

        /* Refill only once the buffer has been fully consumed. */
        if (RAW_BUF_BYTES(cstate) == 0)
        {
            CopyLoadRawBuf(cstate);
            if (cstate->raw_reached_eof)
                break;          /* nothing more to read */
        }

        /* Move as much as is both wanted and available. */
        chunk = Min(nbytes - total, RAW_BUF_BYTES(cstate));
        memcpy(dest + total, cstate->raw_buf + cstate->raw_buf_index, chunk);
        cstate->raw_buf_index += chunk;
        total += chunk;

        if (total >= nbytes)
            break;
    }

    return total;
}

References CopyLoadRawBuf(), Min, CopyFromStateData::raw_buf, RAW_BUF_BYTES, CopyFromStateData::raw_buf_index, and CopyFromStateData::raw_reached_eof.

Referenced by CopyFromBinaryOneRow(), CopyGetInt16(), CopyGetInt32(), CopyReadBinaryAttribute(), and ReceiveCopyBinaryHeader().

◆ CopyReadLine()

static bool CopyReadLine	(	CopyFromState	cstate,
		bool	is_csv
	)

static

Definition at line 1158 of file copyfromparse.c.

{
    /*
     * Read the next input line into cstate->line_buf, without its line
     * terminator.
     *
     * Returns true when CopyReadLineText() reported EOF, false when a
     * complete line was read.  line_buf_valid is cleared while the buffer is
     * being filled and set again just before returning.
     */
    bool        at_eof;

    resetStringInfo(&cstate->line_buf);
    cstate->line_buf_valid = false;

    /* Parse data and transfer into line_buf */
    at_eof = CopyReadLineText(cstate, is_csv);

    if (!at_eof)
    {
        /*
         * A full line was read, so its EOL marker was copied into line_buf
         * along with the data.  Work out how long that marker is, then trim
         * it off.
         */
        int         eol_len = 0;

        switch (cstate->eol_type)
        {
            case EOL_NL:
                Assert(cstate->line_buf.len >= 1);
                Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
                eol_len = 1;
                break;
            case EOL_CR:
                Assert(cstate->line_buf.len >= 1);
                Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
                eol_len = 1;
                break;
            case EOL_CRNL:
                Assert(cstate->line_buf.len >= 2);
                Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
                Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
                eol_len = 2;
                break;
            case EOL_UNKNOWN:
                /* shouldn't get here */
                Assert(false);
                break;
        }

        if (eol_len > 0)
        {
            cstate->line_buf.len -= eol_len;
            cstate->line_buf.data[cstate->line_buf.len] = '\0';
        }
    }
    else if (cstate->copy_src == COPY_FRONTEND)
    {
        /*
         * Reached EOF.  In protocol version 3, we should ignore anything
         * after \. up to the protocol end of copy data.  (XXX maybe better
         * not to treat \. as special?)
         */
        int         nread;

        /* Read and discard until the source yields no more bytes. */
        for (;;)
        {
            nread = CopyGetData(cstate, cstate->input_buf,
                                1, INPUT_BUF_SIZE);
            if (nread <= 0)
                break;
        }

        /* Both buffers are now considered empty. */
        cstate->input_buf_index = 0;
        cstate->input_buf_len = 0;
        cstate->raw_buf_index = 0;
        cstate->raw_buf_len = 0;
    }

    /* Now it's safe to use the buffer in error messages */
    cstate->line_buf_valid = true;

    return at_eof;
}

References Assert(), COPY_FRONTEND, CopyFromStateData::copy_src, CopyGetData(), CopyReadLineText(), StringInfoData::data, EOL_CR, EOL_CRNL, EOL_NL, CopyFromStateData::eol_type, EOL_UNKNOWN, CopyFromStateData::input_buf, CopyFromStateData::input_buf_index, CopyFromStateData::input_buf_len, INPUT_BUF_SIZE, StringInfoData::len, CopyFromStateData::line_buf, CopyFromStateData::line_buf_valid, CopyFromStateData::raw_buf_index, CopyFromStateData::raw_buf_len, and resetStringInfo().

Referenced by NextCopyFromRawFieldsInternal().

◆ CopyReadLineText()

static bool CopyReadLineText	(	CopyFromState	cstate,
		bool	is_csv
	)

static

Definition at line 1234 of file copyfromparse.c.

{
    /*
     * Scan input_buf for the end of the next input line and transfer the
     * line into line_buf.
     *
     * Returns true to report EOF (either the input ran out completely or an
     * end-of-copy marker was recognized), and false when a line terminator
     * was found.  Sets cstate->eol_type the first time a terminator is seen
     * and reports an error when a later terminator or end-of-copy marker is
     * inconsistent with it.
     *
     * NOTE(review): REFILL_LINEBUF, IF_NEED_REFILL_AND_NOT_EOF_CONTINUE() and
     * IF_NEED_REFILL_AND_EOF_BREAK() are macros defined elsewhere in this
     * file.  They presumably reference this function's local variables by
     * name (need_data and hit_eof are not otherwise read here), so the locals
     * below should not be renamed without checking the macro definitions.
     */
    char       *copy_input_buf;
    int         input_buf_ptr;
    int         copy_buf_len;
    bool        need_data = false;
    bool        hit_eof = false;
    bool        result = false;
 
    /* CSV variables */
    bool        in_quote = false,
                last_was_esc = false;
    char        quotec = '\0';
    char        escapec = '\0';
 
    if (is_csv)
    {
        quotec = cstate->opts.quote[0];
        escapec = cstate->opts.escape[0];
        /* ignore special escape processing if it's the same as quotec */
        if (quotec == escapec)
            escapec = '\0';
    }
 
    /*
     * The objective of this loop is to transfer the entire next input line
     * into line_buf.  Hence, we only care for detecting newlines (\r and/or
     * \n) and the end-of-copy marker (\.).
     *
     * In CSV mode, \r and \n inside a quoted field are just part of the data
     * value and are put in line_buf.  We keep just enough state to know if we
     * are currently in a quoted field or not.
     *
     * The input has already been converted to the database encoding.  All
     * supported server encodings have the property that all bytes in a
     * multi-byte sequence have the high bit set, so a multibyte character
     * cannot contain any newline or escape characters embedded in the
     * multibyte sequence.  Therefore, we can process the input byte-by-byte,
     * regardless of the encoding.
     *
     * For speed, we try to move data from input_buf to line_buf in chunks
     * rather than one character at a time.  input_buf_ptr points to the next
     * character to examine; any characters from input_buf_index to
     * input_buf_ptr have been determined to be part of the line, but not yet
     * transferred to line_buf.
     *
     * For a little extra speed within the loop, we copy input_buf and
     * input_buf_len into local variables.
     */
    copy_input_buf = cstate->input_buf;
    input_buf_ptr = cstate->input_buf_index;
    copy_buf_len = cstate->input_buf_len;
 
    for (;;)
    {
        int         prev_raw_ptr;
        char        c;
 
        /*
         * Load more data if needed.
         *
         * TODO: We could just force four bytes of read-ahead and avoid the
         * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE().  That was
         * unsafe with the old v2 COPY protocol, but we don't support that
         * anymore.
         */
        if (input_buf_ptr >= copy_buf_len || need_data)
        {
            /* move what has been scanned so far into line_buf first */
            REFILL_LINEBUF;
 
            CopyLoadInputBuf(cstate);
            /* update our local variables */
            hit_eof = cstate->input_reached_eof;
            input_buf_ptr = cstate->input_buf_index;
            copy_buf_len = cstate->input_buf_len;
 
            /*
             * If we are completely out of data, break out of the loop,
             * reporting EOF.
             */
            if (INPUT_BUF_BYTES(cstate) <= 0)
            {
                result = true;
                break;
            }
            need_data = false;
        }
 
        /*
         * OK to fetch a character.  prev_raw_ptr remembers where it came
         * from; it is used below to detect data preceding a \. marker.
         */
        prev_raw_ptr = input_buf_ptr;
        c = copy_input_buf[input_buf_ptr++];
 
        if (is_csv)
        {
            /*
             * If character is '\r', we may need to look ahead below.  Force
             * fetch of the next character if we don't already have it.  We
             * need to do this before changing CSV state, in case '\r' is also
             * the quote or escape character.
             */
            if (c == '\r')
            {
                IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
            }
 
            /*
             * Dealing with quotes and escapes here is mildly tricky. If the
             * quote char is also the escape char, there's no problem - we
             * just use the char as a toggle. If they are different, we need
             * to ensure that we only take account of an escape inside a
             * quoted field and immediately preceding a quote char, and not
             * the second in an escape-escape sequence.
             */
            if (in_quote && c == escapec)
                last_was_esc = !last_was_esc;
            if (c == quotec && !last_was_esc)
                in_quote = !in_quote;
            if (c != escapec)
                last_was_esc = false;
 
            /*
             * Updating the line count for embedded CR and/or LF chars is
             * necessarily a little fragile - this test is probably about the
             * best we can do.  (XXX it's arguable whether we should do this
             * at all --- is cur_lineno a physical or logical count?)
             */
            if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
                cstate->cur_lineno++;
        }
 
        /* Process \r */
        if (c == '\r' && (!is_csv || !in_quote))
        {
            /* Check for \r\n on first line, _and_ handle \r\n. */
            if (cstate->eol_type == EOL_UNKNOWN ||
                cstate->eol_type == EOL_CRNL)
            {
                /*
                 * If need more data, go back to loop top to load it.
                 *
                 * Note that if we are at EOF, c will wind up as '\0' because
                 * of the guaranteed pad of input_buf.
                 */
                IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
 
                /* get next char */
                c = copy_input_buf[input_buf_ptr];
 
                if (c == '\n')
                {
                    input_buf_ptr++;    /* eat newline */
                    cstate->eol_type = EOL_CRNL;    /* in case not set yet */
                }
                else
                {
                    /* found \r, but no \n */
                    if (cstate->eol_type == EOL_CRNL)
                        ereport(ERROR,
                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                 !is_csv ?
                                 errmsg("literal carriage return found in data") :
                                 errmsg("unquoted carriage return found in data"),
                                 !is_csv ?
                                 errhint("Use \"\\r\" to represent carriage return.") :
                                 errhint("Use quoted CSV field to represent carriage return.")));
 
                    /*
                     * if we got here, it is the first line and we didn't find
                     * \n, so don't consume the peeked character
                     */
                    cstate->eol_type = EOL_CR;
                }
            }
            else if (cstate->eol_type == EOL_NL)
                ereport(ERROR,
                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                         !is_csv ?
                         errmsg("literal carriage return found in data") :
                         errmsg("unquoted carriage return found in data"),
                         !is_csv ?
                         errhint("Use \"\\r\" to represent carriage return.") :
                         errhint("Use quoted CSV field to represent carriage return.")));
            /* If reach here, we have found the line terminator */
            break;
        }
 
        /* Process \n */
        if (c == '\n' && (!is_csv || !in_quote))
        {
            /* a bare \n contradicts an already-established \r or \r\n style */
            if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
                ereport(ERROR,
                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                         !is_csv ?
                         errmsg("literal newline found in data") :
                         errmsg("unquoted newline found in data"),
                         !is_csv ?
                         errhint("Use \"\\n\" to represent newline.") :
                         errhint("Use quoted CSV field to represent newline.")));
            cstate->eol_type = EOL_NL;  /* in case not set yet */
            /* If reach here, we have found the line terminator */
            break;
        }
 
        /*
         * Process backslash, except in CSV mode where backslash is a normal
         * character.
         */
        if (c == '\\' && !is_csv)
        {
            char        c2;
 
            IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
            IF_NEED_REFILL_AND_EOF_BREAK(0);
 
            /* -----
             * get next character
             * Note: we do not change c so if it isn't \., we can fall
             * through and continue processing.
             * -----
             */
            c2 = copy_input_buf[input_buf_ptr];
 
            if (c2 == '.')
            {
                input_buf_ptr++;    /* consume the '.' */
                if (cstate->eol_type == EOL_CRNL)
                {
                    /*
                     * With \r\n line endings the marker must be followed by
                     * \r here and then by \n, which is checked below.
                     */
                    /* Get the next character */
                    IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
                    /* if hit_eof, c2 will become '\0' */
                    c2 = copy_input_buf[input_buf_ptr++];
 
                    if (c2 == '\n')
                        ereport(ERROR,
                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                 errmsg("end-of-copy marker does not match previous newline style")));
                    else if (c2 != '\r')
                        ereport(ERROR,
                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                 errmsg("end-of-copy marker is not alone on its line")));
                }
 
                /* Get the next character */
                IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
                /* if hit_eof, c2 will become '\0' */
                c2 = copy_input_buf[input_buf_ptr++];
 
                /* anything but a line terminator after \. is an error */
                if (c2 != '\r' && c2 != '\n')
                    ereport(ERROR,
                            (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                             errmsg("end-of-copy marker is not alone on its line")));
 
                /* the terminator must agree with the established style */
                if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
                    (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
                    (cstate->eol_type == EOL_CR && c2 != '\r'))
                    ereport(ERROR,
                            (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                             errmsg("end-of-copy marker does not match previous newline style")));
 
                /*
                 * If there is any data on this line before the \., complain.
                 */
                if (cstate->line_buf.len > 0 ||
                    prev_raw_ptr > cstate->input_buf_index)
                    ereport(ERROR,
                            (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                             errmsg("end-of-copy marker is not alone on its line")));
 
                /*
                 * Discard the \. and newline, then report EOF.
                 */
                cstate->input_buf_index = input_buf_ptr;
                result = true;  /* report EOF */
                break;
            }
            else
            {
                /*
                 * If we are here, it means we found a backslash followed by
                 * something other than a period.  In non-CSV mode, anything
                 * after a backslash is special, so we skip over that second
                 * character too.  If we didn't do that, \\. would be
                 * considered an end-of-copy marker, while in non-CSV mode it
                 * is a literal backslash followed by a period.
                 */
                input_buf_ptr++;
            }
        }
    }                           /* end of outer loop */
 
    /*
     * Transfer any still-uncopied data to line_buf.
     */
    REFILL_LINEBUF;
 
    return result;
}

Referenced by CopyReadLine().

◆ GetDecimalFromHex()

static int GetDecimalFromHex ( char hex )

static

Definition at line 1536 of file copyfromparse.c.

{
    /*
     * Translate a single hexadecimal digit character into its integer value.
     *
     * Decimal digits yield 0..9.  Any other character is lower-cased and
     * measured from 'a', so 'a'/'A' yield 10, 'b'/'B' yield 11, and so on.
     * No validation is performed here.
     */
    unsigned char uc = (unsigned char) hex;

    if (!isdigit(uc))
        return tolower(uc) - 'a' + 10;

    return hex - '0';
}

Referenced by CopyReadAttributesText().

◆ NextCopyFrom()

bool NextCopyFrom	(	CopyFromState	cstate,
		ExprContext *	econtext,
		Datum *	values,
		bool *	nulls
	)

Definition at line 871 of file copyfromparse.c.

{
    /*
     * Read one row through the format's CopyFromOneRow callback and then
     * fill in column defaults.
     *
     * 'values' and 'nulls' are reset for every physical attribute of the
     * target relation before the callback runs, as is cstate->defaults.
     * Returns false as soon as the callback reports that no row was
     * produced; otherwise the default expressions listed in cstate->defmap
     * are evaluated into 'values'/'nulls' and true is returned.
     */
    ExprState **default_exprs = cstate->defexprs;
    int        *default_map = cstate->defmap;
    AttrNumber  ndefaults = cstate->num_defaults;
    AttrNumber  natts;
    TupleDesc   rowdesc;
    int         d;

    rowdesc = RelationGetDescr(cstate->rel);
    natts = rowdesc->natts;

    /* Start from an all-NULL row with no defaults requested */
    MemSet(values, 0, natts * sizeof(Datum));
    MemSet(nulls, true, natts * sizeof(bool));
    MemSet(cstate->defaults, false, natts * sizeof(bool));

    /* Let the format-specific routine pull one row from the source */
    if (!cstate->routine->CopyFromOneRow(cstate, econtext, values, nulls))
        return false;

    /*
     * Evaluate the available default expressions.  Columns touched neither
     * by the callback above nor by this loop keep their NULL setting.
     */
    d = 0;
    while (d < ndefaults)
    {
        int         target = default_map[d];

        /*
         * Evaluating a default needs a caller-supplied econtext, and the
         * caller must already be running in its per-tuple memory context.
         */
        Assert(econtext != NULL);
        Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);

        values[target] = ExecEvalExpr(default_exprs[target], econtext,
                                      &nulls[target]);
        d++;
    }

    return true;
}

References Assert(), CopyFromRoutine::CopyFromOneRow, CurrentMemoryContext, CopyFromStateData::defaults, CopyFromStateData::defexprs, CopyFromStateData::defmap, ExprContext::ecxt_per_tuple_memory, ExecEvalExpr(), i, MemSet, TupleDescData::natts, CopyFromStateData::num_defaults, CopyFromStateData::rel, RelationGetDescr, CopyFromStateData::routine, and values.

Referenced by CopyFrom(), file_acquire_sample_rows(), and fileIterateForeignScan().

◆ NextCopyFromRawFields()

bool NextCopyFromRawFields	(	CopyFromState	cstate,
		char ***	fields,
		int *	nfields
	)

Definition at line 747 of file copyfromparse.c.

{
    /*
     * Public entry point: delegate to NextCopyFromRawFieldsInternal(),
     * selecting CSV or text parsing from the COPY options.
     */
    bool        csv = cstate->opts.csv_mode;

    return NextCopyFromRawFieldsInternal(cstate, fields, nfields, csv);
}

References CopyFormatOptions::csv_mode, NextCopyFromRawFieldsInternal(), and CopyFromStateData::opts.

◆ NextCopyFromRawFieldsInternal()

static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal	(	CopyFromState	cstate,
		char ***	fields,
		int *	nfields,
		bool	is_csv
	)

static

Definition at line 771 of file copyfromparse.c.

{
    /*
     * Read the next input line and split it into raw (de-escaped) fields.
     *
     * On the very first call (cur_lineno == 0) with a header line
     * requested, the header is consumed first; with COPY_HEADER_MATCH its
     * field count and names are checked against the target column list and
     * any mismatch raises an error.
     *
     * Returns false at end of input.  Otherwise stores the field array in
     * *fields, the field count in *nfields, and returns true.
     */
    bool        at_eof;
    int         nparsed;

    /* only available for text or csv input */
    Assert(!cstate->opts.binary);

    /* Consume, and if requested verify, the header line on first use */
    if (cstate->opts.header_line && cstate->cur_lineno == 0)
    {
        TupleDesc   rowdesc = RelationGetDescr(cstate->rel);
        ListCell   *lc;

        cstate->cur_lineno++;
        at_eof = CopyReadLine(cstate, is_csv);

        if (cstate->opts.header_line == COPY_HEADER_MATCH)
        {
            int         idx = 0;

            nparsed = is_csv ? CopyReadAttributesCSV(cstate)
                : CopyReadAttributesText(cstate);

            if (nparsed != list_length(cstate->attnumlist))
                ereport(ERROR,
                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                         errmsg("wrong number of fields in header line: got %d, expected %d",
                                nparsed, list_length(cstate->attnumlist))));

            foreach(lc, cstate->attnumlist)
            {
                int         attnum = lfirst_int(lc);
                Form_pg_attribute attr = TupleDescAttr(rowdesc, attnum - 1);
                char       *header_name;
                int         fieldno;

                Assert(idx < cstate->max_fields);

                header_name = cstate->raw_fields[idx];
                fieldno = idx + 1;  /* messages number fields from 1 */
                idx++;

                if (header_name == NULL)
                    ereport(ERROR,
                            (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                             errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
                                    fieldno, cstate->opts.null_print, NameStr(attr->attname))));

                if (namestrcmp(&attr->attname, header_name) != 0)
                    ereport(ERROR,
                            (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                             errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
                                    fieldno, header_name, NameStr(attr->attname))));
            }
        }

        /* Input ended with the header line: there is no data row */
        if (at_eof)
            return false;
    }

    cstate->cur_lineno++;

    /* Pull the next data line into line_buf */
    at_eof = CopyReadLine(cstate, is_csv);

    /*
     * EOF with an empty line buffer means the input is exhausted.  EOF after
     * some characters is handled as if a newline preceded it: the partial
     * line is processed now and the following call reports the end.
     */
    if (at_eof && cstate->line_buf.len == 0)
        return false;

    /* Split the line into de-escaped field values */
    nparsed = is_csv ? CopyReadAttributesCSV(cstate)
        : CopyReadAttributesText(cstate);

    *fields = cstate->raw_fields;
    *nfields = nparsed;
    return true;
}

References Assert(), attnum, CopyFromStateData::attnumlist, CopyFormatOptions::binary, COPY_HEADER_MATCH, CopyReadAttributesCSV(), CopyReadAttributesText(), CopyReadLine(), cur, CopyFromStateData::cur_lineno, ereport, errcode(), errmsg(), ERROR, CopyFormatOptions::header_line, StringInfoData::len, lfirst_int, CopyFromStateData::line_buf, list_length(), NameStr, namestrcmp(), CopyFormatOptions::null_print, CopyFromStateData::opts, CopyFromStateData::raw_fields, CopyFromStateData::rel, RelationGetDescr, and TupleDescAttr().

Referenced by CopyFromTextLikeOneRow(), and NextCopyFromRawFields().

◆ ReceiveCopyBegin()

void ReceiveCopyBegin ( CopyFromState cstate )

Definition at line 170 of file copyfromparse.c.

{
    /*
     * Send a CopyInResponse message to the frontend and switch the copy
     * source to COPY_FRONTEND.
     *
     * The message carries one byte for the overall format (1 when the
     * binary option is set, else 0), the number of columns in the
     * attribute list, and then the same format code once per column.
     */
    StringInfoData msg;
    int16       fmt = 0;
    int         ncols;
    int         col;

    if (cstate->opts.binary)
        fmt = 1;
    ncols = list_length(cstate->attnumlist);

    pq_beginmessage(&msg, PqMsg_CopyInResponse);
    pq_sendbyte(&msg, fmt);     /* overall format */
    pq_sendint16(&msg, ncols);
    for (col = 0; col < ncols; col++)
    {
        /* per-column format, identical for every column */
        pq_sendint16(&msg, fmt);
    }
    pq_endmessage(&msg);

    cstate->copy_src = COPY_FRONTEND;
    cstate->fe_msgbuf = makeStringInfo();

    /* We *must* flush here to ensure FE knows it can send. */
    pq_flush();
}

References CopyFromStateData::attnumlist, CopyFormatOptions::binary, buf, COPY_FRONTEND, CopyFromStateData::copy_src, CopyFromStateData::fe_msgbuf, format, i, list_length(), makeStringInfo(), CopyFromStateData::opts, pq_beginmessage(), pq_endmessage(), pq_flush, pq_sendbyte(), pq_sendint16(), and PqMsg_CopyInResponse.

Referenced by BeginCopyFrom().

◆ ReceiveCopyBinaryHeader()

void ReceiveCopyBinaryHeader ( CopyFromState cstate )

Definition at line 190 of file copyfromparse.c.

{
    /*
     * Read and validate the header of a binary-format COPY stream:
     * the 11-byte signature, a 32-bit flags word, and a 32-bit header
     * extension length followed by that many bytes, which are discarded.
     * Any problem is reported as ERRCODE_BAD_COPY_FILE_FORMAT.
     */
    char        sig[11];
    char        skipped;
    int32       flags;
    int32       ext_len;

    /* Signature: must be fully present and equal to BinarySignature */
    if (CopyReadBinaryData(cstate, sig, 11) != 11 ||
        memcmp(sig, BinarySignature, 11) != 0)
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("COPY file signature not recognized")));

    /* Flags field */
    if (!CopyGetInt32(cstate, &flags))
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("invalid COPY file header (missing flags)")));

    /* Bit 16 is rejected with its own message (WITH OIDS) */
    if ((flags & (1 << 16)) != 0)
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("invalid COPY file header (WITH OIDS)")));
    flags = flags & ~(1 << 16);

    /* Anything left at bit 16 or above is an unrecognized critical flag */
    if ((flags >> 16) != 0)
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("unrecognized critical flags in COPY file header")));

    /* Header extension length: must be readable and non-negative */
    if (!CopyGetInt32(cstate, &ext_len) ||
        ext_len < 0)
        ereport(ERROR,
                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                 errmsg("invalid COPY file header (missing length)")));

    /* Skip the extension area, if any, one byte at a time */
    for (; ext_len > 0; ext_len--)
    {
        if (CopyReadBinaryData(cstate, &skipped, 1) != 1)
            ereport(ERROR,
                    (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                     errmsg("invalid COPY file header (wrong length)")));
    }
}

References BinarySignature, CopyGetInt32(), CopyReadBinaryData(), ereport, errcode(), errmsg(), and ERROR.

Referenced by CopyFromBinaryStart().

Variable Documentation

◆ BinarySignature

const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"

static

Definition at line 139 of file copyfromparse.c.

Referenced by ReceiveCopyBinaryHeader().

Macros

Functions

Variables

Macro Definition Documentation

◆ IF_NEED_REFILL_AND_EOF_BREAK

◆ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE

◆ ISOCTAL

◆ OCTVALUE

◆ REFILL_LINEBUF

Function Documentation

◆ CopyConversionError()

◆ CopyConvertBuf()

◆ CopyFromBinaryOneRow()

◆ CopyFromCSVOneRow()

◆ CopyFromTextLikeOneRow()

◆ CopyFromTextOneRow()

◆ CopyGetData()

◆ CopyGetInt16()

◆ CopyGetInt32()

◆ CopyLoadInputBuf()

◆ CopyLoadRawBuf()

◆ CopyReadAttributesCSV()

◆ CopyReadAttributesText()

◆ CopyReadBinaryAttribute()

◆ CopyReadBinaryData()

◆ CopyReadLine()

◆ CopyReadLineText()

◆ GetDecimalFromHex()

◆ NextCopyFrom()

◆ NextCopyFromRawFields()

◆ NextCopyFromRawFieldsInternal()

◆ ReceiveCopyBegin()

◆ ReceiveCopyBinaryHeader()

Variable Documentation

◆ BinarySignature