#include "postgres.h"
#include "gramparse.h"
#include "mb/pg_wchar.h"
#include "parser/parser.h"
#include "parser/scansup.h"

Include dependency graph for parser.c:

Functions
static bool	check_uescapechar (unsigned char escape)

static char *	str_udeescape (const char *str, char escape, int position, core_yyscan_t yyscanner)

List *	raw_parser (const char *str, RawParseMode mode)

int	base_yylex (YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)

static unsigned int	hexval (unsigned char c)

static void	check_unicode_value (pg_wchar c)

Function Documentation

◆ base_yylex()

int base_yylex	(	YYSTYPE *	lvalp,
		YYLTYPE *	llocp,
		core_yyscan_t	yyscanner
	)

Definition at line 111 of file parser.c.

{
    /*
     * Token filter sitting on top of core_yylex().
     *
     * Returns the next token code, storing its semantic value in *lvalp and
     * its location in *llocp.  For a handful of tokens (FORMAT, NOT, NULLS_P,
     * WITH, WITHOUT, UIDENT, USCONST) it reads one token ahead -- and, for
     * UESCAPE, a second token ahead -- so that it can either substitute a
     * "_LA" variant of the token or fold a Unicode-escaped identifier/string
     * into a plain IDENT/SCONST.  A token read ahead but not consumed is
     * parked in yyextra and handed out on the next call.
     */
    base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
    int         cur_token;
    int         next_token;
    int         cur_token_length;
    YYLTYPE     cur_yylloc;
 
    /* Get next token --- we might already have it */
    if (yyextra->have_lookahead)
    {
        cur_token = yyextra->lookahead_token;
        lvalp->core_yystype = yyextra->lookahead_yylval;
        *llocp = yyextra->lookahead_yylloc;
        /*
         * lookahead_end can be NULL (raw_parser() sets it so when injecting
         * a mode token); otherwise put back the character that was replaced
         * by '\0' when the lookahead was parked.
         */
        if (yyextra->lookahead_end)
            *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
        yyextra->have_lookahead = false;
    }
    else
        cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
 
    /*
     * If this token isn't one that requires lookahead, just return it.  If it
     * does, determine the token length.  (We could get that via strlen(), but
     * since we have such a small set of possibilities, hardwiring seems
     * feasible and more efficient --- at least for the fixed-length cases.)
     */
    switch (cur_token)
    {
        case FORMAT:
            cur_token_length = 6;
            break;
        case NOT:
            cur_token_length = 3;
            break;
        case NULLS_P:
            cur_token_length = 5;
            break;
        case WITH:
            cur_token_length = 4;
            break;
        case UIDENT:
        case USCONST:
            /* variable-length tokens: measure the text starting at *llocp */
            cur_token_length = strlen(yyextra->core_yy_extra.scanbuf + *llocp);
            break;
        case WITHOUT:
            cur_token_length = 7;
            break;
        default:
            /* no lookahead needed for any other token */
            return cur_token;
    }
 
    /*
     * Identify end+1 of current token.  core_yylex() has temporarily stored a
     * '\0' here, and will undo that when we call it again.  We need to redo
     * it to fully revert the lookahead call for error reporting purposes.
     */
    yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
        *llocp + cur_token_length;
    Assert(*(yyextra->lookahead_end) == '\0');
 
    /*
     * Save and restore *llocp around the call.  It might look like we could
     * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
     * does not work because flex actually holds onto the last-passed pointer
     * internally, and will use that for error reporting.  We need any error
     * reports to point to the current token, not the next one.
     */
    cur_yylloc = *llocp;
 
    /* Get next token, saving outputs into lookahead variables */
    next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
    yyextra->lookahead_token = next_token;
    yyextra->lookahead_yylloc = *llocp;
 
    *llocp = cur_yylloc;
 
    /* Now revert the un-truncation of the current token */
    yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
    *(yyextra->lookahead_end) = '\0';
 
    /* The parked token will be returned by the next call unless consumed below */
    yyextra->have_lookahead = true;
 
    /*
     * Replace cur_token if needed, based on lookahead.  The inner switches
     * deliberately have no default: any other following token leaves
     * cur_token as it was.
     */
    switch (cur_token)
    {
        case FORMAT:
            /* Replace FORMAT by FORMAT_LA if it's followed by JSON */
            switch (next_token)
            {
                case JSON:
                    cur_token = FORMAT_LA;
                    break;
            }
            break;
 
        case NOT:
            /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
            switch (next_token)
            {
                case BETWEEN:
                case IN_P:
                case LIKE:
                case ILIKE:
                case SIMILAR:
                    cur_token = NOT_LA;
                    break;
            }
            break;
 
        case NULLS_P:
            /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
            switch (next_token)
            {
                case FIRST_P:
                case LAST_P:
                    cur_token = NULLS_LA;
                    break;
            }
            break;
 
        case WITH:
            /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
            switch (next_token)
            {
                case TIME:
                case ORDINALITY:
                    cur_token = WITH_LA;
                    break;
            }
            break;
 
        case WITHOUT:
            /* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
            switch (next_token)
            {
                case TIME:
                    cur_token = WITHOUT_LA;
                    break;
            }
            break;
 
        case UIDENT:
        case USCONST:
            /* Look ahead for UESCAPE */
            if (next_token == UESCAPE)
            {
                /* Yup, so get third token, which had better be SCONST */
                const char *escstr;
 
                /* Again save and restore *llocp */
                cur_yylloc = *llocp;
 
                /* Un-truncate current token so errors point to third token */
                *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
 
                /* Get third token */
                next_token = core_yylex(&(yyextra->lookahead_yylval),
                                        llocp, yyscanner);
 
                /* If we throw error here, it will point to third token */
                if (next_token != SCONST)
                    scanner_yyerror("UESCAPE must be followed by a simple string literal",
                                    yyscanner);
 
                /* The escape must be exactly one character that passes check_uescapechar() */
                escstr = yyextra->lookahead_yylval.str;
                if (strlen(escstr) != 1 || !check_uescapechar(escstr[0]))
                    scanner_yyerror("invalid Unicode escape character",
                                    yyscanner);
 
                /* Now restore *llocp; errors will point to first token */
                *llocp = cur_yylloc;
 
                /* Apply Unicode conversion */
                lvalp->core_yystype.str =
                    str_udeescape(lvalp->core_yystype.str,
                                  escstr[0],
                                  *llocp,
                                  yyscanner);
 
                /*
                 * We don't need to revert the un-truncation of UESCAPE.  What
                 * we do want to do is clear have_lookahead, thereby consuming
                 * all three tokens.
                 */
                yyextra->have_lookahead = false;
            }
            else
            {
                /* No UESCAPE, so convert using default escape character */
                lvalp->core_yystype.str =
                    str_udeescape(lvalp->core_yystype.str,
                                  '\\',
                                  *llocp,
                                  yyscanner);
            }
 
            /* The grammar only ever sees the plain IDENT / SCONST token codes */
            if (cur_token == UIDENT)
            {
                /* It's an identifier, so truncate as appropriate */
                truncate_identifier(lvalp->core_yystype.str,
                                    strlen(lvalp->core_yystype.str),
                                    true);
                cur_token = IDENT;
            }
            else if (cur_token == USCONST)
            {
                cur_token = SCONST;
            }
            break;
    }
 
    return cur_token;
}

References Assert(), check_uescapechar(), core_yylex(), next_token(), pg_yyget_extra, scanner_yyerror(), str_udeescape(), truncate_identifier(), and yyextra.

Referenced by base_yylex_location().

◆ check_uescapechar()

static bool check_uescapechar ( unsigned char escape )

static

Definition at line 352 of file parser.c.

{
    /*
     * Decide whether "escape" may serve as a UESCAPE character.
     * Returns true when it is acceptable, false otherwise.
     */

    /* Hex digits are rejected outright. */
    if (isxdigit(escape))
        return false;

    /* So are the plus sign and both quote characters. */
    switch (escape)
    {
        case '+':
        case '\'':
        case '"':
            return false;
        default:
            break;
    }

    /* Anything left is fine unless the scanner treats it as whitespace. */
    return !scanner_isspace(escape);
}

References scanner_isspace().

Referenced by base_yylex().

◆ check_unicode_value()

static void check_unicode_value ( pg_wchar c )

static

Definition at line 342 of file parser.c.

{
    /* Accepted code points need no further action. */
    if (is_valid_unicode_codepoint(c))
        return;

    /* Anything else is reported as a syntax error. */
    ereport(ERROR,
            (errcode(ERRCODE_SYNTAX_ERROR),
             errmsg("invalid Unicode escape value")));
}

References ereport, errcode(), errmsg(), ERROR, and is_valid_unicode_codepoint().

Referenced by str_udeescape().

◆ hexval()

static unsigned int hexval ( unsigned char c )

static

Definition at line 328 of file parser.c.

{
    /*
     * Map one hexadecimal digit character (either letter case) to its
     * numeric value 0..15; any other character is reported via elog(ERROR).
     */
    unsigned int digit;

    if ('0' <= c && c <= '9')
        digit = c - '0';
    else if ('a' <= c && c <= 'f')
        digit = c - 'a' + 0xA;
    else if ('A' <= c && c <= 'F')
        digit = c - 'A' + 0xA;
    else
    {
        elog(ERROR, "invalid hexadecimal digit");
        digit = 0;              /* not reached */
    }

    return digit;
}

References elog, and ERROR.

Referenced by str_udeescape().

◆ raw_parser()

List * raw_parser	(	const char *	str,
		RawParseMode	mode
	)

Definition at line 42 of file parser.c.

{
    /*
     * Run the flex scanner and bison parser over "str".
     *
     * For any mode other than RAW_PARSE_DEFAULT, a mode-specific token is
     * preloaded as base_yylex()'s lookahead so that it is the first token
     * the grammar sees.  Returns yyextra.parsetree on success, or NIL if
     * base_yyparse() reports failure.
     */

    /* Token to preload for each mode; indexed by the RawParseMode enum. */
    static const int mode_token[] = {
        [RAW_PARSE_DEFAULT] = 0,
        [RAW_PARSE_TYPE_NAME] = MODE_TYPE_NAME,
        [RAW_PARSE_PLPGSQL_EXPR] = MODE_PLPGSQL_EXPR,
        [RAW_PARSE_PLPGSQL_ASSIGN1] = MODE_PLPGSQL_ASSIGN1,
        [RAW_PARSE_PLPGSQL_ASSIGN2] = MODE_PLPGSQL_ASSIGN2,
        [RAW_PARSE_PLPGSQL_ASSIGN3] = MODE_PLPGSQL_ASSIGN3,
    };
    core_yyscan_t yyscanner;
    base_yy_extra_type yyextra;
    int         yyresult;

    /* Set up the flex scanner on the input string. */
    yyscanner = scanner_init(str, &yyextra.core_yy_extra,
                             &ScanKeywords, ScanKeywordTokens);

    /* The lookahead state is the only part base_yylex() needs from us. */
    if (mode != RAW_PARSE_DEFAULT)
    {
        yyextra.have_lookahead = true;
        yyextra.lookahead_token = mode_token[mode];
        yyextra.lookahead_yylloc = 0;
        /* NULL: there is no buffer position to un-truncate for this token */
        yyextra.lookahead_end = NULL;
    }
    else
        yyextra.have_lookahead = false;

    /* Set up the bison parser, then run it. */
    parser_init(&yyextra);
    yyresult = base_yyparse(yyscanner);

    /* Clean up (release memory) before looking at the outcome. */
    scanner_finish(yyscanner);

    /* A nonzero result from base_yyparse() means the parse failed. */
    return yyresult ? NIL : yyextra.parsetree;
}

References base_yyparse(), mode, NIL, parser_init(), RAW_PARSE_DEFAULT, RAW_PARSE_PLPGSQL_ASSIGN1, RAW_PARSE_PLPGSQL_ASSIGN2, RAW_PARSE_PLPGSQL_ASSIGN3, RAW_PARSE_PLPGSQL_EXPR, RAW_PARSE_TYPE_NAME, ScanKeywords, ScanKeywordTokens, scanner_finish(), scanner_init(), str, and yyextra.

Referenced by _SPI_prepare_oneshot_plan(), _SPI_prepare_plan(), ATPostAlterTypeParse(), pg_parse_query(), and typeStringToTypeName().

◆ str_udeescape()

static char * str_udeescape	(	const char *	str,
		char	escape,
		int	position,
		core_yyscan_t	yyscanner
	)

static

Definition at line 372 of file parser.c.

{
    /*
     * Expand the Unicode escape sequences in "str" and return the result as
     * a freshly palloc'd, NUL-terminated string.
     *
     * Recognized sequences, where E is the "escape" character:
     *   EE        -> a single literal E
     *   EXXXX     -> the code point given by four hex digits
     *   E+XXXXXX  -> the code point given by six hex digits
     * Any other use of E raises a syntax error.  A UTF-16 first surrogate
     * must be followed immediately by an escape giving a second surrogate;
     * the two are then combined into a single code point.
     *
     * "position" is the location of the token being processed; it is used
     * only to compute error cursor positions (offset by 3 for the leading
     * U&" of the token).
     */
    const char *in;
    char       *new,
               *out;
    size_t      new_len;
    pg_wchar    pair_first = 0;     /* pending first surrogate, or 0 if none */
    ScannerCallbackState scbstate;
 
    /*
     * Guesstimate that result will be no longer than input, but allow enough
     * padding for Unicode conversion.
     */
    new_len = strlen(str) + MAX_UNICODE_EQUIVALENT_STRING + 1;
    new = palloc(new_len);
 
    in = str;
    out = new;
    while (*in)
    {
        /* Enlarge string if needed */
        size_t      out_dist = out - new;
 
        /*
         * Guarantee at least MAX_UNICODE_EQUIVALENT_STRING + 1 free bytes
         * before writing anything in this iteration.
         */
        if (out_dist > new_len - (MAX_UNICODE_EQUIVALENT_STRING + 1))
        {
            new_len *= 2;
            new = repalloc(new, new_len);
            /* repalloc may have moved the buffer; re-derive the write pointer */
            out = new + out_dist;
        }
 
        if (in[0] == escape)
        {
            /*
             * Any errors reported while processing this escape sequence will
             * have an error cursor pointing at the escape.
             */
            setup_scanner_errposition_callback(&scbstate, yyscanner,
                                               in - str + position + 3);    /* 3 for U&" */
            if (in[1] == escape)
            {
                /* Doubled escape character: emit one literal copy */
                if (pair_first)
                    goto invalid_pair;
                *out++ = escape;
                in += 2;
            }
            /*
             * The && chain stops at the first non-hex character, and the
             * terminating NUL is not a hex digit, so these tests never read
             * beyond the end of the string.
             */
            else if (isxdigit((unsigned char) in[1]) &&
                     isxdigit((unsigned char) in[2]) &&
                     isxdigit((unsigned char) in[3]) &&
                     isxdigit((unsigned char) in[4]))
            {
                pg_wchar    unicode;
 
                /* Assemble the 16-bit value from four hex digits */
                unicode = (hexval(in[1]) << 12) +
                    (hexval(in[2]) << 8) +
                    (hexval(in[3]) << 4) +
                    hexval(in[4]);
                check_unicode_value(unicode);
                if (pair_first)
                {
                    /* A pending first surrogate must be completed right now */
                    if (is_utf16_surrogate_second(unicode))
                    {
                        unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                        pair_first = 0;
                    }
                    else
                        goto invalid_pair;
                }
                else if (is_utf16_surrogate_second(unicode))
                    goto invalid_pair;
 
                /* Hold a first surrogate until its partner arrives */
                if (is_utf16_surrogate_first(unicode))
                    pair_first = unicode;
                else
                {
                    /*
                     * NOTE(review): presumably pg_unicode_to_server() writes
                     * a NUL-terminated string at "out", since strlen(out) is
                     * used to advance past it -- confirm against its
                     * definition.
                     */
                    pg_unicode_to_server(unicode, (unsigned char *) out);
                    out += strlen(out);
                }
                in += 5;        /* escape character + 4 hex digits */
            }
            else if (in[1] == '+' &&
                     isxdigit((unsigned char) in[2]) &&
                     isxdigit((unsigned char) in[3]) &&
                     isxdigit((unsigned char) in[4]) &&
                     isxdigit((unsigned char) in[5]) &&
                     isxdigit((unsigned char) in[6]) &&
                     isxdigit((unsigned char) in[7]))
            {
                pg_wchar    unicode;
 
                /* Assemble the 24-bit value from six hex digits */
                unicode = (hexval(in[2]) << 20) +
                    (hexval(in[3]) << 16) +
                    (hexval(in[4]) << 12) +
                    (hexval(in[5]) << 8) +
                    (hexval(in[6]) << 4) +
                    hexval(in[7]);
                check_unicode_value(unicode);
                /* Surrogate handling is the same as in the four-digit case */
                if (pair_first)
                {
                    if (is_utf16_surrogate_second(unicode))
                    {
                        unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                        pair_first = 0;
                    }
                    else
                        goto invalid_pair;
                }
                else if (is_utf16_surrogate_second(unicode))
                    goto invalid_pair;
 
                if (is_utf16_surrogate_first(unicode))
                    pair_first = unicode;
                else
                {
                    pg_unicode_to_server(unicode, (unsigned char *) out);
                    out += strlen(out);
                }
                in += 8;        /* escape character + '+' + 6 hex digits */
            }
            else
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("invalid Unicode escape"),
                         errhint("Unicode escapes must be \\XXXX or \\+XXXXXX.")));
 
            cancel_scanner_errposition_callback(&scbstate);
        }
        else
        {
            /* An ordinary character cannot follow an unpaired first surrogate */
            if (pair_first)
                goto invalid_pair;
 
            *out++ = *in++;
        }
    }
 
    /* unfinished surrogate pair? */
    if (pair_first)
        goto invalid_pair;
 
    *out = '\0';
    return new;
 
    /*
     * We might get here with the error callback active, or not.  Call
     * scanner_errposition to make sure an error cursor appears; if the
     * callback is active, this is duplicative but harmless.
     */
invalid_pair:
    ereport(ERROR,
            (errcode(ERRCODE_SYNTAX_ERROR),
             errmsg("invalid Unicode surrogate pair"),
             scanner_errposition(in - str + position + 3,   /* 3 for U&" */
                                 yyscanner)));
    return NULL;                /* keep compiler quiet */
}

References cancel_scanner_errposition_callback(), check_unicode_value(), ereport, errcode(), errhint(), errmsg(), ERROR, hexval(), is_utf16_surrogate_first(), is_utf16_surrogate_second(), MAX_UNICODE_EQUIVALENT_STRING, palloc(), pg_unicode_to_server(), repalloc(), scanner_errposition(), setup_scanner_errposition_callback(), str, and surrogate_pair_to_codepoint().

Referenced by base_yylex().

Functions

Function Documentation

◆ base_yylex()

◆ check_uescapechar()

◆ check_unicode_value()

◆ hexval()

◆ raw_parser()

◆ str_udeescape()