read_8c_source.html

/*-------------------------------------------------------------------------

 *

 * read.c

 *    routines to convert a string (legal ascii representation of node) back

 *    to nodes

 *

 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group

 * Portions Copyright (c) 1994, Regents of the University of California

 *

 *

 * IDENTIFICATION

 *    src/backend/nodes/read.c

 *

 * HISTORY

 *    AUTHOR            DATE            MAJOR EVENT

 *    Andrew Yu         Nov 2, 1994     file creation

 *

 *-------------------------------------------------------------------------

 */

#include "postgres.h"


#include <ctype.h>


#include "common/string.h"

#include "nodes/bitmapset.h"

#include "nodes/pg_list.h"

#include "nodes/readfuncs.h"

#include "nodes/value.h"


/*
 * Static state for pg_strtok: the current read position within the string
 * being parsed.  stringToNodeInternal() points this at its input string
 * before reading and puts the previous value back afterwards; pg_strtok()
 * advances it past each token it hands out.
 */

static const char *pg_strtok_ptr = NULL;


/*
 * State flag that determines how readfuncs.c should treat location fields.
 * The variable only exists in builds with DEBUG_NODE_TESTS_ENABLED defined.
 * stringToNodeInternal() sets it from its restore_loc_fields argument for
 * the duration of a read and then puts the previous value back.
 */

#ifdef DEBUG_NODE_TESTS_ENABLED

bool        restore_location_fields = false;

#endif


/*

 * stringToNode -

 *    builds a Node tree from its string representation (assumed valid)

 *

 * restore_loc_fields instructs readfuncs.c whether to restore location

 * fields rather than set them to -1.  This is currently only supported

 * in builds with DEBUG_NODE_TESTS_ENABLED defined.

 */

static void *

stringToNodeInternal(const char *str, bool restore_loc_fields)

{

    void       *retval;

    const char *save_strtok;

#ifdef DEBUG_NODE_TESTS_ENABLED

    bool        save_restore_location_fields;

#endif


    /*

     * We save and restore the pre-existing state of pg_strtok. This makes the

     * world safe for re-entrant invocation of stringToNode, without incurring

     * a lot of notational overhead by having to pass the next-character

     * pointer around through all the readfuncs.c code.

     */

    save_strtok = pg_strtok_ptr;


    pg_strtok_ptr = str;        /* point pg_strtok at the string to read */


    /*

     * If enabled, likewise save/restore the location field handling flag.

     */

#ifdef DEBUG_NODE_TESTS_ENABLED

    save_restore_location_fields = restore_location_fields;

    restore_location_fields = restore_loc_fields;

#endif


    retval = nodeRead(NULL, 0); /* do the reading */


    pg_strtok_ptr = save_strtok;


#ifdef DEBUG_NODE_TESTS_ENABLED

    restore_location_fields = save_restore_location_fields;

#endif


    return retval;

}


/*

 * Externally visible entry points

 */

/*
 * stringToNode -
 *    parse str into a Node tree, without requesting that location fields
 *    be restored.
 */
void *
stringToNode(const char *str)
{
    void       *tree;

    tree = stringToNodeInternal(str, false);

    return tree;
}


#ifdef DEBUG_NODE_TESTS_ENABLED


/*
 * stringToNodeWithLocations -
 *    parse str into a Node tree, requesting that location fields be
 *    restored.
 */
void *
stringToNodeWithLocations(const char *str)
{
    void       *tree;

    tree = stringToNodeInternal(str, true);

    return tree;
}


#endif


/*****************************************************************************

 *

 * the lisp token parser

 *

 *****************************************************************************/


/*
 * pg_strtok --- retrieve next "token" from a string.
 *
 * Behaves somewhat like strtok, but the source string is never modified:
 * rather than storing nulls into it, the token's length is reported through
 * *length.  The token rules are hard-wired instead of being driven by a
 * caller-supplied set of terminating characters.
 *
 * The string to scan must already have been set up by stringToNode.
 *
 * Token rules:
 *  * Whitespace (space, tab, newline) always separates tokens.
 *  * Each of '(', ')', '{', '}' is a token by itself, with or without
 *    surrounding whitespace.
 *  * Any other token runs up to the next whitespace character or the next
 *    of those four special characters.
 *  * A backslash '\' quotes the character after it, so that whitespace or a
 *    special character can appear inside a token.  Backslashes themselves
 *    must be backslashed for consistency; any other character may be
 *    backslashed but need not be.
 *  * The unbackslashed token '<>' is returned as a non-NULL pointer with
 *    length == 0.  There is no other way to get a zero-length token.
 *
 * Returns a pointer to the start of the next token and stores its length
 * (including any embedded backslashes!) in *length.  When no tokens remain,
 * NULL is returned and *length is set to 0.
 *
 * NOTE: backslashes are not removed here; callers that need that must do it
 * themselves (see "debackslash").
 *
 * NOTE: prior to release 7.0 a token starting with '"' was treated as
 * extending to the next '"'.  That special case was broken for strings with
 * an embedded '"', so it was removed in favor of the backslash rules above.
 * Higher-level code should add backslashes to a string constant to ensure
 * it is treated as a single token.
 */
const char *
pg_strtok(int *length)
{
    const char *cursor;         /* scan position */
    const char *start;          /* first character of the token */
    int         span;           /* token length to report */

    /* Step over any separators in front of the token. */
    for (cursor = pg_strtok_ptr;
         *cursor == ' ' || *cursor == '\n' || *cursor == '\t';
         cursor++)
        continue;

    if (*cursor == '\0')
    {
        /* Nothing left; leave the saved position at the terminator. */
        pg_strtok_ptr = cursor;
        *length = 0;
        return NULL;
    }

    start = cursor;

    switch (*cursor)
    {
        case '(':
        case ')':
        case '{':
        case '}':
            /* each special character is a complete token */
            cursor++;
            break;

        default:
            /* ordinary token, which may contain backslash escapes */
            for (;;)
            {
                char        c = *cursor;

                if (c == '\0' ||
                    c == ' ' || c == '\n' || c == '\t' ||
                    c == '(' || c == ')' ||
                    c == '{' || c == '}')
                    break;

                /*
                 * A backslash carries the following character along with
                 * it, unless that character is the string terminator.
                 */
                cursor += (c == '\\' && cursor[1] != '\0') ? 2 : 1;
            }
            break;
    }

    span = (int) (cursor - start);

    /* A bare "<>" is the "empty" token: report it with zero length. */
    if (span == 2 && start[0] == '<' && start[1] == '>')
        span = 0;

    *length = span;
    pg_strtok_ptr = cursor;

    return start;
}


/*
 * debackslash -
 *    return a palloc'd, NUL-terminated copy of the first "length" bytes of
 *    token, with protective backslashes stripped.
 *
 * Each backslash that is not the final byte is dropped and the byte after
 * it is copied literally.  A backslash in the final position is copied
 * as-is.
 */
char *
debackslash(const char *token, int length)
{
    char       *result = palloc(length + 1);
    int         src = 0;        /* next byte of token to examine */
    int         dst = 0;        /* next free slot in result */

    while (src < length)
    {
        /* skip the escape character itself if something follows it */
        if (token[src] == '\\' && src + 1 < length)
            src++;

        result[dst++] = token[src++];
    }

    result[dst] = '\0';

    return result;
}


/*
 * Token classes that nodeTokenType() reports in addition to real NodeTag
 * values; nodeRead() switches on them.  There is no code for '}':
 * nodeTokenType() classifies it as OTHER_TOKEN, and nodeRead() checks for
 * it directly after reading a '{' node.
 *
 * NOTE(review): the 1000000 base is presumably there to keep these codes
 * clear of genuine NodeTag values -- confirm against nodes.h.
 */
#define RIGHT_PAREN (1000000 + 1)

#define LEFT_PAREN  (1000000 + 2)

#define LEFT_BRACE  (1000000 + 3)

#define OTHER_TOKEN (1000000 + 4)


/*
 * nodeTokenType -
 *    classify the token of the given length that starts at "token".
 *
 *    The result is either one of the NodeTags
 *      T_Integer, T_Float, T_Boolean, T_String, T_BitString
 *    or one of the codes private to this file:
 *      RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
 *
 *    Assumption: the ascii representation is legal
 */
static NodeTag
nodeTokenType(const char *token, int length)
{
    const char *digits = token; /* token text after any sign */
    int         ndigits = length;

    /* An optional sign may precede a number. */
    if (token[0] == '+' || token[0] == '-')
    {
        digits++;
        ndigits--;
    }

    /* Numbers start with a digit, or with '.' followed by a digit. */
    if ((ndigits > 0 && isdigit((unsigned char) digits[0])) ||
        (ndigits > 1 && digits[0] == '.' && isdigit((unsigned char) digits[1])))
    {
        char       *stop;

        /*
         * Telling integer from float takes both a syntax check and a range
         * check, and strtoint() performs both.  The token ends on a
         * character strtoint() will stop at, so the string need not be
         * modified.  Anything that is not entirely an in-range integer is
         * reported as a float.
         */
        errno = 0;
        (void) strtoint(digits, &stop, 10);

        return (stop == token + length && errno != ERANGE) ? T_Integer : T_Float;
    }

    switch (token[0])
    {
            /*
             * No length checks for these three: pg_strtok() always treats
             * them as single-byte tokens.
             */
        case '(':
            return LEFT_PAREN;
        case ')':
            return RIGHT_PAREN;
        case '{':
            return LEFT_BRACE;

        case 't':
            if (length == 4 && strncmp(token, "true", 4) == 0)
                return T_Boolean;
            break;
        case 'f':
            if (length == 5 && strncmp(token, "false", 5) == 0)
                return T_Boolean;
            break;

        case '"':
            /* strings need both an opening and a closing quote */
            if (length > 1 && token[length - 1] == '"')
                return T_String;
            break;

        case 'b':
        case 'x':
            return T_BitString;

        default:
            break;
    }

    return OTHER_TOKEN;
}


/*

 * nodeRead -

 *    Slightly higher-level reader.

 *

 * This routine applies some semantic knowledge on top of the purely

 * lexical tokenizer pg_strtok().   It can read

 *  * Value token nodes (integers, floats, booleans, or strings);

 *  * General nodes (via parseNodeString() from readfuncs.c);

 *  * Lists of the above;

 *  * Lists of integers, OIDs, or TransactionIds.

 * The return value is declared void *, not Node *, to avoid having to

 * cast it explicitly in callers that assign to fields of different types.

 *

 * External callers should always pass NULL/0 for the arguments.  Internally

 * a non-NULL token may be passed when the upper recursion level has already

 * scanned the first token of a node's representation.

 *

 * We assume pg_strtok is already initialized with a string to read (hence

 * this should only be invoked from within a stringToNode operation).

 */

void *

nodeRead(const char *token, int tok_len)

{

    Node       *result;

    NodeTag     type;


    if (token == NULL)          /* need to read a token? */

    {

        token = pg_strtok(&tok_len);


        if (token == NULL)      /* end of input */

            return NULL;

    }


    type = nodeTokenType(token, tok_len);


    switch ((int) type)

    {

        case LEFT_BRACE:

            result = parseNodeString();

            token = pg_strtok(&tok_len);

            if (token == NULL || token[0] != '}')

                elog(ERROR, "did not find '}' at end of input node");

            break;

        case LEFT_PAREN:

            {

                List       *l = NIL;


                /*----------

                 * Could be an integer list:    (i int int ...)

                 * or an OID list:              (o int int ...)

                 * or an XID list:              (x int int ...)

                 * or a bitmapset:              (b int int ...)

                 * or a list of nodes/values:   (node node ...)

                 *----------

                 */

                token = pg_strtok(&tok_len);

                if (token == NULL)

                    elog(ERROR, "unterminated List structure");

                if (tok_len == 1 && token[0] == 'i')

                {

                    /* List of integers */

                    for (;;)

                    {

                        int         val;

                        char       *endptr;


                        token = pg_strtok(&tok_len);

                        if (token == NULL)

                            elog(ERROR, "unterminated List structure");

                        if (token[0] == ')')

                            break;

                        val = (int) strtol(token, &endptr, 10);

                        if (endptr != token + tok_len)

                            elog(ERROR, "unrecognized integer: \"%.*s\"",

                                 tok_len, token);

                        l = lappend_int(l, val);

                    }

                    result = (Node *) l;

                }

                else if (tok_len == 1 && token[0] == 'o')

                {

                    /* List of OIDs */

                    for (;;)

                    {

                        Oid         val;

                        char       *endptr;


                        token = pg_strtok(&tok_len);

                        if (token == NULL)

                            elog(ERROR, "unterminated List structure");

                        if (token[0] == ')')

                            break;

                        val = (Oid) strtoul(token, &endptr, 10);

                        if (endptr != token + tok_len)

                            elog(ERROR, "unrecognized OID: \"%.*s\"",

                                 tok_len, token);

                        l = lappend_oid(l, val);

                    }

                    result = (Node *) l;

                }

                else if (tok_len == 1 && token[0] == 'x')

                {

                    /* List of TransactionIds */

                    for (;;)

                    {

                        TransactionId val;

                        char       *endptr;


                        token = pg_strtok(&tok_len);

                        if (token == NULL)

                            elog(ERROR, "unterminated List structure");

                        if (token[0] == ')')

                            break;

                        val = (TransactionId) strtoul(token, &endptr, 10);

                        if (endptr != token + tok_len)

                            elog(ERROR, "unrecognized Xid: \"%.*s\"",

                                 tok_len, token);

                        l = lappend_xid(l, val);

                    }

                    result = (Node *) l;

                }

                else if (tok_len == 1 && token[0] == 'b')

                {

                    /* Bitmapset -- see also _readBitmapset() */

                    Bitmapset  *bms = NULL;


                    for (;;)

                    {

                        int         val;

                        char       *endptr;


                        token = pg_strtok(&tok_len);

                        if (token == NULL)

                            elog(ERROR, "unterminated Bitmapset structure");

                        if (tok_len == 1 && token[0] == ')')

                            break;

                        val = (int) strtol(token, &endptr, 10);

                        if (endptr != token + tok_len)

                            elog(ERROR, "unrecognized integer: \"%.*s\"",

                                 tok_len, token);

                        bms = bms_add_member(bms, val);

                    }

                    result = (Node *) bms;

                }

                else

                {

                    /* List of other node types */

                    for (;;)

                    {

                        /* We have already scanned next token... */

                        if (token[0] == ')')

                            break;

                        l = lappend(l, nodeRead(token, tok_len));

                        token = pg_strtok(&tok_len);

                        if (token == NULL)

                            elog(ERROR, "unterminated List structure");

                    }

                    result = (Node *) l;

                }

                break;

            }

        case RIGHT_PAREN:

            elog(ERROR, "unexpected right parenthesis");

            result = NULL;      /* keep compiler happy */

            break;

        case OTHER_TOKEN:

            if (tok_len == 0)

            {

                /* must be "<>" --- represents a null pointer */

                result = NULL;

            }

            else

            {

                elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);

                result = NULL;  /* keep compiler happy */

            }

            break;

        case T_Integer:


            /*

             * we know that the token terminates on a char atoi will stop at

             */

            result = (Node *) makeInteger(atoi(token));

            break;

        case T_Float:

            {

                char       *fval = (char *) palloc(tok_len + 1);


                memcpy(fval, token, tok_len);

                fval[tok_len] = '\0';

                result = (Node *) makeFloat(fval);

            }

            break;

        case T_Boolean:

            result = (Node *) makeBoolean(token[0] == 't');

            break;

        case T_String:

            /* need to remove leading and trailing quotes, and backslashes */

            result = (Node *) makeString(debackslash(token + 1, tok_len - 2));

            break;

        case T_BitString:

            /* need to remove backslashes, but there are no quotes */

            result = (Node *) makeBitString(debackslash(token, tok_len));

            break;

        default:

            elog(ERROR, "unrecognized node type: %d", (int) type);

            result = NULL;      /* keep compiler happy */

            break;

    }


    return result;

}

bms_add_member
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:815

bitmapset.h

TransactionId
uint32 TransactionId
Definition: c.h:623

ERROR
#define ERROR
Definition: elog.h:39

elog
#define elog(elevel,...)
Definition: elog.h:225

str
const char * str
Definition: hashfn_unstable.h:254

token
#define token
Definition: indent_globs.h:126

val
long val
Definition: informix.c:689

lappend
List * lappend(List *list, void *datum)
Definition: list.c:339

lappend_xid
List * lappend_xid(List *list, TransactionId datum)
Definition: list.c:393

lappend_int
List * lappend_int(List *list, int datum)
Definition: list.c:357

lappend_oid
List * lappend_oid(List *list, Oid datum)
Definition: list.c:375

palloc
void * palloc(Size size)
Definition: mcxt.c:1321

NodeTag
NodeTag
Definition: nodes.h:27

pg_list.h

NIL
#define NIL
Definition: pg_list.h:68

postgres.h

Oid
unsigned int Oid
Definition: postgres_ext.h:30

stringToNode
void * stringToNode(const char *str)
Definition: read.c:90

LEFT_BRACE
#define LEFT_BRACE
Definition: read.c:232

debackslash
char * debackslash(const char *token, int length)
Definition: read.c:214

OTHER_TOKEN
#define OTHER_TOKEN
Definition: read.c:233

nodeTokenType
static NodeTag nodeTokenType(const char *token, int length)
Definition: read.c:246

stringToNodeInternal
static void * stringToNodeInternal(const char *str, bool restore_loc_fields)
Definition: read.c:49

nodeRead
void * nodeRead(const char *token, int tok_len)
Definition: read.c:320

LEFT_PAREN
#define LEFT_PAREN
Definition: read.c:231

RIGHT_PAREN
#define RIGHT_PAREN
Definition: read.c:230

pg_strtok
const char * pg_strtok(int *length)
Definition: read.c:153

pg_strtok_ptr
static const char * pg_strtok_ptr
Definition: read.c:32

parseNodeString
Node * parseNodeString(void)
Definition: readfuncs.c:565

readfuncs.h

strtoint
int strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
Definition: string.c:50

string.h

Bitmapset
Definition: bitmapset.h:50

List
Definition: pg_list.h:54

Node
Definition: nodes.h:135

token
Definition: oauth-curl.c:192

makeInteger
Integer * makeInteger(int i)
Definition: value.c:23

makeString
String * makeString(char *str)
Definition: value.c:63

makeBitString
BitString * makeBitString(char *str)
Definition: value.c:77

makeFloat
Float * makeFloat(char *numericStr)
Definition: value.c:37

makeBoolean
Boolean * makeBoolean(bool val)
Definition: value.c:49

value.h

type
const char * type
Definition: wait_event_funcs.c:27