This graph shows which files directly or indirectly include this file:
Macros
#define	GETCHAR(t, locale) (t)
Functions
static int	MatchText (const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
Macro Definition Documentation

◆ GETCHAR

#define GETCHAR	(	t,
		locale
	)	(t)
Definition at line 76 of file like_match.c.
Function Documentation

◆ MatchText()

static int MatchText	(	const char *	t,
		int	tlen,
		const char *	p,
		int	plen,
		pg_locale_t	locale
	)
static
Definition at line 80 of file like_match.c.
{
    /*
     * Match the text t (tlen long) against the LIKE pattern p (plen long),
     * where '%' matches any sequence of characters, '_' matches any single
     * character and '\\' makes the following pattern byte literal.
     *
     * Returns LIKE_TRUE when the text matches, LIKE_FALSE when it does not
     * match at this text position, and LIKE_ABORT when the text ran out
     * without a match, so that trying later start positions is pointless.
     * Raises ERROR (ERRCODE_INVALID_ESCAPE_SEQUENCE) if the pattern ends
     * with an escape character.
     *
     * If locale is non-NULL and not deterministic, literal pattern
     * substrings are compared with pg_strncoll() instead of byte by byte.
     */

    /* Fast path for match-everything pattern */
    if (plen == 1 && *p == '%')
        return LIKE_TRUE;

    /* Since this function recurses, it could be driven to stack overflow */
    check_stack_depth();

    /*
     * In this loop, we advance by char when matching wildcards (and thus on
     * recursive entry to this function we are properly char-synced). On other
     * occasions it is safe to advance by byte, as the text and pattern will
     * be in lockstep. This allows us to perform all comparisons between the
     * text and pattern on a byte by byte basis, even for multi-byte
     * encodings.
     */
    while (tlen > 0 && plen > 0)
    {
        if (*p == '\\')
        {
            /* Next pattern byte must match literally, whatever it is */
            NextByte(p, plen);
            /* ... and there had better be one, per SQL standard */
            if (plen <= 0)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                         errmsg("LIKE pattern must not end with escape character")));
            if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
                return LIKE_FALSE;
        }
        else if (*p == '%')
        {
            char        firstpat;

            /*
             * % processing is essentially a search for a text position at
             * which the remainder of the text matches the remainder of the
             * pattern, using a recursive call to check each potential match.
             *
             * If there are wildcards immediately following the %, we can skip
             * over them first, using the idea that any sequence of N _'s and
             * one or more %'s is equivalent to N _'s and one % (ie, it will
             * match any sequence of at least N text characters).  In this way
             * we will always run the recursive search loop using a pattern
             * fragment that begins with a literal character-to-match, thereby
             * not recursing more than we have to.
             */
            NextByte(p, plen);

            while (plen > 0)
            {
                if (*p == '%')
                    NextByte(p, plen);
                else if (*p == '_')
                {
                    /* If not enough text left to match the pattern, ABORT */
                    if (tlen <= 0)
                        return LIKE_ABORT;
                    NextChar(t, tlen);
                    NextByte(p, plen);
                }
                else
                    break;      /* Reached a non-wildcard pattern char */
            }

            /*
             * If we're at end of pattern, match: we have a trailing % which
             * matches any remaining text string.
             */
            if (plen <= 0)
                return LIKE_TRUE;

            /*
             * Otherwise, scan for a text position at which we can match the
             * rest of the pattern.  The first remaining pattern char is known
             * to be a regular or escaped literal character, so we can compare
             * the first pattern byte to each text byte to avoid recursing
             * more than we have to.  This fact also guarantees that we don't
             * have to consider a match to the zero-length substring at the
             * end of the text.  With a nondeterministic collation, we can't
             * rely on the first bytes being equal, so we have to recurse in
             * any case.
             */
            if (*p == '\\')
            {
                if (plen < 2)
                    ereport(ERROR,
                            (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                             errmsg("LIKE pattern must not end with escape character")));
                firstpat = GETCHAR(p[1], locale);
            }
            else
                firstpat = GETCHAR(*p, locale);

            while (tlen > 0)
            {
                if (GETCHAR(*t, locale) == firstpat || (locale && !locale->deterministic))
                {
                    int         matched = MatchText(t, tlen, p, plen, locale);

                    if (matched != LIKE_FALSE)
                        return matched; /* TRUE or ABORT */
                }

                NextChar(t, tlen);
            }

            /*
             * End of text with no match, so no point in trying later places
             * to start matching this pattern.
             */
            return LIKE_ABORT;
        }
        else if (*p == '_')
        {
            /* _ matches any single character, and we know there is one */
            NextChar(t, tlen);
            NextByte(p, plen);
            continue;
        }
        else if (locale && !locale->deterministic)
        {
            /*
             * For nondeterministic locales, we find the next substring of the
             * pattern that does not contain wildcards and try to find a
             * matching substring in the text.  Crucially, we cannot do this
             * character by character, as in the normal case, but must do it
             * substring by substring, partitioned by the wildcard characters.
             * (This is per SQL standard.)
             */
            const char *p1;
            size_t      p1len;
            const char *t1;
            size_t      t1len;
            bool        found_escape;
            const char *subpat;
            size_t      subpatlen;
            char       *buf = NULL;

            /*
             * Determine next substring of pattern without wildcards.  p is
             * the start of the subpattern, p1 is one past the last byte. Also
             * track if we found an escape character.
             */
            p1 = p;
            p1len = plen;
            found_escape = false;
            while (p1len > 0)
            {
                if (*p1 == '\\')
                {
                    found_escape = true;
                    NextByte(p1, p1len);
                    if (p1len == 0)
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
                                 errmsg("LIKE pattern must not end with escape character")));
                }
                else if (*p1 == '_' || *p1 == '%')
                    break;
                NextByte(p1, p1len);
            }

            /*
             * If we found an escape character, then make an unescaped copy of
             * the subpattern.
             */
            if (found_escape)
            {
                char       *b;

                /* The unescaped copy can only be shorter than the original */
                b = buf = palloc(p1 - p);
                for (const char *c = p; c < p1; c++)
                {
                    /*
                     * A backslash escapes the byte after it: drop the
                     * backslash and copy the following byte verbatim, even
                     * if that byte is itself a backslash.  (Dropping every
                     * backslash would turn an escaped backslash into
                     * nothing.)  The scan above steps over the escaped byte
                     * before it can stop, so an escape inside the subpattern
                     * should always be followed by a byte that is also
                     * inside it; the bounds check is only a safeguard.
                     */
                    if (*c == '\\' && c + 1 < p1)
                        c++;
                    *(b++) = *c;
                }

                subpat = buf;
                subpatlen = b - buf;
            }
            else
            {
                subpat = p;
                subpatlen = p1 - p;
            }

            /*
             * Shortcut: If this is the end of the pattern, then the rest of
             * the text has to match the rest of the pattern.
             */
            if (p1len == 0)
            {
                int         cmp;

                cmp = pg_strncoll(subpat, subpatlen, t, tlen, locale);

                if (buf)
                    pfree(buf);
                if (cmp == 0)
                    return LIKE_TRUE;
                else
                    return LIKE_FALSE;
            }

            /*
             * Now build a substring of the text and try to match it against
             * the subpattern.  t is the start of the text, t1 is one past the
             * last byte.  We start with a zero-length string.
             */
            t1 = t;
            t1len = tlen;
            for (;;)
            {
                int         cmp;

                CHECK_FOR_INTERRUPTS();

                cmp = pg_strncoll(subpat, subpatlen, t, (t1 - t), locale);

                /*
                 * If we found a match, we have to test if the rest of pattern
                 * can match against the rest of the string.  Otherwise we
                 * have to continue here try matching with a longer substring.
                 * (This is similar to the recursion for the '%' wildcard
                 * above.)
                 *
                 * Note that we can't just wind forward p and t and continue
                 * with the main loop.  This would fail for example with
                 *
                 * U&'\0061\0308bc' LIKE U&'\00E4_c' COLLATE ignore_accents
                 *
                 * You'd find that t=\0061 matches p=\00E4, but then the rest
                 * won't match; but t=\0061\0308 also matches p=\00E4, and
                 * then the rest will match.
                 */
                if (cmp == 0)
                {
                    int         matched = MatchText(t1, t1len, p1, p1len, locale);

                    if (matched == LIKE_TRUE)
                    {
                        if (buf)
                            pfree(buf);
                        return matched;
                    }
                }

                /*
                 * Didn't match.  If we used up the whole text, then the match
                 * fails.  Otherwise, try again with a longer substring.
                 */
                if (t1len == 0)
                {
                    if (buf)
                        pfree(buf);
                    return LIKE_FALSE;
                }
                else
                    NextChar(t1, t1len);
            }
        }
        else if (GETCHAR(*p, locale) != GETCHAR(*t, locale))
        {
            /* non-wildcard pattern char fails to match text char */
            return LIKE_FALSE;
        }

        /*
         * Pattern and text match, so advance.
         *
         * It is safe to use NextByte instead of NextChar here, even for
         * multi-byte character sets, because we are not following immediately
         * after a wildcard character. If we are in the middle of a multibyte
         * character, we must already have matched at least one byte of the
         * character from both text and pattern; so we cannot get out-of-sync
         * on character boundaries.  And we know that no backend-legal
         * encoding allows ASCII characters such as '%' to appear as non-first
         * bytes of characters, so we won't mistakenly detect a new wildcard.
         */
        NextByte(t, tlen);
        NextByte(p, plen);
    }

    if (tlen > 0)
        return LIKE_FALSE;      /* end of pattern, but not of text */

    /*
     * End of text, but perhaps not of pattern.  Match iff the remaining
     * pattern can match a zero-length string, ie, it's zero or more %'s.
     */
    while (plen > 0 && *p == '%')
        NextByte(p, plen);
    if (plen <= 0)
        return LIKE_TRUE;

    /*
     * End of text with no match, so no point in trying later places to start
     * matching this pattern.
     */
    return LIKE_ABORT;
}                               /* MatchText() */
References b, buf, CHECK_FOR_INTERRUPTS, check_stack_depth(), cmp(), ereport, errcode(), errmsg(), ERROR, GETCHAR, LIKE_ABORT, LIKE_FALSE, LIKE_TRUE, locale, MatchText(), NextByte, NextChar, palloc(), pfree(), and pg_strncoll().
Referenced by MatchText().
Macros

Functions

Macro Definition Documentation

◆ GETCHAR

Function Documentation

◆ MatchText()