trgm__op_8c_source.html

/*

 * contrib/pg_trgm/trgm_op.c

 */

#include "postgres.h"


#include <ctype.h>


#include "catalog/pg_collation_d.h"

#include "catalog/pg_type.h"

#include "common/int.h"

#include "lib/qunique.h"

#include "miscadmin.h"

#include "trgm.h"

#include "tsearch/ts_locale.h"

#include "utils/formatting.h"

#include "utils/guc.h"

#include "utils/lsyscache.h"

#include "utils/memutils.h"

#include "utils/pg_crc.h"


/*
 * Module magic block.  The designated initializers give the module's name
 * ("pg_trgm") and set its version field to PG_VERSION.
 */
PG_MODULE_MAGIC_EXT(
                    .name = "pg_trgm",
                    .version = PG_VERSION
);


/*
 * GUC variables.
 *
 * These are the storage locations handed to DefineCustomRealVariable() in
 * _PG_init() for pg_trgm.similarity_threshold,
 * pg_trgm.word_similarity_threshold and
 * pg_trgm.strict_word_similarity_threshold respectively.  The initializers
 * are float literals, the same literals passed as boot values in _PG_init().
 */
double      similarity_threshold = 0.3f;
double      word_similarity_threshold = 0.6f;
double      strict_word_similarity_threshold = 0.5f;


/*
 * Function-info declarations for the functions this file exposes through
 * the PG_FUNCTION_ARGS calling convention.  One entry per function.
 */
PG_FUNCTION_INFO_V1(set_limit);
PG_FUNCTION_INFO_V1(show_limit);
PG_FUNCTION_INFO_V1(show_trgm);
PG_FUNCTION_INFO_V1(similarity);
PG_FUNCTION_INFO_V1(word_similarity);
PG_FUNCTION_INFO_V1(strict_word_similarity);
PG_FUNCTION_INFO_V1(similarity_dist);
PG_FUNCTION_INFO_V1(similarity_op);
PG_FUNCTION_INFO_V1(word_similarity_op);
PG_FUNCTION_INFO_V1(word_similarity_commutator_op);
PG_FUNCTION_INFO_V1(word_similarity_dist_op);
PG_FUNCTION_INFO_V1(word_similarity_dist_commutator_op);
PG_FUNCTION_INFO_V1(strict_word_similarity_op);
PG_FUNCTION_INFO_V1(strict_word_similarity_commutator_op);
PG_FUNCTION_INFO_V1(strict_word_similarity_dist_op);
PG_FUNCTION_INFO_V1(strict_word_similarity_dist_commutator_op);


/*
 * CMPTRGM is the trigram comparator used throughout this file.  It is a
 * function pointer that initially points at CMPTRGM_CHOOSE; the first call
 * goes through CMPTRGM_CHOOSE, which re-points CMPTRGM at the signed or
 * unsigned comparison routine and then forwards the call.
 */
static int  CMPTRGM_CHOOSE(const void *a, const void *b);
int         (*CMPTRGM) (const void *a, const void *b) = CMPTRGM_CHOOSE;


/*
 * Trigram with position.
 *
 * Built by make_positional_trgm(): "index" is the trigram's position in the
 * text's trigram array, or -1 for trigrams that come from the search
 * pattern (whose positions do not matter).
 */
typedef struct
{
    trgm        trg;            /* the trigram itself */
    int         index;          /* position in text array, or -1 for pattern */
} pos_trgm;


/*
 * Trigram bound type.
 *
 * A bitmask kept per generated trigram; generate_trgm_only() ORs these bits
 * into its "bounds" output array for the first and last trigram of each
 * word.
 */
typedef uint8 TrgmBound;
#define TRGM_BOUND_LEFT             0x01    /* trigram is left bound of word */
#define TRGM_BOUND_RIGHT            0x02    /* trigram is right bound of word */

/*
 * Word similarity flags.
 *
 * Bitmask passed as the "flags" argument of calc_word_similarity() and
 * iterate_word_similarity().
 */
#define WORD_SIMILARITY_CHECK_ONLY  0x01    /* only check existence of similar
                                             * search pattern in text */
#define WORD_SIMILARITY_STRICT      0x02    /* force bounds of extent to match
                                             * word bounds */


/*
 * Register one similarity-threshold setting.
 *
 * All three thresholds share the same long description, the same 0.0 .. 1.0
 * range, the PGC_USERSET context, no flags and no hooks; only the name,
 * short description, storage address and boot value differ.
 */
static void
define_trgm_threshold_guc(const char *guc_name, const char *short_desc,
                          double *value_addr, double boot_value)
{
    DefineCustomRealVariable(guc_name,
                             short_desc,
                             "Valid range is 0.0 .. 1.0.",
                             value_addr,
                             boot_value,
                             0.0,
                             1.0,
                             PGC_USERSET,
                             0,
                             NULL,
                             NULL,
                             NULL);
}

/*
 * Module load callback: define the module's custom GUC variables and
 * reserve the "pg_trgm" GUC prefix.
 */
void
_PG_init(void)
{
    define_trgm_threshold_guc("pg_trgm.similarity_threshold",
                              "Sets the threshold used by the % operator.",
                              &similarity_threshold,
                              0.3f);

    define_trgm_threshold_guc("pg_trgm.word_similarity_threshold",
                              "Sets the threshold used by the <% operator.",
                              &word_similarity_threshold,
                              0.6f);

    define_trgm_threshold_guc("pg_trgm.strict_word_similarity_threshold",
                              "Sets the threshold used by the <<% operator.",
                              &strict_word_similarity_threshold,
                              0.5f);

    MarkGUCPrefixReserved("pg_trgm");
}


#define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )

/*
 * Three-byte trigram comparators.
 *
 * Both walk the three bytes in order and return -1, 0 or 1 for the first
 * position at which the operands differ (0 if all three bytes are equal).
 * They differ only in whether each byte is interpreted as "signed char" or
 * as "unsigned char".
 */
static inline int
CMPTRGM_SIGNED(const void *a, const void *b)
{
#define CMPPCHAR_S(a,b,i)  CMPCHAR( *(((const signed char*)(a))+i), *(((const signed char*)(b))+i) )

    int         pos;

    /* the first two bytes decide the result only if they differ */
    for (pos = 0; pos < 2; pos++)
    {
        int         diff = CMPPCHAR_S(a, b, pos);

        if (diff != 0)
            return diff;
    }

    return CMPPCHAR_S(a, b, 2);
}

static inline int
CMPTRGM_UNSIGNED(const void *a, const void *b)
{
#define CMPPCHAR_UNS(a,b,i)  CMPCHAR( *(((const unsigned char*)(a))+i), *(((const unsigned char*)(b))+i) )

    int         pos;

    /* the first two bytes decide the result only if they differ */
    for (pos = 0; pos < 2; pos++)
    {
        int         diff = CMPPCHAR_UNS(a, b, pos);

        if (diff != 0)
            return diff;
    }

    return CMPPCHAR_UNS(a, b, 2);
}


/*
 * Initial target of the CMPTRGM function pointer.
 *
 * On the first comparison this picks the signed or unsigned comparator
 * according to GetDefaultCharSignedness(), stores the choice in CMPTRGM so
 * that later calls bypass this function, and then performs the requested
 * comparison with the chosen routine.
 */
static int
CMPTRGM_CHOOSE(const void *a, const void *b)
{
    CMPTRGM = GetDefaultCharSignedness() ? CMPTRGM_SIGNED : CMPTRGM_UNSIGNED;

    return CMPTRGM(a, b);
}


/*
 * set_limit(float4) -- deprecated.
 *
 * Prefer setting the "pg_trgm.similarity_threshold" GUC variable directly.
 * This function converts its argument to text with the float4 output
 * function, applies it to that GUC via SetConfigOption(), and returns the
 * resulting value of similarity_threshold.
 */
Datum
set_limit(PG_FUNCTION_ARGS)
{
    float4      requested = PG_GETARG_FLOAT4(0);
    Oid         out_func;
    bool        out_is_varlena;
    char       *requested_text;

    /* render the new limit as text, since SetConfigOption wants a string */
    getTypeOutputInfo(FLOAT4OID, &out_func, &out_is_varlena);
    requested_text = OidOutputFunctionCall(out_func,
                                           Float4GetDatum(requested));

    SetConfigOption("pg_trgm.similarity_threshold", requested_text,
                    PGC_USERSET, PGC_S_SESSION);

    PG_RETURN_FLOAT4(similarity_threshold);
}


/*
 * Map an index scan strategy number to the similarity threshold that
 * applies to it.  Raises an error for any other strategy number.
 */
double
index_strategy_get_limit(StrategyNumber strategy)
{
    if (strategy == SimilarityStrategyNumber)
        return similarity_threshold;

    if (strategy == WordSimilarityStrategyNumber)
        return word_similarity_threshold;

    if (strategy == StrictWordSimilarityStrategyNumber)
        return strict_word_similarity_threshold;

    elog(ERROR, "unrecognized strategy number: %d", strategy);

    return 0.0;                 /* keep compiler quiet */
}


/*

 * Deprecated function.

 * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.

 */

Datum

show_limit(PG_FUNCTION_ARGS)

{

    PG_RETURN_FLOAT4(similarity_threshold);

}


/*
 * qsort/qunique comparator for trigram arrays.
 *
 * CMPTRGM is a function pointer whose target can change after its first
 * call, so sorting goes through this fixed wrapper, which dispatches via the
 * pointer's current value on every call.
 */
static int
comp_trgm(const void *a, const void *b)
{
    int         order = CMPTRGM(a, b);

    return order;
}


/*
 * Locate the first word in str (lenstr bytes long).
 *
 * Returns a pointer to the first word character, or NULL if the string
 * contains none; in the NULL case *endword and *charlen are left untouched.
 * On success *endword points just past the last character of the word and
 * *charlen receives the word's length in characters (not bytes).
 */
static char *
find_word(char *str, int lenstr, char **endword, int *charlen)
{
    char       *wordstart;
    char       *cursor;
    int         nchars = 0;

    /* skip leading non-word characters, one (multibyte) character at a time */
    for (wordstart = str;
         wordstart - str < lenstr;
         wordstart += pg_mblen(wordstart))
    {
        if (ISWORDCHR(wordstart))
            break;
    }

    if (wordstart - str >= lenstr)
        return NULL;

    /* consume word characters, counting them as we go */
    for (cursor = wordstart;
         cursor - str < lenstr && ISWORDCHR(cursor);
         cursor += pg_mblen(cursor))
        nchars++;

    *endword = cursor;
    *charlen = nchars;

    return wordstart;
}


/*
 * Store a trigram made of three (possibly multibyte) characters into *tptr,
 * which always holds exactly three bytes.
 *
 * str points at the trigram's bytes and bytelen is their total length.  A
 * three-byte input is copied verbatim; anything else is replaced by a hash
 * derived from a legacy CRC-32 of the bytes.
 */
void
compact_trigram(trgm *tptr, char *str, int bytelen)
{
    pg_crc32    hash;

    /* three single-byte characters: no hashing needed */
    if (bytelen == 3)
    {
        CPTRGM(tptr, str);
        return;
    }

    INIT_LEGACY_CRC32(hash);
    COMP_LEGACY_CRC32(hash, str, bytelen);
    FIN_LEGACY_CRC32(hash);

    /*
     * Only three bytes of the CRC are kept as the trigram; that is hoped to
     * be a good enough hash.
     */
    CPTRGM(tptr, &hash);
}


/*
 * Adds trigrams from words (already padded).
 *
 * tptr: where to store the next trigram.
 * str: the padded word, bytelen bytes and charlen characters long.
 *
 * Emits one trigram per window of three consecutive characters and returns
 * the advanced output pointer.  Nothing is emitted if the word has fewer
 * than three characters.
 */
static trgm *
make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
{
    char       *ptr = str;

    if (charlen < 3)
        return tptr;

    if (bytelen > charlen)
    {
        /* Find multibyte character boundaries and apply compact_trigram */
        /* byte lengths of the three characters in the current window */
        int         lenfirst = pg_mblen(str),
                    lenmiddle = pg_mblen(str + lenfirst),
                    lenlast = pg_mblen(str + lenfirst + lenmiddle);

        /* continue while the whole three-character window fits in str */
        while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
        {
            compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);

            ptr += lenfirst;
            tptr++;

            /*
             * Slide the window by one character.  Note the look-ahead: after
             * the final trigram this passes the address just past the last
             * window (str + bytelen when the last window ended exactly at
             * the end) to pg_mblen, so that byte must be readable by the
             * caller's arrangement.
             */
            lenfirst = lenmiddle;
            lenmiddle = lenlast;
            lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
        }
    }
    else
    {
        /* Fast path when there are no multibyte characters */
        Assert(bytelen == charlen);

        while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ )
        {
            CPTRGM(tptr, ptr);
            ptr++;
            tptr++;
        }
    }

    return tptr;
}


/*
 * Make array of trigrams without sorting and removing duplicate items.
 *
 * trg: where to return the array of trigrams.
 * str: source string, of length slen bytes.
 * bounds: where to return bounds of trigrams (if needed).  May be NULL.
 *         Bits are ORed into the existing contents, so the caller is
 *         expected to pass a zeroed array.
 *
 * The string is split into words with find_word(); each word is copied into
 * a scratch buffer (lower-cased first when IGNORECASE is defined), padded
 * with blanks, and handed to make_trigrams().
 *
 * Returns length of the generated array.
 */
static int
generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
{
    trgm       *tptr;
    char       *buf;
    int         charlen,
                bytelen;
    char       *bword,
               *eword;

    /* nothing to do for an empty string or one too short to form a trigram */
    if (slen + LPADDING + RPADDING < 3 || slen == 0)
        return 0;

    tptr = trg;

    /* Allocate a buffer for case-folded, blank-padded words */
    buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4);

    /* the left padding is the same for every word, so write it just once */
    if (LPADDING > 0)
    {
        *buf = ' ';
        if (LPADDING > 1)
            *(buf + 1) = ' ';
    }

    eword = str;
    while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
    {
#ifdef IGNORECASE
        /* bword now points at a separate lower-cased copy, freed below */
        bword = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
        bytelen = strlen(bword);
#else
        bytelen = eword - bword;
#endif

        memcpy(buf + LPADDING, bword, bytelen);

#ifdef IGNORECASE
        pfree(bword);
#endif

        /*
         * Two blanks are always written after the word, whatever RPADDING
         * is.  NOTE(review): presumably the extra blank keeps the look-ahead
         * in make_trigrams() on initialized memory -- confirm.
         */
        buf[LPADDING + bytelen] = ' ';
        buf[LPADDING + bytelen + 1] = ' ';

        /* Calculate trigrams marking their bounds if needed */
        if (bounds)
            bounds[tptr - trg] |= TRGM_BOUND_LEFT;
        tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
                             charlen + LPADDING + RPADDING);
        /* tptr has advanced, so tptr - trg - 1 is the word's last trigram */
        if (bounds)
            bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
    }

    pfree(buf);

    return tptr - trg;
}


/*

 * Guard against possible overflow in the palloc requests below.  (We

 * don't worry about the additive constants, since palloc can detect

 * requests that are a little above MaxAllocSize --- we just need to

 * prevent integer overflow in the multiplications.)

 */

static void

protect_out_of_mem(int slen)

{

    if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||

        (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length()))

        ereport(ERROR,

                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),

                 errmsg("out of memory")));

}


/*

 * Make array of trigrams with sorting and removing duplicate items.

 *

 * str: source string, of length slen bytes.

 *

 * Returns the sorted array of unique trigrams.

 */

TRGM *

generate_trgm(char *str, int slen)

{

    TRGM       *trg;

    int         len;


    protect_out_of_mem(slen);


    trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);

    trg->flag = ARRKEY;


    len = generate_trgm_only(GETARR(trg), str, slen, NULL);

    SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));


    if (len == 0)

        return trg;


    /*

     * Make trigrams unique.

     */

    if (len > 1)

    {

        qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);

        len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);

    }


    SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));


    return trg;

}


/*
 * Concatenate two trigram arrays into one array of positional trigrams.
 *
 * trg1: trigrams of the search pattern, len1 entries.  Their positions are
 *       irrelevant, so each gets index -1.
 * trg2: trigrams of the text being searched, len2 entries.  Each keeps its
 *       position within trg2 as its index.
 *
 * Returns a palloc'd array of len1 + len2 entries: trg1's first, then
 * trg2's.
 */
static pos_trgm *
make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
{
    pos_trgm   *result = (pos_trgm *) palloc(sizeof(pos_trgm) * (len1 + len2));
    pos_trgm   *out = result;
    int         k;

    /* pattern trigrams: position unknown / unimportant */
    for (k = 0; k < len1; k++, out++)
    {
        memcpy(&out->trg, &trg1[k], sizeof(trgm));
        out->index = -1;
    }

    /* text trigrams: remember where each one came from */
    for (k = 0; k < len2; k++, out++)
    {
        memcpy(&out->trg, &trg2[k], sizeof(trgm));
        out->index = k;
    }

    return result;
}


/*
 * qsort comparator for positional trigrams: order by trigram value, and
 * break ties by position.
 */
static int
comp_ptrgm(const void *v1, const void *v2)
{
    const pos_trgm *lhs = (const pos_trgm *) v1;
    const pos_trgm *rhs = (const pos_trgm *) v2;
    int         by_trgm = CMPTRGM(lhs->trg, rhs->trg);

    return (by_trgm != 0) ? by_trgm : pg_cmp_s32(lhs->index, rhs->index);
}


/*
 * Iterative search function which calculates maximum similarity with word in
 * the string. The maximum similarity is calculated only if the flag
 * WORD_SIMILARITY_CHECK_ONLY isn't set; with that flag the search stops as
 * soon as a similarity at or above the threshold has been found.
 *
 * trg2indexes: array which stores indexes of the array "found".
 * found: array which stores true or false values.
 * ulen1: count of unique trigrams of array "trg1".
 * len2: length of array "trg2" and array "trg2indexes".
 * len: length of the array "found".
 * flags: set of boolean flags parameterizing similarity calculation.
 * bounds: whether each trigram is left/right bound of word.  Must be
 *         non-NULL when WORD_SIMILARITY_STRICT is set.
 *
 * The function slides a window [lower, upper] over the positions of trg2,
 * tracking the number of unique trigrams in the window (ulen2) and how many
 * of those also occur in trg1 (count).
 *
 * Returns word similarity.
 */
static float4
iterate_word_similarity(int *trg2indexes,
                        bool *found,
                        int ulen1,
                        int len2,
                        int len,
                        uint8 flags,
                        TrgmBound *bounds)
{
    int        *lastpos,
                i,
                ulen2 = 0,
                count = 0,
                upper = -1,
                lower;
    float4      smlr_cur,
                smlr_max = 0.0f;
    double      threshold;

    Assert(bounds || !(flags & WORD_SIMILARITY_STRICT));

    /* Select appropriate threshold */
    threshold = (flags & WORD_SIMILARITY_STRICT) ?
        strict_word_similarity_threshold :
        word_similarity_threshold;

    /*
     * Consider first trigram as initial lower bound for strict word
     * similarity, or initialize it later with first trigram present for plain
     * word similarity.
     */
    lower = (flags & WORD_SIMILARITY_STRICT) ? 0 : -1;

    /* Memorise last position of each trigram */
    lastpos = (int *) palloc(sizeof(int) * len);
    /* filling every byte with 0xFF makes each int entry -1, i.e. "not seen" */
    memset(lastpos, -1, sizeof(int) * len);

    for (i = 0; i < len2; i++)
    {
        int         trgindex;

        CHECK_FOR_INTERRUPTS();

        /* Get index of next trigram */
        trgindex = trg2indexes[i];

        /* Update last position of this trigram */
        if (lower >= 0 || found[trgindex])
        {
            /* first occurrence inside the current window */
            if (lastpos[trgindex] < 0)
            {
                ulen2++;
                if (found[trgindex])
                    count++;
            }
            lastpos[trgindex] = i;
        }

        /*
         * Adjust upper bound if trigram is upper bound of word for strict
         * word similarity, or if trigram is present in required substring for
         * plain word similarity
         */
        if ((flags & WORD_SIMILARITY_STRICT) ? (bounds[i] & TRGM_BOUND_RIGHT)
            : found[trgindex])
        {
            int         prev_lower,
                        tmp_ulen2,
                        tmp_lower,
                        tmp_count;

            upper = i;
            /* plain mode: the window starts at the first matching trigram */
            if (lower == -1)
            {
                lower = i;
                ulen2 = 1;
            }

            smlr_cur = CALCSML(count, ulen1, ulen2);

            /* Also try to adjust lower bound for greater similarity */
            tmp_count = count;
            tmp_ulen2 = ulen2;
            prev_lower = lower;
            for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
            {
                float       smlr_tmp;
                int         tmp_trgindex;

                /*
                 * Adjust lower bound only if trigram is lower bound of word
                 * for strict word similarity, or consider every trigram as
                 * lower bound for plain word similarity.
                 */
                if (!(flags & WORD_SIMILARITY_STRICT)
                    || (bounds[tmp_lower] & TRGM_BOUND_LEFT))
                {
                    smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
                    /* a better window start: commit it */
                    if (smlr_tmp > smlr_cur)
                    {
                        smlr_cur = smlr_tmp;
                        ulen2 = tmp_ulen2;
                        lower = tmp_lower;
                        count = tmp_count;
                    }

                    /*
                     * If we only check that word similarity is greater than
                     * threshold we do not need to calculate a maximum
                     * similarity.
                     */
                    if ((flags & WORD_SIMILARITY_CHECK_ONLY)
                        && smlr_cur >= threshold)
                        break;
                }

                /*
                 * Dropping position tmp_lower removes its trigram from the
                 * trial window only if this was the trigram's last
                 * occurrence.
                 */
                tmp_trgindex = trg2indexes[tmp_lower];
                if (lastpos[tmp_trgindex] == tmp_lower)
                {
                    tmp_ulen2--;
                    if (found[tmp_trgindex])
                        tmp_count--;
                }
            }

            smlr_max = Max(smlr_max, smlr_cur);

            /*
             * if we only check that word similarity is greater than threshold
             * we do not need to calculate a maximum similarity.
             */
            if ((flags & WORD_SIMILARITY_CHECK_ONLY) && smlr_max >= threshold)
                break;

            /*
             * Forget trigrams whose last occurrence lies in the part of the
             * window that was just cut off by moving lower forward.
             */
            for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
            {
                int         tmp_trgindex;

                tmp_trgindex = trg2indexes[tmp_lower];
                if (lastpos[tmp_trgindex] == tmp_lower)
                    lastpos[tmp_trgindex] = -1;
            }
        }
    }

    pfree(lastpos);

    return smlr_max;
}


/*
 * Calculate word similarity.
 *
 * This function prepares two arrays, "trg2indexes" and "found", which are
 * then used to calculate word similarity using iterate_word_similarity().
 *
 * "trg2indexes" is an array which stores indexes of the array "found".
 * In other words:
 * trg2indexes[j] = i;
 * found[i] = true (or false);
 * If found[i] == true then trigram trg2[j] is present in array "trg1".
 * If found[i] == false then trigram trg2[j] is not present in array "trg1".
 *
 * str1: search pattern string, of length slen1 bytes.
 * str2: text in which we are looking for a word, of length slen2 bytes.
 * flags: set of boolean flags parameterizing similarity calculation.
 *
 * Returns word similarity.  All working memory allocated here is released
 * before returning.
 */
static float4
calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
                     uint8 flags)
{
    bool       *found;
    pos_trgm   *ptrg;
    trgm       *trg1;
    trgm       *trg2;
    int         len1,
                len2,
                len,
                i,
                j,
                ulen1;
    int        *trg2indexes;
    float4      result;
    TrgmBound  *bounds;

    protect_out_of_mem(slen1 + slen2);

    /* Make positional trigrams */
    trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);
    trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);
    /* word bounds are only needed (and only consulted) in strict mode */
    if (flags & WORD_SIMILARITY_STRICT)
        bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);
    else
        bounds = NULL;

    len1 = generate_trgm_only(trg1, str1, slen1, NULL);
    len2 = generate_trgm_only(trg2, str2, slen2, bounds);

    /*
     * If either side produced no trigrams, nothing can match and the
     * similarity is zero.  Returning here also keeps the merge below from
     * inspecting found[0] of a zero-length array when both sides are empty.
     */
    if (len1 == 0 || len2 == 0)
    {
        pfree(trg1);
        pfree(trg2);
        if (bounds)
            pfree(bounds);
        return 0.0f;
    }

    ptrg = make_positional_trgm(trg1, len1, trg2, len2);
    len = len1 + len2;
    qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm);

    pfree(trg1);
    pfree(trg2);

    /*
     * Merge positional trigrams array: enumerate each trigram and find its
     * presence in required word.
     */
    trg2indexes = (int *) palloc(sizeof(int) * len2);
    found = (bool *) palloc0(sizeof(bool) * len);

    ulen1 = 0;
    j = 0;                      /* number assigned to current distinct trigram */
    for (i = 0; i < len; i++)
    {
        if (i > 0)
        {
            int         cmp = CMPTRGM(ptrg[i - 1].trg, ptrg[i].trg);

            /* a new distinct trigram starts here: close out the previous one */
            if (cmp != 0)
            {
                if (found[j])
                    ulen1++;
                j++;
            }
        }

        if (ptrg[i].index >= 0)
        {
            /* text trigram: record which distinct trigram sits at its position */
            trg2indexes[ptrg[i].index] = j;
        }
        else
        {
            /* pattern trigram (index -1): mark the distinct trigram as wanted */
            found[j] = true;
        }
    }
    /* account for the last distinct trigram */
    if (found[j])
        ulen1++;

    /* Run iterative procedure to find maximum similarity with word */
    result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len,
                                     flags, bounds);

    pfree(trg2indexes);
    pfree(found);
    pfree(ptrg);
    if (bounds)
        pfree(bounds);

    return result;
}


/*
 * Extract the next non-wildcard part of a search string, i.e. a word bounded
 * by '_' or '%' meta-characters, non-word characters or string end.
 *
 * str: source string, of length lenstr bytes (need not be null-terminated)
 * buf: where to return the substring (must be long enough)
 * *bytelen: receives byte length of the found substring
 * *charlen: receives character length of the found substring
 *
 * Returns pointer to end+1 of the found substring in the source string.
 * Returns NULL if no word found (in which case buf, bytelen, charlen not set)
 *
 * If the found word is bounded by non-word characters or string boundaries
 * then this function will include corresponding padding spaces into buf.
 * The padding spaces are included in both *bytelen and *charlen.
 */
static const char *
get_wildcard_part(const char *str, int lenstr,
                  char *buf, int *bytelen, int *charlen)
{
    const char *beginword = str;
    const char *endword;
    char       *s = buf;        /* next write position in buf */
    bool        in_leading_wildcard_meta = false;
    bool        in_trailing_wildcard_meta = false;
    bool        in_escape = false;
    int         clen;

    /*
     * Find the first word character, remembering whether preceding character
     * was wildcard meta-character.  Note that the in_escape state persists
     * from this loop to the next one, since we may exit at a word character
     * that is in_escape.
     */
    while (beginword - str < lenstr)
    {
        if (in_escape)
        {
            /* an escaped word character starts the word */
            if (ISWORDCHR(beginword))
                break;
            /* an escaped non-word character is just a separator */
            in_escape = false;
            in_leading_wildcard_meta = false;
        }
        else
        {
            if (ISESCAPECHAR(beginword))
                in_escape = true;
            else if (ISWILDCARDCHAR(beginword))
                in_leading_wildcard_meta = true;
            else if (ISWORDCHR(beginword))
                break;
            else
                in_leading_wildcard_meta = false;
        }
        beginword += pg_mblen(beginword);
    }

    /*
     * Handle string end.
     */
    if (beginword - str >= lenstr)
        return NULL;

    /*
     * Add left padding spaces if preceding character wasn't wildcard
     * meta-character.
     */
    *charlen = 0;
    if (!in_leading_wildcard_meta)
    {
        if (LPADDING > 0)
        {
            *s++ = ' ';
            (*charlen)++;
            if (LPADDING > 1)
            {
                *s++ = ' ';
                (*charlen)++;
            }
        }
    }

    /*
     * Copy data into buf until wildcard meta-character, non-word character or
     * string boundary.  Strip escapes during copy.
     */
    endword = beginword;
    while (endword - str < lenstr)
    {
        clen = pg_mblen(endword);
        if (in_escape)
        {
            if (ISWORDCHR(endword))
            {
                /* escaped word character: copy it without the escape */
                memcpy(s, endword, clen);
                (*charlen)++;
                s += clen;
            }
            else
            {
                /*
                 * Back up endword to the escape character when stopping at an
                 * escaped char, so that subsequent get_wildcard_part will
                 * restart from the escape character.  We assume here that
                 * escape chars are single-byte.
                 */
                endword--;
                break;
            }
            in_escape = false;
        }
        else
        {
            if (ISESCAPECHAR(endword))
                in_escape = true;
            else if (ISWILDCARDCHAR(endword))
            {
                /* word ends at a wildcard: no right padding will be added */
                in_trailing_wildcard_meta = true;
                break;
            }
            else if (ISWORDCHR(endword))
            {
                memcpy(s, endword, clen);
                (*charlen)++;
                s += clen;
            }
            else
                break;
        }
        endword += clen;
    }

    /*
     * Add right padding spaces if next character isn't wildcard
     * meta-character.
     */
    if (!in_trailing_wildcard_meta)
    {
        if (RPADDING > 0)
        {
            *s++ = ' ';
            (*charlen)++;
            if (RPADDING > 1)
            {
                *s++ = ' ';
                (*charlen)++;
            }
        }
    }

    *bytelen = s - buf;
    return endword;
}


/*

 * Generates trigrams for wildcard search string.

 *

 * Returns array of trigrams that must occur in any string that matches the

 * wildcard string.  For example, given pattern "a%bcd%" the trigrams

 * " a", "bcd" would be extracted.

 */

TRGM *

generate_wildcard_trgm(const char *str, int slen)

{

    TRGM       *trg;

    char       *buf,

               *buf2;

    trgm       *tptr;

    int         len,

                charlen,

                bytelen;

    const char *eword;


    protect_out_of_mem(slen);


    trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);

    trg->flag = ARRKEY;

    SET_VARSIZE(trg, TRGMHDRSIZE);


    if (slen + LPADDING + RPADDING < 3 || slen == 0)

        return trg;


    tptr = GETARR(trg);


    /* Allocate a buffer for blank-padded, but not yet case-folded, words */

    buf = palloc(sizeof(char) * (slen + 4));


    /*

     * Extract trigrams from each substring extracted by get_wildcard_part.

     */

    eword = str;

    while ((eword = get_wildcard_part(eword, slen - (eword - str),

                                      buf, &bytelen, &charlen)) != NULL)

    {

#ifdef IGNORECASE

        buf2 = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);

        bytelen = strlen(buf2);

#else

        buf2 = buf;

#endif


        /*

         * count trigrams

         */

        tptr = make_trigrams(tptr, buf2, bytelen, charlen);


#ifdef IGNORECASE

        pfree(buf2);

#endif

    }


    pfree(buf);


    if ((len = tptr - GETARR(trg)) == 0)

        return trg;


    /*

     * Make trigrams unique.

     */

    if (len > 1)

    {

        qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);

        len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);

    }


    SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));


    return trg;

}


/*
 * Pack the three bytes of a trigram into an integer: the first byte ends up
 * in bits 16..23, the second in bits 8..15 and the third in bits 0..7.
 * Bytes are treated as unsigned.
 */
uint32
trgm2int(trgm *ptr)
{
    const unsigned char *bytes = (const unsigned char *) ptr;

    return ((uint32) bytes[0] << 16) |
        ((uint32) bytes[1] << 8) |
        (uint32) bytes[2];
}


Datum

show_trgm(PG_FUNCTION_ARGS)

{

    text       *in = PG_GETARG_TEXT_PP(0);

    TRGM       *trg;

    Datum      *d;

    ArrayType  *a;

    trgm       *ptr;

    int         i;


    trg = generate_trgm(VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));

    d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg)));


    for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++)

    {

        text       *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3));


        if (pg_database_encoding_max_length() > 1 && !ISPRINTABLETRGM(ptr))

        {

            snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr));

            SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item)));

        }

        else

        {

            SET_VARSIZE(item, VARHDRSZ + 3);

            CPTRGM(VARDATA(item), ptr);

        }

        d[i] = PointerGetDatum(item);

    }


    a = construct_array_builtin(d, ARRNELEM(trg), TEXTOID);


    for (i = 0; i < ARRNELEM(trg); i++)

        pfree(DatumGetPointer(d[i]));


    pfree(d);

    pfree(trg);

    PG_FREE_IF_COPY(in, 0);


    PG_RETURN_POINTER(a);

}


/*
 * Compute the similarity of two trigram arrays.
 *
 * Both arrays are walked in step as in a merge, counting the trigrams
 * common to both, so both must be sorted.  Returns 0 if either array is
 * empty.
 *
 * If inexact is true, the number of common trigrams is used in place of
 * trg2's length: the real length of the second string is unknown in an
 * inexact search, and the common count is a lower bound of it.
 */
float4
cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
{
    trgm       *arr1 = GETARR(trg1);
    trgm       *arr2 = GETARR(trg2);
    int         len1 = ARRNELEM(trg1);
    int         len2 = ARRNELEM(trg2);
    int         pos1 = 0;
    int         pos2 = 0;
    int         count = 0;

    /* explicit test is needed to avoid 0/0 division when both lengths are 0 */
    if (len1 <= 0 || len2 <= 0)
        return (float4) 0.0;

    while (pos1 < len1 && pos2 < len2)
    {
        int         res = CMPTRGM(arr1 + pos1, arr2 + pos2);

        /* advance whichever side is behind; on a match advance both */
        if (res <= 0)
            pos1++;
        if (res >= 0)
            pos2++;
        if (res == 0)
            count++;
    }

    return CALCSML(count, len1, inexact ? count : len2);
}


/*
 * Returns true if every trigram of trg1 also appears in trg2.
 *
 * Both arrays are scanned in step, which relies on them being sorted.
 */
bool
trgm_contained_by(TRGM *trg1, TRGM *trg2)
{
    trgm       *arr1 = GETARR(trg1);
    trgm       *arr2 = GETARR(trg2);
    int         len1 = ARRNELEM(trg1);
    int         len2 = ARRNELEM(trg2);
    int         pos1 = 0;
    int         pos2 = 0;

    while (pos1 < len1 && pos2 < len2)
    {
        int         res = CMPTRGM(arr1 + pos1, arr2 + pos2);

        /* trg2 has already moved past this trg1 trigram: it is missing */
        if (res < 0)
            return false;

        if (res == 0)
            pos1++;
        pos2++;
    }

    /* contained only if every trigram of trg1 was matched */
    return !(pos1 < len1);
}


/*
 * Build a presence map of the trigrams of "query" within "key".
 *
 * Returns a palloc'd boolean array with one entry per query trigram, true
 * where that trigram occurs in key.  Lookups use binary search, so "key"
 * must be sorted; "query" need not be.
 */
bool *
trgm_presence_map(TRGM *query, TRGM *key)
{
    trgm       *qarr = GETARR(query);
    trgm       *karr = GETARR(key);
    int         lenq = ARRNELEM(query);
    int         lenk = ARRNELEM(key);
    bool       *result = (bool *) palloc0(lenq * sizeof(bool));
    int         qi;

    for (qi = 0; qi < lenq; qi++)
    {
        trgm       *needle = qarr + qi;
        int         lo = 0;
        int         hi = lenk;

        /* binary search over the half-open range [lo, hi) of key */
        while (lo < hi)
        {
            int         mid = (lo + hi) / 2;
            int         res = CMPTRGM(needle, karr + mid);

            if (res == 0)
            {
                result[qi] = true;
                break;
            }

            if (res < 0)
                hi = mid;
            else
                lo = mid + 1;
        }
    }

    return result;
}


/*
 * similarity
 *
 * SQL-callable function taking two text arguments.  Each argument is turned
 * into a trigram array with generate_trgm(), and the result of cnt_sml() on
 * the two arrays (exact mode) is returned as a float4.
 */
Datum
similarity(PG_FUNCTION_ARGS)
{
    text       *lhs = PG_GETARG_TEXT_PP(0);
    text       *rhs = PG_GETARG_TEXT_PP(1);
    TRGM       *lhs_trgm = generate_trgm(VARDATA_ANY(lhs), VARSIZE_ANY_EXHDR(lhs));
    TRGM       *rhs_trgm = generate_trgm(VARDATA_ANY(rhs), VARSIZE_ANY_EXHDR(rhs));
    float4      sml = cnt_sml(lhs_trgm, rhs_trgm, false);

    /* release the trigram arrays and any detoasted argument copies */
    pfree(lhs_trgm);
    pfree(rhs_trgm);
    PG_FREE_IF_COPY(lhs, 0);
    PG_FREE_IF_COPY(rhs, 1);

    PG_RETURN_FLOAT4(sml);
}


/*
 * word_similarity
 *
 * SQL-callable function taking two text arguments.  Returns, as a float4,
 * the value computed by calc_word_similarity() for (arg0, arg1) with no
 * flags set.
 */
Datum
word_similarity(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    sml = calc_word_similarity(str0, len0, str1, len1, 0);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_FLOAT4(sml);
}


/*
 * strict_word_similarity
 *
 * SQL-callable function taking two text arguments.  Returns, as a float4,
 * the value computed by calc_word_similarity() for (arg0, arg1) with the
 * WORD_SIMILARITY_STRICT flag set.
 */
Datum
strict_word_similarity(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    sml = calc_word_similarity(str0, len0, str1, len1,
                               WORD_SIMILARITY_STRICT);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_FLOAT4(sml);
}


/*
 * similarity_dist
 *
 * SQL-callable distance function: invokes similarity() on the two arguments
 * and returns one minus that value as a float4.
 */
Datum
similarity_dist(PG_FUNCTION_ARGS)
{
    Datum       sml_datum;
    float4      sml;

    sml_datum = DirectFunctionCall2(similarity,
                                    PG_GETARG_DATUM(0),
                                    PG_GETARG_DATUM(1));
    sml = DatumGetFloat4(sml_datum);

    PG_RETURN_FLOAT4(1.0 - sml);
}


/*
 * similarity_op
 *
 * SQL-callable boolean operator function: invokes similarity() on the two
 * arguments and reports whether the result reaches the similarity_threshold
 * GUC value.
 */
Datum
similarity_op(PG_FUNCTION_ARGS)
{
    Datum       sml_datum;
    float4      sml;

    sml_datum = DirectFunctionCall2(similarity,
                                    PG_GETARG_DATUM(0),
                                    PG_GETARG_DATUM(1));
    sml = DatumGetFloat4(sml_datum);

    PG_RETURN_BOOL(sml >= similarity_threshold);
}


/*
 * word_similarity_op
 *
 * SQL-callable boolean operator function.  Calls calc_word_similarity() for
 * (arg0, arg1) with WORD_SIMILARITY_CHECK_ONLY and reports whether the
 * returned value reaches the word_similarity_threshold GUC value.
 */
Datum
word_similarity_op(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    sml = calc_word_similarity(str0, len0, str1, len1,
                               WORD_SIMILARITY_CHECK_ONLY);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_BOOL(sml >= word_similarity_threshold);
}


/*
 * word_similarity_commutator_op
 *
 * Same as word_similarity_op, except that the two arguments are handed to
 * calc_word_similarity() in swapped order: (arg1, arg0).
 */
Datum
word_similarity_commutator_op(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    /* note the reversed operand order */
    sml = calc_word_similarity(str1, len1, str0, len0,
                               WORD_SIMILARITY_CHECK_ONLY);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_BOOL(sml >= word_similarity_threshold);
}


/*
 * word_similarity_dist_op
 *
 * SQL-callable distance function.  Calls calc_word_similarity() for
 * (arg0, arg1) with no flags and returns one minus that value as a float4.
 */
Datum
word_similarity_dist_op(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    sml = calc_word_similarity(str0, len0, str1, len1, 0);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_FLOAT4(1.0 - sml);
}


/*
 * word_similarity_dist_commutator_op
 *
 * Same as word_similarity_dist_op, except that the two arguments are handed
 * to calc_word_similarity() in swapped order: (arg1, arg0).
 */
Datum
word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    /* note the reversed operand order */
    sml = calc_word_similarity(str1, len1, str0, len0, 0);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_FLOAT4(1.0 - sml);
}


/*
 * strict_word_similarity_op
 *
 * SQL-callable boolean operator function.  Calls calc_word_similarity() for
 * (arg0, arg1) with both WORD_SIMILARITY_CHECK_ONLY and
 * WORD_SIMILARITY_STRICT set, and reports whether the returned value reaches
 * the strict_word_similarity_threshold GUC value.
 */
Datum
strict_word_similarity_op(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    sml = calc_word_similarity(str0, len0, str1, len1,
                               WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_BOOL(sml >= strict_word_similarity_threshold);
}


/*
 * strict_word_similarity_commutator_op
 *
 * Same as strict_word_similarity_op, except that the two arguments are
 * handed to calc_word_similarity() in swapped order: (arg1, arg0).
 */
Datum
strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    /* note the reversed operand order */
    sml = calc_word_similarity(str1, len1, str0, len0,
                               WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_BOOL(sml >= strict_word_similarity_threshold);
}


/*
 * strict_word_similarity_dist_op
 *
 * SQL-callable distance function.  Calls calc_word_similarity() for
 * (arg0, arg1) with WORD_SIMILARITY_STRICT set and returns one minus that
 * value as a float4.
 */
Datum
strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    sml = calc_word_similarity(str0, len0, str1, len1,
                               WORD_SIMILARITY_STRICT);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_FLOAT4(1.0 - sml);
}


/*
 * strict_word_similarity_dist_commutator_op
 *
 * Same as strict_word_similarity_dist_op, except that the two arguments are
 * handed to calc_word_similarity() in swapped order: (arg1, arg0).
 */
Datum
strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
{
    text       *arg0 = PG_GETARG_TEXT_PP(0);
    text       *arg1 = PG_GETARG_TEXT_PP(1);
    char       *str0 = VARDATA_ANY(arg0);
    int         len0 = VARSIZE_ANY_EXHDR(arg0);
    char       *str1 = VARDATA_ANY(arg1);
    int         len1 = VARSIZE_ANY_EXHDR(arg1);
    float4      sml;

    /* note the reversed operand order */
    sml = calc_word_similarity(str1, len1, str0, len0,
                               WORD_SIMILARITY_STRICT);

    /* drop detoasted copies, if any were made */
    PG_FREE_IF_COPY(arg0, 0);
    PG_FREE_IF_COPY(arg1, 1);

    PG_RETURN_FLOAT4(1.0 - sml);
}

construct_array_builtin
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
Definition: arrayfuncs.c:3381

uint8
uint8_t uint8
Definition: c.h:500

Max
#define Max(x, y)
Definition: c.h:969

VARHDRSZ
#define VARHDRSZ
Definition: c.h:663

uint32
uint32_t uint32
Definition: c.h:502

float4
float float4
Definition: c.h:600

Size
size_t Size
Definition: c.h:576

errcode
int errcode(int sqlerrcode)
Definition: elog.c:854

errmsg
int errmsg(const char *fmt,...)
Definition: elog.c:1071

ERROR
#define ERROR
Definition: elog.h:39

elog
#define elog(elevel,...)
Definition: elog.h:225

ereport
#define ereport(elevel,...)
Definition: elog.h:149

MaxAllocSize
#define MaxAllocSize
Definition: fe_memutils.h:22

OidOutputFunctionCall
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1763

PG_FREE_IF_COPY
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260

PG_GETARG_TEXT_PP
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309

DirectFunctionCall2
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:684

PG_GETARG_DATUM
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268

PG_GETARG_FLOAT4
#define PG_GETARG_FLOAT4(n)
Definition: fmgr.h:281

PG_RETURN_POINTER
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361

PG_RETURN_FLOAT4
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:366

PG_FUNCTION_ARGS
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193

PG_RETURN_BOOL
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359

str_tolower
char * str_tolower(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1637

formatting.h

DefineCustomRealVariable
void DefineCustomRealVariable(const char *name, const char *short_desc, const char *long_desc, double *valueAddr, double bootValue, double minValue, double maxValue, GucContext context, int flags, GucRealCheckHook check_hook, GucRealAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5189

SetConfigOption
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4332

MarkGUCPrefixReserved
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:5280

guc.h

PGC_S_SESSION
@ PGC_S_SESSION
Definition: guc.h:126

PGC_USERSET
@ PGC_USERSET
Definition: guc.h:79

Assert
Assert(PointerIsAligned(start, uint64))

str
const char * str
Definition: hashfn_unstable.h:254

CALCGTSIZE
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:60

val
long val
Definition: informix.c:689

int.h

pg_cmp_s32
static int pg_cmp_s32(int32 a, int32 b)
Definition: int.h:646

b
int b
Definition: isn.c:74

a
int a
Definition: isn.c:73

j
int j
Definition: isn.c:78

i
int i
Definition: isn.c:77

getTypeOutputInfo
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:3047

lsyscache.h

pg_database_encoding_max_length
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1547

pg_mblen
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1024

pfree
void pfree(void *pointer)
Definition: mcxt.c:1528

palloc0
void * palloc0(Size size)
Definition: mcxt.c:1351

palloc
void * palloc(Size size)
Definition: mcxt.c:1321

memutils.h

miscadmin.h

CHECK_FOR_INTERRUPTS
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122

sort-test.key
key
Definition: sort-test.py:19

lower
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49

upper
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80

len
const void size_t len
Definition: pg_crc32c_sse42.c:28

crc
return crc
Definition: pg_crc32c_sse42.c:72

pg_crc.h

pg_crc32
uint32 pg_crc32
Definition: pg_crc.h:37

INIT_LEGACY_CRC32
#define INIT_LEGACY_CRC32(crc)
Definition: pg_crc.h:79

COMP_LEGACY_CRC32
#define COMP_LEGACY_CRC32(crc, data, len)
Definition: pg_crc.h:81

FIN_LEGACY_CRC32
#define FIN_LEGACY_CRC32(crc)
Definition: pg_crc.h:80

buf
static char * buf
Definition: pg_test_fsync.c:72

pg_type.h

snprintf
#define snprintf
Definition: port.h:239

qsort
#define qsort(a, b, c, d)
Definition: port.h:479

postgres.h

PointerGetDatum
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327

Float4GetDatum
static Datum Float4GetDatum(float4 X)
Definition: postgres.h:480

Datum
uintptr_t Datum
Definition: postgres.h:69

DatumGetFloat4
static float4 DatumGetFloat4(Datum X)
Definition: postgres.h:463

DatumGetPointer
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317

Oid
unsigned int Oid
Definition: postgres_ext.h:30

qunique.h

qunique
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition: qunique.h:21

cmp
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743

StrategyNumber
uint16 StrategyNumber
Definition: stratnum.h:22

ArrayType
Definition: array.h:93

TRGM
Definition: trgm.h:58

TRGM::flag
uint8 flag
Definition: trgm.h:60

index
Definition: type.h:96

pos_trgm
Definition: trgm_op.c:53

pos_trgm::index
int index
Definition: trgm_op.c:55

pos_trgm::trg
trgm trg
Definition: trgm_op.c:54

varlena
Definition: c.h:658

trgm.h

CALCSML
#define CALCSML(count, len1, len2)
Definition: trgm.h:107

ISWORDCHR
#define ISWORDCHR(c)
Definition: trgm.h:50

WordSimilarityStrategyNumber
#define WordSimilarityStrategyNumber
Definition: trgm.h:35

StrictWordSimilarityStrategyNumber
#define StrictWordSimilarityStrategyNumber
Definition: trgm.h:37

ISESCAPECHAR
#define ISESCAPECHAR(x)
Definition: trgm.h:54

ARRNELEM
#define ARRNELEM(x)
Definition: trgm.h:98

RPADDING
#define RPADDING
Definition: trgm.h:17

LPADDING
#define LPADDING
Definition: trgm.h:16

SimilarityStrategyNumber
#define SimilarityStrategyNumber
Definition: trgm.h:29

ISWILDCARDCHAR
#define ISWILDCARDCHAR(x)
Definition: trgm.h:55

trgm
char trgm[3]
Definition: trgm.h:41

CPTRGM
#define CPTRGM(a, b)
Definition: trgm.h:43

ISPRINTABLETRGM
#define ISPRINTABLETRGM(t)
Definition: trgm.h:52

GETARR
#define GETARR(x)
Definition: trgm.h:97

ARRKEY
#define ARRKEY
Definition: trgm.h:87

TRGMHDRSIZE
#define TRGMHDRSIZE
Definition: trgm.h:64

iterate_word_similarity
static float4 iterate_word_similarity(int *trg2indexes, bool *found, int ulen1, int len2, int len, uint8 flags, TrgmBound *bounds)
Definition: trgm_op.c:503

TrgmBound
uint8 TrgmBound
Definition: trgm_op.c:59

strict_word_similarity_commutator_op
Datum strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1321

CMPTRGM_UNSIGNED
static int CMPTRGM_UNSIGNED(const void *a, const void *b)
Definition: trgm_op.c:131

_PG_init
void _PG_init(void)
Definition: trgm_op.c:71

set_limit
Datum set_limit(PG_FUNCTION_ARGS)
Definition: trgm_op.c:160

comp_trgm
static int comp_trgm(const void *a, const void *b)
Definition: trgm_op.c:211

strict_word_similarity_threshold
double strict_word_similarity_threshold
Definition: trgm_op.c:29

generate_trgm
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:406

trgm2int
uint32 trgm2int(trgm *ptr)
Definition: trgm_op.c:985

PG_MODULE_MAGIC_EXT
PG_MODULE_MAGIC_EXT(.name="pg_trgm",.version=PG_VERSION)

CMPTRGM_CHOOSE
static int CMPTRGM_CHOOSE(const void *a, const void *b)
Definition: trgm_op.c:145

CMPTRGM
int(* CMPTRGM)(const void *a, const void *b)
Definition: trgm_op.c:49

WORD_SIMILARITY_CHECK_ONLY
#define WORD_SIMILARITY_CHECK_ONLY
Definition: trgm_op.c:64

protect_out_of_mem
static void protect_out_of_mem(int slen)
Definition: trgm_op.c:389

word_similarity
Datum word_similarity(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1189

strict_word_similarity_dist_commutator_op
Datum strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1353

compact_trigram
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:248

trgm_presence_map
bool * trgm_presence_map(TRGM *query, TRGM *key)
Definition: trgm_op.c:1128

PG_FUNCTION_INFO_V1
PG_FUNCTION_INFO_V1(set_limit)

calc_word_similarity
static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, uint8 flags)
Definition: trgm_op.c:671

word_similarity_threshold
double word_similarity_threshold
Definition: trgm_op.c:28

word_similarity_dist_commutator_op
Datum word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1289

index_strategy_get_limit
double index_strategy_get_limit(StrategyNumber strategy)
Definition: trgm_op.c:182

similarity
Datum similarity(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1167

make_positional_trgm
static pos_trgm * make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
Definition: trgm_op.c:447

similarity_threshold
double similarity_threshold
Definition: trgm_op.c:27

CMPTRGM_SIGNED
static int CMPTRGM_SIGNED(const void *a, const void *b)
Definition: trgm_op.c:121

comp_ptrgm
static int comp_ptrgm(const void *v1, const void *v2)
Definition: trgm_op.c:474

find_word
static char * find_word(char *str, int lenstr, char **endword, int *charlen)
Definition: trgm_op.c:221

show_trgm
Datum show_trgm(PG_FUNCTION_ARGS)
Definition: trgm_op.c:999

trgm_contained_by
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Definition: trgm_op.c:1089

CMPPCHAR_S
#define CMPPCHAR_S(a, b, i)

TRGM_BOUND_RIGHT
#define TRGM_BOUND_RIGHT
Definition: trgm_op.c:61

show_limit
Datum show_limit(PG_FUNCTION_ARGS)
Definition: trgm_op.c:205

CMPPCHAR_UNS
#define CMPPCHAR_UNS(a, b, i)

strict_word_similarity_dist_op
Datum strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1337

word_similarity_op
Datum word_similarity_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1241

word_similarity_commutator_op
Datum word_similarity_commutator_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1257

WORD_SIMILARITY_STRICT
#define WORD_SIMILARITY_STRICT
Definition: trgm_op.c:65

TRGM_BOUND_LEFT
#define TRGM_BOUND_LEFT
Definition: trgm_op.c:60

generate_wildcard_trgm
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:916

make_trigrams
static trgm * make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
Definition: trgm_op.c:273

cnt_sml
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition: trgm_op.c:1041

generate_trgm_only
static int generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
Definition: trgm_op.c:325

similarity_op
Datum similarity_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1231

word_similarity_dist_op
Datum word_similarity_dist_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1273

get_wildcard_part
static const char * get_wildcard_part(const char *str, int lenstr, char *buf, int *bytelen, int *charlen)
Definition: trgm_op.c:771

strict_word_similarity
Datum strict_word_similarity(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1205

similarity_dist
Datum similarity_dist(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1221

strict_word_similarity_op
Datum strict_word_similarity_op(PG_FUNCTION_ARGS)
Definition: trgm_op.c:1305

ts_locale.h

VARDATA
#define VARDATA(PTR)
Definition: varatt.h:278

VARDATA_ANY
#define VARDATA_ANY(PTR)
Definition: varatt.h:324

SET_VARSIZE
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

VARSIZE_ANY_EXHDR
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

name
const char * name
Definition: wait_event_funcs.c:28

GetDefaultCharSignedness
bool GetDefaultCharSignedness(void)
Definition: xlog.c:4768