194#include "catalog/pg_collation_d.h"
221#define MAX_EXPANDED_STATES 128
222#define MAX_EXPANDED_ARCS 1024
223#define MAX_TRGM_COUNT 256
224#define WISH_TRGM_PENALTY 16
225#define COLOR_COUNT_LIMIT 256
288#define COLOR_UNKNOWN (-3)
289#define COLOR_BLANK (-4)
331#define TSTATE_INIT 0x01
332#define TSTATE_FIN 0x02
484 int cflags,
Oid collation);
506#ifdef TRGM_REGEXP_DEBUG
508static void printTrgmNFA(
TrgmNFA *trgmNFA);
539 "createTrgmNFA temporary context",
573 trgmNFA.
regex = regex;
578#ifdef TRGM_REGEXP_DEBUG
587#ifdef TRGM_REGEXP_DEBUG
588 printTrgmNFA(&trgmNFA);
613#ifdef TRGM_REGEXP_DEBUG
614 printTrgmPackedGraph(*graph, trg);
653 for (k =
j; k <
j + cnt; k++)
680 while (queueIn < queueOut)
684 int cnt =
state->arcsCount;
687 for (
i = 0;
i < cnt;
i++)
698 int nextstate =
arc->targetState;
748 pg_regerror(regcomp_result, regex, errMsg,
sizeof(errMsg));
750 (
errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
751 errmsg(
"invalid regular expression: %s", errMsg)));
770 trgmNFA->
ncolors = colorsCount;
778 for (
i = 0;
i < colorsCount;
i++)
808 for (
j = 0;
j < charsCount;
j++)
843 memset(s, 0,
sizeof(s));
861 char *lowerCased =
str_tolower(s, strlen(s), DEFAULT_COLLATION_OID);
863 if (strcmp(lowerCased, s) != 0)
919 MemSet(&initkey, 0,
sizeof(initkey));
924 initstate =
getState(trgmNFA, &initkey);
935 foreach(lc, trgmNFA->
queue)
1021 MemSet(&destKey, 0,
sizeof(destKey));
1028 foreach(cell,
state->enterKeys)
1069 for (
i = 0;
i < arcsCount;
i++)
1103 else if (
arc->
co >= 0)
1202 MemSet(&destKey, 0,
sizeof(destKey));
1213 foreach(cell,
state->enterKeys)
1221 for (
i = 0;
i < arcsCount;
i++)
1304 foreach(cell,
state->enterKeys)
1316 arc->ctrgm.colors[0] =
key->prefix.colors[0];
1317 arc->ctrgm.colors[1] =
key->prefix.colors[1];
1318 arc->ctrgm.colors[2] = co;
1400 state->parent = NULL;
1401 state->tentFlags = 0;
1402 state->tentParent = NULL;
1465 int64 totalTrgmCount;
1479 foreach(cell,
state->arcs)
1508 for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1540 totalTrgmPenalty = 0.0f;
1548 for (
j = 0;
j < 3;
j++)
1558 trgmInfo->
count = count;
1559 totalTrgmCount += count;
1561 totalTrgmPenalty += trgmInfo->
penalty;
1582 bool canRemove =
true;
1589#ifdef TRGM_REGEXP_DEBUG
1590 fprintf(stderr,
"considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1602 foreach(cell, trgmInfo->
arcs)
1606 *target = arcInfo->
target;
1610#ifdef TRGM_REGEXP_DEBUG
1611 fprintf(stderr,
"examining arc to s%d (%x) from s%d (%x)\n",
1612 -target->snumber, target->flags,
1619 while (target->parent)
1620 target = target->parent;
1622#ifdef TRGM_REGEXP_DEBUG
1623 fprintf(stderr,
" ... after completed merges: to s%d (%x) from s%d (%x)\n",
1624 -target->snumber, target->flags,
1630 while (
source->tentParent)
1635 target_flags = target->flags | target->tentFlags;
1636 while (target->tentParent)
1638 target = target->tentParent;
1639 target_flags |= target->flags | target->tentFlags;
1642#ifdef TRGM_REGEXP_DEBUG
1643 fprintf(stderr,
" ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1644 -target->snumber, target_flags,
1645 -
source->snumber, source_flags);
1659#ifdef TRGM_REGEXP_DEBUG
1660 fprintf(stderr,
" ... tentatively merging s%d into s%d\n",
1661 -target->snumber, -
source->snumber);
1663 target->tentParent =
source;
1664 source->tentFlags |= target_flags;
1679 foreach(cell, trgmInfo->
arcs)
1683 *target = arcInfo->
target;
1689 while (target->parent)
1690 target = target->parent;
1698 while ((ttarget = target->
tentParent) != NULL)
1700 target->tentParent = NULL;
1701 target->tentFlags = 0;
1709#ifdef TRGM_REGEXP_DEBUG
1710 fprintf(stderr,
" ... not ok to merge\n");
1716 foreach(cell, trgmInfo->
arcs)
1720 *target = arcInfo->
target;
1724 while (target->parent)
1725 target = target->parent;
1728#ifdef TRGM_REGEXP_DEBUG
1729 fprintf(stderr,
"merging s%d into s%d\n",
1730 -target->snumber, -
source->snumber);
1741 totalTrgmCount -= trgmInfo->
count;
1742 totalTrgmPenalty -= trgmInfo->
penalty;
1760 if (colorTrgms[
i].expanded)
1786 memset(blankChar.
bytes, 0,
sizeof(blankChar.
bytes));
1813 for (
j = 0;
j < 3;
j++)
1822 for (i1 = 0; i1 <
c[0]->wordCharsCount; i1++)
1824 s[0] =
c[0]->wordChars[i1];
1825 for (i2 = 0; i2 <
c[1]->wordCharsCount; i2++)
1827 s[1] =
c[1]->wordChars[i2];
1828 for (i3 = 0; i3 <
c[2]->wordCharsCount; i3++)
1830 s[2] =
c[2]->wordChars[i3];
1855 for (
i = 0;
i < 3;
i++)
1857 if (s[
i].bytes[0] != 0)
1860 *p++ = s[
i].bytes[
j];
1879 Assert(state1 != state2);
1912 if (penalty1 < penalty2)
1914 else if (penalty1 == penalty2)
1949 while (
state->parent)
1952 if (
state->snumber < 0)
1960 state->snumber = snumber;
1979 foreach(cell,
state->arcs)
2018 for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2026 arcsCount = (p2 - arcs) + 1;
2029 arcsCount = arcIndex;
2061 for (
i = 0;
i < snumber;
i++)
2066 while (
j < arcsCount && arcs[
j].sourceState ==
i)
2121#ifdef TRGM_REGEXP_DEBUG
2153 for (
i = 0;
i < arcsCount;
i++)
2156 state, arcs[
i].to, arcs[
i].co);
2170 for (
i = 0;
i < ncolors;
i++)
2176 if (
color->expandable)
2178 for (
j = 0;
j <
color->wordCharsCount;
j++)
2198 FILE *fp = fopen(
"/tmp/source.gv",
"w");
2235 foreach(cell,
state->arcs)
2240 -
state->snumber, -
arc->target->snumber);
2241 printTrgmColor(&
buf,
arc->ctrgm.colors[0]);
2243 printTrgmColor(&
buf,
arc->ctrgm.colors[1]);
2245 printTrgmColor(&
buf,
arc->ctrgm.colors[2]);
2260 FILE *fp = fopen(
"/tmp/transformed.gv",
"w");
2308 for (
j = 0;
j <
state->arcsCount;
j++)
2313 i,
arc->targetState,
arc->colorTrgm);
2332 for (
j = 0;
j < count;
j++)
2351 FILE *fp = fopen(
"/tmp/packed.gv",
"w");
#define MemSet(start, val, len)
#define fprintf(file, fmt, msg)
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
void * hash_seq_search(HASH_SEQ_STATUS *status)
long hash_get_num_entries(HTAB *hashp)
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
Assert(PointerIsAligned(start, uint64))
static const FormData_pg_attribute a1
static const FormData_pg_attribute a2
#define CALCGTSIZE(flag, siglen)
List * lappend(List *list, void *datum)
List * list_concat(List *list1, const List *list2)
void list_free(List *list)
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
void * MemoryContextAlloc(MemoryContext context, Size size)
void * MemoryContextAllocZero(MemoryContext context, Size size)
void pfree(void *pointer)
void * palloc0(Size size)
MemoryContext CurrentMemoryContext
void MemoryContextDelete(MemoryContext context)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
static int list_length(const List *l)
#define foreach_delete_current(lst, var_or_cell)
static rewind_source * source
#define MAX_MULTIBYTE_CHAR_LEN
#define qsort(a, b, c, d)
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
int pg_reg_getnumcolors(const regex_t *regex)
int pg_reg_getnumstates(const regex_t *regex)
int pg_reg_getfinalstate(const regex_t *regex)
int pg_reg_getinitialstate(const regex_t *regex)
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
int pg_reg_colorisend(const regex_t *regex, int co)
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
int pg_reg_getnumcharacters(const regex_t *regex, int co)
int pg_reg_colorisbegin(const regex_t *regex, int co)
void appendStringInfo(StringInfo str, const char *fmt,...)
void appendStringInfoString(StringInfo str, const char *s)
void appendStringInfoChar(StringInfo str, char ch)
void initStringInfo(StringInfo str)
ColorTrgmInfo * colorTrgms
TrgmColorInfo * colorInfo
bool * colorTrigramsActive
struct TrgmState * parent
struct TrgmState * tentParent
char bytes[MAX_MULTIBYTE_CHAR_LEN]
void compact_trigram(trgm *tptr, char *str, int bytelen)
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
#define MAX_EXPANDED_STATES
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
#define MAX_EXPANDED_ARCS
struct TrgmState TrgmState
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
static void transformGraph(TrgmNFA *trgmNFA)
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
static const float4 penalties[8]
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
static int packArcInfoCmp(const void *a1, const void *a2)
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
#define COLOR_COUNT_LIMIT
#define WISH_TRGM_PENALTY
static void mergeStates(TrgmState *state1, TrgmState *state2)
static int colorTrgmInfoCmp(const void *p1, const void *p2)
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
#define SET_VARSIZE(PTR, len)
#define VARSIZE_ANY_EXHDR(PTR)
static char chars[TZ_MAX_CHARS]