219 #define MAX_EXPANDED_STATES 128
220 #define MAX_EXPANDED_ARCS 1024
221 #define MAX_TRGM_COUNT 256
222 #define WISH_TRGM_PENALTY 16
223 #define COLOR_COUNT_LIMIT 256
286 #define COLOR_UNKNOWN (-3)
287 #define COLOR_BLANK (-4)
329 #define TSTATE_INIT 0x01
330 #define TSTATE_FIN 0x02
482 int cflags,
Oid collation);
504 #ifdef TRGM_REGEXP_DEBUG
506 static void printTrgmNFA(
TrgmNFA *trgmNFA);
537 "createTrgmNFA temporary context",
571 trgmNFA.
regex = regex;
576 #ifdef TRGM_REGEXP_DEBUG
585 #ifdef TRGM_REGEXP_DEBUG
586 printTrgmNFA(&trgmNFA);
611 #ifdef TRGM_REGEXP_DEBUG
612 printTrgmPackedGraph(*graph, trg);
651 for (k =
j; k <
j + cnt; k++)
678 while (queueIn < queueOut)
682 int cnt =
state->arcsCount;
685 for (
i = 0;
i < cnt;
i++)
696 int nextstate =
arc->targetState;
746 pg_regerror(regcomp_result, regex, errMsg,
sizeof(errMsg));
748 (
errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
749 errmsg(
"invalid regular expression: %s", errMsg)));
768 trgmNFA->
ncolors = colorsCount;
776 for (
i = 0;
i < colorsCount;
i++)
806 for (
j = 0;
j < charsCount;
j++)
841 memset(s, 0,
sizeof(s));
861 if (strcmp(lowerCased, s) != 0)
917 MemSet(&initkey, 0,
sizeof(initkey));
922 initstate =
getState(trgmNFA, &initkey);
933 foreach(lc, trgmNFA->
queue)
1019 MemSet(&destKey, 0,
sizeof(destKey));
1026 foreach(cell,
state->enterKeys)
1067 for (
i = 0;
i < arcsCount;
i++)
1101 else if (
arc->
co >= 0)
1200 MemSet(&destKey, 0,
sizeof(destKey));
1211 foreach(cell,
state->enterKeys)
1219 for (
i = 0;
i < arcsCount;
i++)
1302 foreach(cell,
state->enterKeys)
1314 arc->ctrgm.colors[0] =
key->prefix.colors[0];
1315 arc->ctrgm.colors[1] =
key->prefix.colors[1];
1316 arc->ctrgm.colors[2] = co;
1398 state->parent = NULL;
1399 state->tentFlags = 0;
1400 state->tentParent = NULL;
1463 int64 totalTrgmCount;
1477 foreach(cell,
state->arcs)
1506 for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1538 totalTrgmPenalty = 0.0f;
1546 for (
j = 0;
j < 3;
j++)
1556 trgmInfo->
count = count;
1557 totalTrgmCount += count;
1559 totalTrgmPenalty += trgmInfo->
penalty;
1580 bool canRemove =
true;
1587 #ifdef TRGM_REGEXP_DEBUG
1588 fprintf(stderr,
"considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1600 foreach(cell, trgmInfo->
arcs)
1604 *target = arcInfo->
target;
1608 #ifdef TRGM_REGEXP_DEBUG
1609 fprintf(stderr,
"examining arc to s%d (%x) from s%d (%x)\n",
1610 -target->snumber, target->flags,
1617 while (target->parent)
1618 target = target->parent;
1620 #ifdef TRGM_REGEXP_DEBUG
1621 fprintf(stderr,
" ... after completed merges: to s%d (%x) from s%d (%x)\n",
1622 -target->snumber, target->flags,
1628 while (
source->tentParent)
1633 target_flags = target->flags | target->tentFlags;
1634 while (target->tentParent)
1636 target = target->tentParent;
1637 target_flags |= target->flags | target->tentFlags;
1640 #ifdef TRGM_REGEXP_DEBUG
1641 fprintf(stderr,
" ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1642 -target->snumber, target_flags,
1643 -
source->snumber, source_flags);
1657 #ifdef TRGM_REGEXP_DEBUG
1658 fprintf(stderr,
" ... tentatively merging s%d into s%d\n",
1659 -target->snumber, -
source->snumber);
1661 target->tentParent =
source;
1662 source->tentFlags |= target_flags;
1677 foreach(cell, trgmInfo->
arcs)
1681 *target = arcInfo->
target;
1687 while (target->parent)
1688 target = target->parent;
1696 while ((ttarget = target->
tentParent) != NULL)
1698 target->tentParent = NULL;
1699 target->tentFlags = 0;
1707 #ifdef TRGM_REGEXP_DEBUG
1708 fprintf(stderr,
" ... not ok to merge\n");
1714 foreach(cell, trgmInfo->
arcs)
1718 *target = arcInfo->
target;
1722 while (target->parent)
1723 target = target->parent;
1726 #ifdef TRGM_REGEXP_DEBUG
1727 fprintf(stderr,
"merging s%d into s%d\n",
1728 -target->snumber, -
source->snumber);
1739 totalTrgmCount -= trgmInfo->
count;
1740 totalTrgmPenalty -= trgmInfo->
penalty;
1758 if (colorTrgms[
i].expanded)
1784 memset(blankChar.
bytes, 0,
sizeof(blankChar.
bytes));
1811 for (
j = 0;
j < 3;
j++)
1820 for (i1 = 0; i1 <
c[0]->wordCharsCount; i1++)
1822 s[0] =
c[0]->wordChars[i1];
1823 for (i2 = 0; i2 <
c[1]->wordCharsCount; i2++)
1825 s[1] =
c[1]->wordChars[i2];
1826 for (i3 = 0; i3 <
c[2]->wordCharsCount; i3++)
1828 s[2] =
c[2]->wordChars[i3];
1853 for (
i = 0;
i < 3;
i++)
1855 if (s[
i].bytes[0] != 0)
1858 *p++ = s[
i].bytes[
j];
1877 Assert(state1 != state2);
1910 if (penalty1 < penalty2)
1912 else if (penalty1 == penalty2)
1947 while (
state->parent)
1950 if (
state->snumber < 0)
1958 state->snumber = snumber;
1977 foreach(cell,
state->arcs)
2016 for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2024 arcsCount = (
p2 - arcs) + 1;
2027 arcsCount = arcIndex;
2059 for (
i = 0;
i < snumber;
i++)
2064 while (
j < arcsCount && arcs[
j].sourceState ==
i)
2119 #ifdef TRGM_REGEXP_DEBUG
2151 for (
i = 0;
i < arcsCount;
i++)
2154 state, arcs[
i].to, arcs[
i].co);
2168 for (
i = 0;
i < ncolors;
i++)
2174 if (
color->expandable)
2176 for (
j = 0;
j <
color->wordCharsCount;
j++)
2196 FILE *fp = fopen(
"/tmp/source.gv",
"w");
2209 printTrgmNFA(
TrgmNFA *trgmNFA)
2233 foreach(cell,
state->arcs)
2238 -
state->snumber, -
arc->target->snumber);
2239 printTrgmColor(&
buf,
arc->ctrgm.colors[0]);
2241 printTrgmColor(&
buf,
arc->ctrgm.colors[1]);
2243 printTrgmColor(&
buf,
arc->ctrgm.colors[2]);
2258 FILE *fp = fopen(
"/tmp/transformed.gv",
"w");
2306 for (
j = 0;
j <
state->arcsCount;
j++)
2311 i,
arc->targetState,
arc->colorTrgm);
2330 for (
j = 0;
j < count;
j++)
2349 FILE *fp = fopen(
"/tmp/packed.gv",
"w");
#define Assert(condition)
#define MemSet(start, val, len)
static void PGresult const char * p2
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
long hash_get_num_entries(HTAB *hashp)
void * hash_seq_search(HASH_SEQ_STATUS *status)
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
static const FormData_pg_attribute a1
static const FormData_pg_attribute a2
#define CALCGTSIZE(flag, siglen)
List * lappend(List *list, void *datum)
void list_free(List *list)
List * list_concat(List *list1, const List *list2)
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
void pfree(void *pointer)
void * palloc0(Size size)
void * MemoryContextAllocZero(MemoryContext context, Size size)
MemoryContext CurrentMemoryContext
void * MemoryContextAlloc(MemoryContext context, Size size)
void MemoryContextDelete(MemoryContext context)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
static int list_length(const List *l)
#define foreach_delete_current(lst, var_or_cell)
static rewind_source * source
#define MAX_MULTIBYTE_CHAR_LEN
#define qsort(a, b, c, d)
MemoryContextSwitchTo(old_ctx)
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
int pg_reg_getnumcolors(const regex_t *regex)
int pg_reg_getnumstates(const regex_t *regex)
int pg_reg_getfinalstate(const regex_t *regex)
int pg_reg_getinitialstate(const regex_t *regex)
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
int pg_reg_colorisend(const regex_t *regex, int co)
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
int pg_reg_getnumcharacters(const regex_t *regex, int co)
int pg_reg_colorisbegin(const regex_t *regex, int co)
void appendStringInfo(StringInfo str, const char *fmt,...)
void appendStringInfoString(StringInfo str, const char *s)
void appendStringInfoChar(StringInfo str, char ch)
void initStringInfo(StringInfo str)
ColorTrgmInfo * colorTrgms
TrgmColorInfo * colorInfo
bool * colorTrigramsActive
struct TrgmState * parent
struct TrgmState * tentParent
char bytes[MAX_MULTIBYTE_CHAR_LEN]
void compact_trigram(trgm *tptr, char *str, int bytelen)
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
#define MAX_EXPANDED_STATES
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
#define MAX_EXPANDED_ARCS
struct TrgmState TrgmState
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
static void transformGraph(TrgmNFA *trgmNFA)
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
static const float4 penalties[8]
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
static int packArcInfoCmp(const void *a1, const void *a2)
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
#define COLOR_COUNT_LIMIT
#define WISH_TRGM_PENALTY
static void mergeStates(TrgmState *state1, TrgmState *state2)
static int colorTrgmInfoCmp(const void *p1, const void *p2)
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
char * lowerstr(const char *str)
#define SET_VARSIZE(PTR, len)
#define VARSIZE_ANY_EXHDR(PTR)
static char chars[TZ_MAX_CHARS]