219 #define MAX_EXPANDED_STATES 128
220 #define MAX_EXPANDED_ARCS 1024
221 #define MAX_TRGM_COUNT 256
222 #define WISH_TRGM_PENALTY 16
223 #define COLOR_COUNT_LIMIT 256
286 #define COLOR_UNKNOWN (-3)
287 #define COLOR_BLANK (-4)
329 #define TSTATE_INIT 0x01
330 #define TSTATE_FIN 0x02
482 int cflags,
Oid collation);
504 #ifdef TRGM_REGEXP_DEBUG
506 static void printTrgmNFA(
TrgmNFA *trgmNFA);
537 "createTrgmNFA temporary context",
584 trgmNFA.
regex = regex;
589 #ifdef TRGM_REGEXP_DEBUG
598 #ifdef TRGM_REGEXP_DEBUG
599 printTrgmNFA(&trgmNFA);
624 #ifdef TRGM_REGEXP_DEBUG
625 printTrgmPackedGraph(*graph, trg);
664 for (k =
j; k <
j + cnt; k++)
691 while (queueIn < queueOut)
695 int cnt =
state->arcsCount;
698 for (
i = 0;
i < cnt;
i++)
709 int nextstate =
arc->targetState;
759 pg_regerror(regcomp_result, regex, errMsg,
sizeof(errMsg));
761 (
errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
762 errmsg(
"invalid regular expression: %s", errMsg)));
781 trgmNFA->
ncolors = colorsCount;
789 for (
i = 0;
i < colorsCount;
i++)
819 for (
j = 0;
j < charsCount;
j++)
854 memset(s, 0,
sizeof(s));
874 if (strcmp(lowerCased, s) != 0)
930 MemSet(&initkey, 0,
sizeof(initkey));
935 initstate =
getState(trgmNFA, &initkey);
946 foreach(lc, trgmNFA->
queue)
1032 MemSet(&destKey, 0,
sizeof(destKey));
1039 foreach(cell,
state->enterKeys)
1080 for (
i = 0;
i < arcsCount;
i++)
1114 else if (
arc->
co >= 0)
1213 MemSet(&destKey, 0,
sizeof(destKey));
1224 foreach(cell,
state->enterKeys)
1232 for (
i = 0;
i < arcsCount;
i++)
1315 foreach(cell,
state->enterKeys)
1327 arc->ctrgm.colors[0] =
key->prefix.colors[0];
1328 arc->ctrgm.colors[1] =
key->prefix.colors[1];
1329 arc->ctrgm.colors[2] = co;
1411 state->parent = NULL;
1412 state->tentFlags = 0;
1413 state->tentParent = NULL;
1476 int64 totalTrgmCount;
1490 foreach(cell,
state->arcs)
1519 for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1551 totalTrgmPenalty = 0.0f;
1559 for (
j = 0;
j < 3;
j++)
1569 trgmInfo->
count = count;
1570 totalTrgmCount += count;
1572 totalTrgmPenalty += trgmInfo->
penalty;
1593 bool canRemove =
true;
1600 #ifdef TRGM_REGEXP_DEBUG
1601 fprintf(stderr,
"considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1613 foreach(cell, trgmInfo->
arcs)
1617 *target = arcInfo->
target;
1621 #ifdef TRGM_REGEXP_DEBUG
1622 fprintf(stderr,
"examining arc to s%d (%x) from s%d (%x)\n",
1623 -target->snumber, target->flags,
1630 while (target->parent)
1631 target = target->parent;
1633 #ifdef TRGM_REGEXP_DEBUG
1634 fprintf(stderr,
" ... after completed merges: to s%d (%x) from s%d (%x)\n",
1635 -target->snumber, target->flags,
1641 while (
source->tentParent)
1646 target_flags = target->flags | target->tentFlags;
1647 while (target->tentParent)
1649 target = target->tentParent;
1650 target_flags |= target->flags | target->tentFlags;
1653 #ifdef TRGM_REGEXP_DEBUG
1654 fprintf(stderr,
" ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1655 -target->snumber, target_flags,
1656 -
source->snumber, source_flags);
1670 #ifdef TRGM_REGEXP_DEBUG
1671 fprintf(stderr,
" ... tentatively merging s%d into s%d\n",
1672 -target->snumber, -
source->snumber);
1674 target->tentParent =
source;
1675 source->tentFlags |= target_flags;
1690 foreach(cell, trgmInfo->
arcs)
1694 *target = arcInfo->
target;
1700 while (target->parent)
1701 target = target->parent;
1709 while ((ttarget = target->
tentParent) != NULL)
1711 target->tentParent = NULL;
1712 target->tentFlags = 0;
1720 #ifdef TRGM_REGEXP_DEBUG
1721 fprintf(stderr,
" ... not ok to merge\n");
1727 foreach(cell, trgmInfo->
arcs)
1731 *target = arcInfo->
target;
1735 while (target->parent)
1736 target = target->parent;
1739 #ifdef TRGM_REGEXP_DEBUG
1740 fprintf(stderr,
"merging s%d into s%d\n",
1741 -target->snumber, -
source->snumber);
1752 totalTrgmCount -= trgmInfo->
count;
1753 totalTrgmPenalty -= trgmInfo->
penalty;
1771 if (colorTrgms[
i].expanded)
1797 memset(blankChar.
bytes, 0,
sizeof(blankChar.
bytes));
1824 for (
j = 0;
j < 3;
j++)
1833 for (i1 = 0; i1 <
c[0]->wordCharsCount; i1++)
1835 s[0] =
c[0]->wordChars[i1];
1836 for (i2 = 0; i2 <
c[1]->wordCharsCount; i2++)
1838 s[1] =
c[1]->wordChars[i2];
1839 for (i3 = 0; i3 <
c[2]->wordCharsCount; i3++)
1841 s[2] =
c[2]->wordChars[i3];
1866 for (
i = 0;
i < 3;
i++)
1868 if (s[
i].bytes[0] != 0)
1871 *p++ = s[
i].bytes[
j];
1890 Assert(state1 != state2);
1923 if (penalty1 < penalty2)
1925 else if (penalty1 == penalty2)
1960 while (
state->parent)
1963 if (
state->snumber < 0)
1971 state->snumber = snumber;
1990 foreach(cell,
state->arcs)
2029 for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2037 arcsCount = (
p2 - arcs) + 1;
2040 arcsCount = arcIndex;
2072 for (
i = 0;
i < snumber;
i++)
2077 while (
j < arcsCount && arcs[
j].sourceState ==
i)
2132 #ifdef TRGM_REGEXP_DEBUG
2164 for (
i = 0;
i < arcsCount;
i++)
2167 state, arcs[
i].to, arcs[
i].co);
2181 for (
i = 0;
i < ncolors;
i++)
2187 if (
color->expandable)
2189 for (
j = 0;
j <
color->wordCharsCount;
j++)
2209 FILE *fp = fopen(
"/tmp/source.gv",
"w");
2222 printTrgmNFA(
TrgmNFA *trgmNFA)
2246 foreach(cell,
state->arcs)
2251 -
state->snumber, -
arc->target->snumber);
2252 printTrgmColor(&
buf,
arc->ctrgm.colors[0]);
2254 printTrgmColor(&
buf,
arc->ctrgm.colors[1]);
2256 printTrgmColor(&
buf,
arc->ctrgm.colors[2]);
2271 FILE *fp = fopen(
"/tmp/transformed.gv",
"w");
2319 for (
j = 0;
j <
state->arcsCount;
j++)
2324 i,
arc->targetState,
arc->colorTrgm);
2343 for (
j = 0;
j < count;
j++)
2362 FILE *fp = fopen(
"/tmp/packed.gv",
"w");
#define MemSet(start, val, len)
static void PGresult const char * p2
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
long hash_get_num_entries(HTAB *hashp)
void * hash_seq_search(HASH_SEQ_STATUS *status)
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
static const FormData_pg_attribute a1
static const FormData_pg_attribute a2
#define CALCGTSIZE(flag, siglen)
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
void list_free(List *list)
List * list_concat(List *list1, const List *list2)
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
void pfree(void *pointer)
void * palloc0(Size size)
void * MemoryContextAllocZero(MemoryContext context, Size size)
MemoryContext CurrentMemoryContext
void * MemoryContextAlloc(MemoryContext context, Size size)
void MemoryContextDelete(MemoryContext context)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
static int list_length(const List *l)
#define foreach_delete_current(lst, cell)
static rewind_source * source
#define MAX_MULTIBYTE_CHAR_LEN
#define qsort(a, b, c, d)
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
int pg_reg_getnumcolors(const regex_t *regex)
int pg_reg_getnumstates(const regex_t *regex)
int pg_reg_getfinalstate(const regex_t *regex)
int pg_reg_getinitialstate(const regex_t *regex)
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
int pg_reg_colorisend(const regex_t *regex, int co)
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
int pg_reg_getnumcharacters(const regex_t *regex, int co)
int pg_reg_colorisbegin(const regex_t *regex, int co)
void pg_regfree(regex_t *re)
void appendStringInfo(StringInfo str, const char *fmt,...)
void appendStringInfoString(StringInfo str, const char *s)
void appendStringInfoChar(StringInfo str, char ch)
void initStringInfo(StringInfo str)
ColorTrgmInfo * colorTrgms
TrgmColorInfo * colorInfo
bool * colorTrigramsActive
struct TrgmState * parent
struct TrgmState * tentParent
char bytes[MAX_MULTIBYTE_CHAR_LEN]
void compact_trigram(trgm *tptr, char *str, int bytelen)
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
#define MAX_EXPANDED_STATES
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
#define MAX_EXPANDED_ARCS
struct TrgmState TrgmState
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
static void transformGraph(TrgmNFA *trgmNFA)
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
static const float4 penalties[8]
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
static int packArcInfoCmp(const void *a1, const void *a2)
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
#define COLOR_COUNT_LIMIT
#define WISH_TRGM_PENALTY
static void mergeStates(TrgmState *state1, TrgmState *state2)
static int colorTrgmInfoCmp(const void *p1, const void *p2)
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
char * lowerstr(const char *str)
#define SET_VARSIZE(PTR, len)
#define VARSIZE_ANY_EXHDR(PTR)
static char chars[TZ_MAX_CHARS]