218 #define MAX_EXPANDED_STATES 128 219 #define MAX_EXPANDED_ARCS 1024 220 #define MAX_TRGM_COUNT 256 221 #define WISH_TRGM_PENALTY 16 222 #define COLOR_COUNT_LIMIT 256 285 #define COLOR_UNKNOWN (-1) 286 #define COLOR_BLANK (-2) 328 #define TSTATE_INIT 0x01 329 #define TSTATE_FIN 0x02 481 int cflags,
Oid collation);
503 #ifdef TRGM_REGEXP_DEBUG 505 static void printTrgmNFA(
TrgmNFA *trgmNFA);
536 "createTrgmNFA temporary context",
581 trgmNFA.
regex = regex;
586 #ifdef TRGM_REGEXP_DEBUG 595 #ifdef TRGM_REGEXP_DEBUG 596 printTrgmNFA(&trgmNFA);
621 #ifdef TRGM_REGEXP_DEBUG 622 printTrgmPackedGraph(*graph, trg);
661 for (k = j; k < j + cnt; k++)
688 while (queueIn < queueOut)
695 for (i = 0; i < cnt; i++)
756 pg_regerror(regcomp_result, regex, errMsg,
sizeof(errMsg));
758 (
errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
759 errmsg(
"invalid regular expression: %s", errMsg)));
778 trgmNFA->
ncolors = colorsCount;
785 for (i = 0; i < colorsCount; i++)
815 for (j = 0; j < charsCount; j++)
850 memset(s, 0,
sizeof(s));
870 if (strcmp(lowerCased, s) != 0)
925 MemSet(&initkey, 0,
sizeof(initkey));
930 initstate =
getState(trgmNFA, &initkey);
979 addKey(trgmNFA, state, key);
1019 MemSet(&destKey, 0,
sizeof(destKey));
1067 for (i = 0; i < arcsCount; i++)
1192 MemSet(&destKey, 0,
sizeof(destKey));
1211 for (i = 0; i < arcsCount; i++)
1255 addArc(trgmNFA, state, key, arc->
co, &destKey);
1449 int64 totalTrgmCount;
1463 foreach(cell, state->
arcs)
1492 for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1524 totalTrgmPenalty = 0.0f;
1532 for (j = 0; j < 3; j++)
1542 trgmInfo->
count = count;
1543 totalTrgmCount += count;
1545 totalTrgmPenalty += trgmInfo->
penalty;
1566 bool canRemove =
true;
1573 #ifdef TRGM_REGEXP_DEBUG 1574 fprintf(stderr,
"considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1586 foreach(cell, trgmInfo->
arcs)
1590 *target = arcInfo->
target;
1594 #ifdef TRGM_REGEXP_DEBUG 1595 fprintf(stderr,
"examining arc to s%d (%x) from s%d (%x)\n",
1596 -target->snumber, target->flags,
1603 while (target->parent)
1606 #ifdef TRGM_REGEXP_DEBUG 1607 fprintf(stderr,
" ... after completed merges: to s%d (%x) from s%d (%x)\n",
1608 -target->snumber, target->flags,
1619 target_flags = target->flags | target->tentFlags;
1620 while (target->tentParent)
1622 target = target->tentParent;
1623 target_flags |= target->flags | target->tentFlags;
1626 #ifdef TRGM_REGEXP_DEBUG 1627 fprintf(stderr,
" ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1628 -target->snumber, target_flags,
1629 -source->
snumber, source_flags);
1641 if (source != target)
1643 #ifdef TRGM_REGEXP_DEBUG 1644 fprintf(stderr,
" ... tentatively merging s%d into s%d\n",
1645 -target->snumber, -source->
snumber);
1647 target->tentParent =
source;
1663 foreach(cell, trgmInfo->
arcs)
1667 *target = arcInfo->
target;
1673 while (target->parent)
1682 while ((ttarget = target->
tentParent) != NULL)
1693 #ifdef TRGM_REGEXP_DEBUG 1694 fprintf(stderr,
" ... not ok to merge\n");
1700 foreach(cell, trgmInfo->
arcs)
1704 *target = arcInfo->
target;
1708 while (target->parent)
1710 if (source != target)
1712 #ifdef TRGM_REGEXP_DEBUG 1713 fprintf(stderr,
"merging s%d into s%d\n",
1714 -target->snumber, -source->
snumber);
1725 totalTrgmCount -= trgmInfo->
count;
1726 totalTrgmPenalty -= trgmInfo->
penalty;
1744 if (colorTrgms[
i].expanded)
1770 memset(blankChar.
bytes, 0,
sizeof(blankChar.
bytes));
1797 for (j = 0; j < 3; j++)
1839 for (i = 0; i < 3; i++)
1841 if (s[i].
bytes[0] != 0)
1844 *p++ = s[i].
bytes[j];
1863 Assert(state1 != state2);
1896 if (penalty1 < penalty2)
1898 else if (penalty1 == penalty2)
1965 foreach(cell, state->
arcs)
1999 for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2007 arcsCount = (p2 -
arcs) + 1;
2044 while (j < arcsCount && arcs[j].sourceState == i)
2099 #ifdef TRGM_REGEXP_DEBUG 2116 for (state = 0; state < nstates; state++)
2131 for (i = 0; i < arcsCount; i++)
2134 state, arcs[i].to, arcs[i].co);
2148 for (i = 0; i < ncolors; i++)
2176 FILE *fp = fopen(
"/tmp/source.gv",
"w");
2189 printTrgmNFA(
TrgmNFA *trgmNFA)
2213 foreach(cell, state->
arcs)
2238 FILE *fp = fopen(
"/tmp/transformed.gv",
"w");
2310 for (j = 0; j < count; j++)
2329 FILE *fp = fopen(
"/tmp/packed.gv",
"w");
#define MAX_EXPANDED_ARCS
static void mergeStates(TrgmState *state1, TrgmState *state2)
bool * colorTrigramsActive
int pg_reg_getnumstates(const regex_t *regex)
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
struct TrgmState TrgmState
void MemoryContextDelete(MemoryContext context)
#define AllocSetContextCreate
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
struct TrgmState * tentParent
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
int pg_reg_getfinalstate(const regex_t *regex)
TrgmColorInfo * colorInfo
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
int pg_reg_getnumcharacters(const regex_t *regex, int co)
def bytes(source, encoding='ascii', errors='strict')
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
#define WISH_TRGM_PENALTY
List * list_concat(List *list1, const List *list2)
int errcode(int sqlerrcode)
#define MemSet(start, val, len)
long hash_get_num_entries(HTAB *hashp)
int pg_reg_colorisbegin(const regex_t *regex, int co)
char * lowerstr(const char *str)
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
int pg_reg_getnumcolors(const regex_t *regex)
#define CALCGTSIZE(flag, siglen)
#define foreach_delete_current(lst, cell)
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
static int packArcInfoCmp(const void *a1, const void *a2)
static const FormData_pg_attribute a2
void pfree(void *pointer)
void appendStringInfo(StringInfo str, const char *fmt,...)
int pg_reg_getinitialstate(const regex_t *regex)
#define ALLOCSET_DEFAULT_SIZES
void appendStringInfoString(StringInfo str, const char *s)
ColorTrgmInfo * colorTrgms
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
MemoryContext CurrentMemoryContext
#define COLOR_COUNT_LIMIT
List * lappend(List *list, void *datum)
#define MAX_MULTIBYTE_CHAR_LEN
void appendStringInfoChar(StringInfo str, char ch)
void initStringInfo(StringInfo str)
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
void * palloc0(Size size)
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
int pg_reg_colorisend(const regex_t *regex, int co)
void * MemoryContextAllocZero(MemoryContext context, Size size)
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
void compact_trigram(trgm *tptr, char *str, int bytelen)
#define ereport(elevel,...)
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
#define Assert(condition)
char bytes[MAX_MULTIBYTE_CHAR_LEN]
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
static rewind_source * source
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
static int list_length(const List *l)
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
void * hash_seq_search(HASH_SEQ_STATUS *status)
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
static const float4 penalties[8]
#define MAX_EXPANDED_STATES
static int colorTrgmInfoCmp(const void *p1, const void *p2)
struct TrgmState * parent
static void transformGraph(TrgmNFA *trgmNFA)
#define VARSIZE_ANY_EXHDR(PTR)
int errmsg(const char *fmt,...)
void * MemoryContextAlloc(MemoryContext context, Size size)
#define SET_VARSIZE(PTR, len)
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
static const FormData_pg_attribute a1
#define qsort(a, b, c, d)
void pg_regfree(regex_t *re)
static char chars[TZ_MAX_CHARS]
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
List * list_delete_first(List *list)