PostgreSQL Source Code git master
trgm_regexp.c File Reference
#include "postgres.h"
#include "catalog/pg_collation_d.h"
#include "regex/regexport.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
#include "utils/formatting.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "varatt.h"
Include dependency graph for trgm_regexp.c:

Go to the source code of this file.

Data Structures

struct  trgm_mb_char
 
struct  TrgmColorInfo
 
struct  TrgmPrefix
 
struct  ColorTrgm
 
struct  TrgmStateKey
 
struct  TrgmState
 
struct  TrgmArc
 
struct  TrgmArcInfo
 
struct  ColorTrgmInfo
 
struct  TrgmNFA
 
struct  TrgmPackedArc
 
struct  TrgmPackedState
 
struct  TrgmPackedGraph
 
struct  TrgmPackArcInfo
 

Macros

#define MAX_EXPANDED_STATES   128
 
#define MAX_EXPANDED_ARCS   1024
 
#define MAX_TRGM_COUNT   256
 
#define WISH_TRGM_PENALTY   16
 
#define COLOR_COUNT_LIMIT   256
 
#define COLOR_UNKNOWN   (-3)
 
#define COLOR_BLANK   (-4)
 
#define TSTATE_INIT   0x01 /* flag indicating this state is initial */
 
#define TSTATE_FIN   0x02 /* flag indicating this state is final */
 

Typedefs

typedef int TrgmColor
 
typedef struct TrgmState TrgmState
 

Functions

static TRGMcreateTrgmNFAInternal (regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
 
static void RE_compile (regex_t *regex, text *text_re, int cflags, Oid collation)
 
static void getColorInfo (regex_t *regex, TrgmNFA *trgmNFA)
 
static bool convertPgWchar (pg_wchar c, trgm_mb_char *result)
 
static void transformGraph (TrgmNFA *trgmNFA)
 
static void processState (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addKey (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
 
static void addKeyToQueue (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static void addArcs (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addArc (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
 
static bool validArcLabel (TrgmStateKey *key, TrgmColor co)
 
static TrgmStategetState (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static bool prefixContains (TrgmPrefix *prefix1, TrgmPrefix *prefix2)
 
static bool selectColorTrigrams (TrgmNFA *trgmNFA)
 
static TRGMexpandColorTrigrams (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static void fillTrgm (trgm *ptrgm, trgm_mb_char s[3])
 
static void mergeStates (TrgmState *state1, TrgmState *state2)
 
static int colorTrgmInfoCmp (const void *p1, const void *p2)
 
static int colorTrgmInfoPenaltyCmp (const void *p1, const void *p2)
 
static TrgmPackedGraphpackGraph (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static int packArcInfoCmp (const void *a1, const void *a2)
 
TRGMcreateTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
 
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)
 

Variables

static const float4 penalties [8]
 

Macro Definition Documentation

◆ COLOR_BLANK

#define COLOR_BLANK   (-4)

Definition at line 289 of file trgm_regexp.c.

◆ COLOR_COUNT_LIMIT

#define COLOR_COUNT_LIMIT   256

Definition at line 225 of file trgm_regexp.c.

◆ COLOR_UNKNOWN

#define COLOR_UNKNOWN   (-3)

Definition at line 288 of file trgm_regexp.c.

◆ MAX_EXPANDED_ARCS

#define MAX_EXPANDED_ARCS   1024

Definition at line 222 of file trgm_regexp.c.

◆ MAX_EXPANDED_STATES

#define MAX_EXPANDED_STATES   128

Definition at line 221 of file trgm_regexp.c.

◆ MAX_TRGM_COUNT

#define MAX_TRGM_COUNT   256

Definition at line 223 of file trgm_regexp.c.

◆ TSTATE_FIN

#define TSTATE_FIN   0x02 /* flag indicating this state is final */

Definition at line 332 of file trgm_regexp.c.

◆ TSTATE_INIT

#define TSTATE_INIT   0x01 /* flag indicating this state is initial */

Definition at line 331 of file trgm_regexp.c.

◆ WISH_TRGM_PENALTY

#define WISH_TRGM_PENALTY   16

Definition at line 224 of file trgm_regexp.c.

Typedef Documentation

◆ TrgmColor

typedef int TrgmColor

Definition at line 285 of file trgm_regexp.c.

◆ TrgmState

typedef struct TrgmState TrgmState

Function Documentation

◆ addArc()

static void addArc ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key,
TrgmColor  co,
TrgmStateKey destKey 
)
static

Definition at line 1288 of file trgm_regexp.c.

1290{
1291 TrgmArc *arc;
1292 ListCell *cell;
1293
1294 /* Do nothing if this wouldn't be a valid arc label trigram */
1295 if (!validArcLabel(key, co))
1296 return;
1297
1298 /*
1299 * Check if we are going to reach key which is covered by a key which is
1300 * already listed in this state. If so arc is useless: the NFA can bypass
1301 * it through a path that doesn't require any predictable trigram, so
1302 * whether the arc's trigram is present or not doesn't really matter.
1303 */
1304 foreach(cell, state->enterKeys)
1305 {
1306 TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1307
1308 if (existingKey->nstate == destKey->nstate &&
1309 prefixContains(&existingKey->prefix, &destKey->prefix))
1310 return;
1311 }
1312
1313 /* Checks were successful, add new arc */
1314 arc = (TrgmArc *) palloc(sizeof(TrgmArc));
1315 arc->target = getState(trgmNFA, destKey);
1316 arc->ctrgm.colors[0] = key->prefix.colors[0];
1317 arc->ctrgm.colors[1] = key->prefix.colors[1];
1318 arc->ctrgm.colors[2] = co;
1319
1320 state->arcs = lappend(state->arcs, arc);
1321 trgmNFA->arcsCount++;
1322}
List * lappend(List *list, void *datum)
Definition: list.c:339
void * palloc(Size size)
Definition: mcxt.c:1317
#define lfirst(lc)
Definition: pg_list.h:172
int arcsCount
Definition: trgm_regexp.c:417
TrgmPrefix prefix
Definition: trgm_regexp.c:312
Definition: regguts.h:296
Definition: regguts.h:323
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1330
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1385
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1416

References TrgmNFA::arcsCount, getState(), sort-test::key, lappend(), lfirst, TrgmStateKey::nstate, palloc(), TrgmStateKey::prefix, prefixContains(), and validArcLabel().

Referenced by addArcs().

◆ addArcs()

static void addArcs ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 1190 of file trgm_regexp.c.

1191{
1192 TrgmStateKey destKey;
1193 ListCell *cell;
1194 regex_arc_t *arcs;
1195 int arcsCount,
1196 i;
1197
1198 /*
1199 * Ensure any pad bytes in destKey are zero, since it may get used as a
1200 * hashtable key by getState.
1201 */
1202 MemSet(&destKey, 0, sizeof(destKey));
1203
1204 /*
1205 * Iterate over enter keys associated with this expanded-graph state. This
1206 * includes both the state's own stateKey, and any enter keys we added to
1207 * it during addKey (which represent expanded-graph states that are not
1208 * distinguishable from this one by means of trigrams). For each such
1209 * enter key, examine all the out-arcs of the key's underlying NFA state,
1210 * and try to make a trigram arc leading to where the out-arc leads.
1211 * (addArc will deal with whether the arc is valid or not.)
1212 */
1213 foreach(cell, state->enterKeys)
1214 {
1215 TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
1216
1217 arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1218 arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1219 pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1220
1221 for (i = 0; i < arcsCount; i++)
1222 {
1223 regex_arc_t *arc = &arcs[i];
1224 TrgmColorInfo *colorInfo;
1225
1226 /*
1227 * Ignore non-expandable colors; addKey already handled the case.
1228 *
1229 * We need no special check for WHITE or begin/end pseudocolors
1230 * here. We don't need to do any processing for them, and they
1231 * will be marked non-expandable since the regex engine will have
1232 * reported them that way. We do have to watch out for RAINBOW,
1233 * which has a negative color number.
1234 */
1235 if (arc->co < 0)
1236 continue;
1237 Assert(arc->co < trgmNFA->ncolors);
1238
1239 colorInfo = &trgmNFA->colorInfo[arc->co];
1240 if (!colorInfo->expandable)
1241 continue;
1242
1243 if (colorInfo->containsNonWord)
1244 {
1245 /*
1246 * Color includes non-word character(s).
1247 *
1248 * Generate an arc, treating this transition as occurring on
1249 * BLANK. This allows word-ending trigrams to be manufactured
1250 * if possible.
1251 */
1252 destKey.prefix.colors[0] = key->prefix.colors[1];
1253 destKey.prefix.colors[1] = COLOR_BLANK;
1254 destKey.nstate = arc->to;
1255
1256 addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
1257 }
1258
1259 if (colorInfo->wordCharsCount > 0)
1260 {
1261 /*
1262 * Color includes word character(s).
1263 *
1264 * Generate an arc. Color is pushed into prefix of target
1265 * state.
1266 */
1267 destKey.prefix.colors[0] = key->prefix.colors[1];
1268 destKey.prefix.colors[1] = arc->co;
1269 destKey.nstate = arc->to;
1270
1271 addArc(trgmNFA, state, key, arc->co, &destKey);
1272 }
1273 }
1274
1275 pfree(arcs);
1276 }
1277}
#define Assert(condition)
Definition: c.h:815
#define MemSet(start, val, len)
Definition: c.h:977
int i
Definition: isn.c:72
void pfree(void *pointer)
Definition: mcxt.c:1521
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
bool containsNonWord
Definition: trgm_regexp.c:264
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:406
int ncolors
Definition: trgm_regexp.c:407
regex_t * regex
Definition: trgm_regexp.c:405
TrgmColor colors[2]
Definition: trgm_regexp.c:293
color co
Definition: regguts.h:298
struct state * to
Definition: regguts.h:300
#define COLOR_BLANK
Definition: trgm_regexp.c:289
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
Definition: trgm_regexp.c:1288

References addArc(), Assert, arc::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmColorInfo::expandable, i, sort-test::key, lfirst, MemSet, TrgmNFA::ncolors, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, TrgmNFA::regex, arc::to, and TrgmColorInfo::wordCharsCount.

Referenced by processState().

◆ addKey()

static void addKey ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key 
)
static

Definition at line 1009 of file trgm_regexp.c.

1010{
1011 regex_arc_t *arcs;
1012 TrgmStateKey destKey;
1013 ListCell *cell;
1014 int i,
1015 arcsCount;
1016
1017 /*
1018 * Ensure any pad bytes in destKey are zero, since it may get used as a
1019 * hashtable key by getState.
1020 */
1021 MemSet(&destKey, 0, sizeof(destKey));
1022
1023 /*
1024 * Compare key to each existing enter key of the state to check for
1025 * redundancy. We can drop either old key(s) or the new key if we find
1026 * redundancy.
1027 */
1028 foreach(cell, state->enterKeys)
1029 {
1030 TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1031
1032 if (existingKey->nstate == key->nstate)
1033 {
1034 if (prefixContains(&existingKey->prefix, &key->prefix))
1035 {
1036 /* This old key already covers the new key. Nothing to do */
1037 return;
1038 }
1039 if (prefixContains(&key->prefix, &existingKey->prefix))
1040 {
1041 /*
1042 * The new key covers this old key. Remove the old key, it's
1043 * no longer needed once we add this key to the list.
1044 */
1045 state->enterKeys = foreach_delete_current(state->enterKeys,
1046 cell);
1047 }
1048 }
1049 }
1050
1051 /* No redundancy, so add this key to the state's list */
1052 state->enterKeys = lappend(state->enterKeys, key);
1053
1054 /* If state is now known final, mark it and we're done */
1055 if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
1056 {
1057 state->flags |= TSTATE_FIN;
1058 return;
1059 }
1060
1061 /*
1062 * Loop through all outgoing arcs of the corresponding state in the
1063 * original NFA.
1064 */
1065 arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1066 arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1067 pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1068
1069 for (i = 0; i < arcsCount; i++)
1070 {
1071 regex_arc_t *arc = &arcs[i];
1072
1073 if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
1074 {
1075 /*
1076 * Start of line/string (^). Trigram extraction treats start of
1077 * line same as start of word: double space prefix is added.
1078 * Hence, make an enter key showing we can reach the arc
1079 * destination with all-blank prefix.
1080 */
1081 destKey.prefix.colors[0] = COLOR_BLANK;
1082 destKey.prefix.colors[1] = COLOR_BLANK;
1083 destKey.nstate = arc->to;
1084
1085 /* Add enter key to this state */
1086 addKeyToQueue(trgmNFA, &destKey);
1087 }
1088 else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
1089 {
1090 /*
1091 * End of line/string ($). We must consider this arc as a
1092 * transition that doesn't read anything. The reason for adding
1093 * this enter key to the state is that if the arc leads to the
1094 * NFA's final state, we must mark this expanded state as final.
1095 */
1096 destKey.prefix.colors[0] = COLOR_UNKNOWN;
1097 destKey.prefix.colors[1] = COLOR_UNKNOWN;
1098 destKey.nstate = arc->to;
1099
1100 /* Add enter key to this state */
1101 addKeyToQueue(trgmNFA, &destKey);
1102 }
1103 else if (arc->co >= 0)
1104 {
1105 /* Regular color (including WHITE) */
1106 TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1107
1108 if (colorInfo->expandable)
1109 {
1110 if (colorInfo->containsNonWord &&
1112 {
1113 /*
1114 * We can reach the arc destination after reading a
1115 * non-word character, but the prefix is not something
1116 * that addArc will accept with COLOR_BLANK, so no trigram
1117 * arc can get made for this transition. We must make an
1118 * enter key to show that the arc destination is
1119 * reachable. Set it up with an all-blank prefix, since
1120 * that corresponds to what the trigram extraction code
1121 * will do at a word starting boundary.
1122 */
1123 destKey.prefix.colors[0] = COLOR_BLANK;
1124 destKey.prefix.colors[1] = COLOR_BLANK;
1125 destKey.nstate = arc->to;
1126 addKeyToQueue(trgmNFA, &destKey);
1127 }
1128
1129 if (colorInfo->wordCharsCount > 0 &&
1130 !validArcLabel(key, arc->co))
1131 {
1132 /*
1133 * We can reach the arc destination after reading a word
1134 * character, but the prefix is not something that addArc
1135 * will accept, so no trigram arc can get made for this
1136 * transition. We must make an enter key to show that the
1137 * arc destination is reachable. The prefix for the enter
1138 * key should reflect the info we have for this arc.
1139 */
1140 destKey.prefix.colors[0] = key->prefix.colors[1];
1141 destKey.prefix.colors[1] = arc->co;
1142 destKey.nstate = arc->to;
1143 addKeyToQueue(trgmNFA, &destKey);
1144 }
1145 }
1146 else
1147 {
1148 /*
1149 * Unexpandable color. Add enter key with ambiguous prefix,
1150 * showing we can reach the destination from this state, but
1151 * the preceding colors will be uncertain. (We do not set the
1152 * first prefix color to key->prefix.colors[1], because a
1153 * prefix of known followed by unknown is invalid.)
1154 */
1155 destKey.prefix.colors[0] = COLOR_UNKNOWN;
1156 destKey.prefix.colors[1] = COLOR_UNKNOWN;
1157 destKey.nstate = arc->to;
1158 addKeyToQueue(trgmNFA, &destKey);
1159 }
1160 }
1161 else
1162 {
1163 /* RAINBOW: treat as unexpandable color */
1164 destKey.prefix.colors[0] = COLOR_UNKNOWN;
1165 destKey.prefix.colors[1] = COLOR_UNKNOWN;
1166 destKey.nstate = arc->to;
1167 addKeyToQueue(trgmNFA, &destKey);
1168 }
1169 }
1170
1171 pfree(arcs);
1172}
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:208
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:191
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1178
#define TSTATE_FIN
Definition: trgm_regexp.c:332
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:288

References addKeyToQueue(), arc::co, COLOR_BLANK, COLOR_UNKNOWN, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmColorInfo::expandable, foreach_delete_current, i, sort-test::key, lappend(), lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_colorisbegin(), pg_reg_colorisend(), pg_reg_getfinalstate(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, prefixContains(), TrgmNFA::regex, arc::to, TSTATE_FIN, validArcLabel(), and TrgmColorInfo::wordCharsCount.

Referenced by processState().

◆ addKeyToQueue()

static void addKeyToQueue ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1178 of file trgm_regexp.c.

1179{
1180 TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
1181
1182 memcpy(keyCopy, key, sizeof(TrgmStateKey));
1183 trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
1184}
List * keysQueue
Definition: trgm_regexp.c:416

References sort-test::key, TrgmNFA::keysQueue, lappend(), and palloc().

Referenced by addKey().

◆ colorTrgmInfoCmp()

static int colorTrgmInfoCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1894 of file trgm_regexp.c.

1895{
1896 const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
1897 const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
1898
1899 return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
1900}
ColorTrgm ctrgm
Definition: trgm_regexp.c:377

References ColorTrgmInfo::ctrgm, and p2.

Referenced by packGraph(), and selectColorTrigrams().

◆ colorTrgmInfoPenaltyCmp()

static int colorTrgmInfoPenaltyCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1907 of file trgm_regexp.c.

1908{
1909 float4 penalty1 = ((const ColorTrgmInfo *) p1)->penalty;
1910 float4 penalty2 = ((const ColorTrgmInfo *) p2)->penalty;
1911
1912 if (penalty1 < penalty2)
1913 return 1;
1914 else if (penalty1 == penalty2)
1915 return 0;
1916 else
1917 return -1;
1918}
float float4
Definition: c.h:586

References p2.

Referenced by selectColorTrigrams().

◆ convertPgWchar()

static bool convertPgWchar ( pg_wchar  c,
trgm_mb_char result 
)
static

Definition at line 829 of file trgm_regexp.c.

830{
831 /* "s" has enough space for a multibyte character and a trailing NUL */
832 char s[MAX_MULTIBYTE_CHAR_LEN + 1];
833
834 /*
835 * We can ignore the NUL character, since it can never appear in a PG text
836 * string. This avoids the need for various special cases when
837 * reconstructing trigrams.
838 */
839 if (c == 0)
840 return false;
841
842 /* Do the conversion, making sure the result is NUL-terminated */
843 memset(s, 0, sizeof(s));
844 pg_wchar2mb_with_len(&c, s, 1);
845
846 /*
847 * In IGNORECASE mode, we can ignore uppercase characters. We assume that
848 * the regex engine generated both uppercase and lowercase equivalents
849 * within each color, since we used the REG_ICASE option; so there's no
850 * need to process the uppercase version.
851 *
852 * XXX this code is dependent on the assumption that str_tolower() works
853 * the same as the regex engine's internal case folding machinery. Might
854 * be wiser to expose pg_wc_tolower and test whether c ==
855 * pg_wc_tolower(c). On the other hand, the trigrams in the index were
856 * created using str_tolower(), so we're probably screwed if there's any
857 * incompatibility anyway.
858 */
859#ifdef IGNORECASE
860 {
861 char *lowerCased = str_tolower(s, strlen(s), DEFAULT_COLLATION_OID);
862
863 if (strcmp(lowerCased, s) != 0)
864 {
865 pfree(lowerCased);
866 return false;
867 }
868 pfree(lowerCased);
869 }
870#endif
871
872 /* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
873 memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
874 return true;
875}
char * str_tolower(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1591
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:1008
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:33
char * c
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:245

References trgm_mb_char::bytes, MAX_MULTIBYTE_CHAR_LEN, pfree(), pg_wchar2mb_with_len(), and str_tolower().

Referenced by getColorInfo().

◆ createTrgmNFA()

TRGM * createTrgmNFA ( text text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 524 of file trgm_regexp.c.

526{
527 TRGM *trg;
528 regex_t regex;
529 MemoryContext tmpcontext;
530 MemoryContext oldcontext;
531
532 /*
533 * This processing generates a great deal of cruft, which we'd like to
534 * clean up before returning (since this function may be called in a
535 * query-lifespan memory context). Make a temp context we can work in so
536 * that cleanup is easy.
537 */
539 "createTrgmNFA temporary context",
541 oldcontext = MemoryContextSwitchTo(tmpcontext);
542
543 /*
544 * Stage 1: Compile the regexp into a NFA, using the regexp library.
545 */
546#ifdef IGNORECASE
547 RE_compile(&regex, text_re,
548 REG_ADVANCED | REG_NOSUB | REG_ICASE, collation);
549#else
550 RE_compile(&regex, text_re,
551 REG_ADVANCED | REG_NOSUB, collation);
552#endif
553
554 trg = createTrgmNFAInternal(&regex, graph, rcontext);
555
556 /* Clean up all the cruft we created (including regex) */
557 MemoryContextSwitchTo(oldcontext);
558 MemoryContextDelete(tmpcontext);
559
560 return trg;
561}
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define REG_ICASE
Definition: regex.h:184
#define REG_ADVANCED
Definition: regex.h:181
#define REG_NOSUB
Definition: regex.h:185
#define regex_t
Definition: regex.h:245
Definition: trgm.h:61
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:567
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
Definition: trgm_regexp.c:721

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), RE_compile(), REG_ADVANCED, REG_ICASE, REG_NOSUB, and regex_t.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

◆ createTrgmNFAInternal()

static TRGM * createTrgmNFAInternal ( regex_t regex,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)
static

Definition at line 567 of file trgm_regexp.c.

569{
570 TRGM *trg;
571 TrgmNFA trgmNFA;
572
573 trgmNFA.regex = regex;
574
575 /* Collect color information from the regex */
576 getColorInfo(regex, &trgmNFA);
577
578#ifdef TRGM_REGEXP_DEBUG
579 printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
580#endif
581
582 /*
583 * Stage 2: Create an expanded graph from the source NFA.
584 */
585 transformGraph(&trgmNFA);
586
587#ifdef TRGM_REGEXP_DEBUG
588 printTrgmNFA(&trgmNFA);
589#endif
590
591 /*
592 * Fail if we were unable to make a nontrivial graph, ie it is possible to
593 * get from the initial state to the final state without reading any
594 * predictable trigram.
595 */
596 if (trgmNFA.initState->flags & TSTATE_FIN)
597 return NULL;
598
599 /*
600 * Stage 3: Select color trigrams to expand. Fail if too many trigrams.
601 */
602 if (!selectColorTrigrams(&trgmNFA))
603 return NULL;
604
605 /*
606 * Stage 4: Expand color trigrams and pack graph into final
607 * representation.
608 */
609 trg = expandColorTrigrams(&trgmNFA, rcontext);
610
611 *graph = packGraph(&trgmNFA, rcontext);
612
613#ifdef TRGM_REGEXP_DEBUG
614 printTrgmPackedGraph(*graph, trg);
615#endif
616
617 return trg;
618}
TrgmState * initState
Definition: trgm_regexp.c:411
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:765
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:1458
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1777
static void transformGraph(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:895
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1932

References TrgmNFA::colorInfo, expandColorTrigrams(), TrgmState::flags, getColorInfo(), TrgmNFA::initState, TrgmNFA::ncolors, packGraph(), TrgmNFA::regex, selectColorTrigrams(), transformGraph(), and TSTATE_FIN.

Referenced by createTrgmNFA().

◆ expandColorTrigrams()

static TRGM * expandColorTrigrams ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1777 of file trgm_regexp.c.

1778{
1779 TRGM *trg;
1780 trgm *p;
1781 int i;
1782 TrgmColorInfo blankColor;
1783 trgm_mb_char blankChar;
1784
1785 /* Set up "blank" color structure containing a single zero character */
1786 memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
1787 blankColor.wordCharsCount = 1;
1788 blankColor.wordChars = &blankChar;
1789
1790 /* Construct the trgm array */
1791 trg = (TRGM *)
1792 MemoryContextAllocZero(rcontext,
1793 TRGMHDRSIZE +
1794 trgmNFA->totalTrgmCount * sizeof(trgm));
1795 trg->flag = ARRKEY;
1797 p = GETARR(trg);
1798 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1799 {
1800 ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
1801 TrgmColorInfo *c[3];
1802 trgm_mb_char s[3];
1803 int j,
1804 i1,
1805 i2,
1806 i3;
1807
1808 /* Ignore any unexpanded trigrams ... */
1809 if (!colorTrgm->expanded)
1810 continue;
1811
1812 /* Get colors, substituting the dummy struct for COLOR_BLANK */
1813 for (j = 0; j < 3; j++)
1814 {
1815 if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
1816 c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
1817 else
1818 c[j] = &blankColor;
1819 }
1820
1821 /* Iterate over all possible combinations of colors' characters */
1822 for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
1823 {
1824 s[0] = c[0]->wordChars[i1];
1825 for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
1826 {
1827 s[1] = c[1]->wordChars[i2];
1828 for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
1829 {
1830 s[2] = c[2]->wordChars[i3];
1831 fillTrgm(p, s);
1832 p++;
1833 }
1834 }
1835 }
1836 }
1837
1838 return trg;
1839}
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:60
int j
Definition: isn.c:73
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1215
TrgmColor colors[3]
Definition: trgm_regexp.c:302
uint8 flag
Definition: trgm.h:63
trgm_mb_char * wordChars
Definition: trgm_regexp.c:266
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:421
int colorTrgmsCount
Definition: trgm_regexp.c:422
int totalTrgmCount
Definition: trgm_regexp.c:423
char trgm[3]
Definition: trgm.h:41
#define GETARR(x)
Definition: trgm.h:100
#define ARRKEY
Definition: trgm.h:90
#define TRGMHDRSIZE
Definition: trgm.h:67
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
Definition: trgm_regexp.c:1845
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References ARRKEY, trgm_mb_char::bytes, CALCGTSIZE, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fillTrgm(), TRGM::flag, GETARR, i, j, MemoryContextAllocZero(), SET_VARSIZE, TrgmNFA::totalTrgmCount, TRGMHDRSIZE, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ fillTrgm()

static void fillTrgm ( trgm ptrgm,
trgm_mb_char  s[3] 
)
static

Definition at line 1845 of file trgm_regexp.c.

1846{
1847 char str[3 * MAX_MULTIBYTE_CHAR_LEN],
1848 *p;
1849 int i,
1850 j;
1851
1852 /* Write multibyte string into "str" (we don't need null termination) */
1853 p = str;
1854
1855 for (i = 0; i < 3; i++)
1856 {
1857 if (s[i].bytes[0] != 0)
1858 {
1859 for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
1860 *p++ = s[i].bytes[j];
1861 }
1862 else
1863 {
1864 /* Emit a space in place of COLOR_BLANK */
1865 *p++ = ' ';
1866 }
1867 }
1868
1869 /* Convert "str" to a standard trigram (possibly hashing it) */
1870 compact_trigram(ptrgm, str, p - str);
1871}
const char * str
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:201

References trgm_mb_char::bytes, compact_trigram(), i, j, MAX_MULTIBYTE_CHAR_LEN, and str.

Referenced by expandColorTrigrams().

◆ getColorInfo()

static void getColorInfo ( regex_t regex,
TrgmNFA trgmNFA 
)
static

Definition at line 765 of file trgm_regexp.c.

766{
767 int colorsCount = pg_reg_getnumcolors(regex);
768 int i;
769
770 trgmNFA->ncolors = colorsCount;
771 trgmNFA->colorInfo = (TrgmColorInfo *)
772 palloc0(colorsCount * sizeof(TrgmColorInfo));
773
774 /*
775 * Loop over colors, filling TrgmColorInfo about each. Note we include
776 * WHITE (0) even though we know it'll be reported as non-expandable.
777 */
778 for (i = 0; i < colorsCount; i++)
779 {
780 TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
781 int charsCount = pg_reg_getnumcharacters(regex, i);
783 int j;
784
785 if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
786 {
787 /* Non expandable, or too large to work with */
788 colorInfo->expandable = false;
789 continue;
790 }
791
792 colorInfo->expandable = true;
793 colorInfo->containsNonWord = false;
794 colorInfo->wordChars = (trgm_mb_char *)
795 palloc(sizeof(trgm_mb_char) * charsCount);
796 colorInfo->wordCharsCount = 0;
797
798 /* Extract all the chars in this color */
799 chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
800 pg_reg_getcharacters(regex, i, chars, charsCount);
801
802 /*
803 * Convert characters back to multibyte form, and save only those that
804 * are word characters. Set "containsNonWord" if any non-word
805 * character. (Note: it'd probably be nicer to keep the chars in
806 * pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
807 */
808 for (j = 0; j < charsCount; j++)
809 {
811
812 if (!convertPgWchar(chars[j], &c))
813 continue; /* ok to ignore it altogether */
814 if (ISWORDCHR(c.bytes))
815 colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
816 else
817 colorInfo->containsNonWord = true;
818 }
819
820 pfree(chars);
821 }
822}
unsigned int pg_wchar
Definition: mbprint.c:31
void * palloc0(Size size)
Definition: mcxt.c:1347
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:174
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:266
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:230
#define ISWORDCHR(c)
Definition: trgm.h:53
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
Definition: trgm_regexp.c:829
#define COLOR_COUNT_LIMIT
Definition: trgm_regexp.c:225
static char chars[TZ_MAX_CHARS]
Definition: zic.c:401

References chars, COLOR_COUNT_LIMIT, TrgmNFA::colorInfo, TrgmColorInfo::containsNonWord, convertPgWchar(), TrgmColorInfo::expandable, i, ISWORDCHR, j, TrgmNFA::ncolors, palloc(), palloc0(), pfree(), pg_reg_getcharacters(), pg_reg_getnumcharacters(), pg_reg_getnumcolors(), TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ getState()

static TrgmState * getState ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1385 of file trgm_regexp.c.

1386{
1388 bool found;
1389
1391 &found);
1392 if (!found)
1393 {
1394 /* New state: initialize and queue it */
1395 state->arcs = NIL;
1396 state->enterKeys = NIL;
1397 state->flags = 0;
1398 /* states are initially given negative numbers */
1399 state->snumber = -(++trgmNFA->nstates);
1400 state->parent = NULL;
1401 state->tentFlags = 0;
1402 state->tentParent = NULL;
1403
1404 trgmNFA->queue = lappend(trgmNFA->queue, state);
1405 }
1406 return state;
1407}
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_ENTER
Definition: hsearch.h:114
#define NIL
Definition: pg_list.h:68
int nstates
Definition: trgm_regexp.c:412
List * queue
Definition: trgm_regexp.c:415
HTAB * states
Definition: trgm_regexp.c:410

References HASH_ENTER, hash_search(), sort-test::key, lappend(), NIL, TrgmNFA::nstates, TrgmNFA::queue, and TrgmNFA::states.

Referenced by addArc(), and transformGraph().

◆ mergeStates()

static void mergeStates ( TrgmState state1,
TrgmState state2 
)
static

Definition at line 1877 of file trgm_regexp.c.

1878{
1879 Assert(state1 != state2);
1880 Assert(!state1->parent);
1881 Assert(!state2->parent);
1882
1883 /* state1 absorbs state2's flags */
1884 state1->flags |= state2->flags;
1885
1886 /* state2, and indirectly all its children, become children of state1 */
1887 state2->parent = state1;
1888}
struct TrgmState * parent
Definition: trgm_regexp.c:341

References Assert, TrgmState::flags, and TrgmState::parent.

Referenced by selectColorTrigrams().

◆ packArcInfoCmp()

static int packArcInfoCmp ( const void *  a1,
const void *  a2 
)
static

Definition at line 2093 of file trgm_regexp.c.

2094{
2095 const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
2096 const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
2097
2098 if (p1->sourceState < p2->sourceState)
2099 return -1;
2100 if (p1->sourceState > p2->sourceState)
2101 return 1;
2102 if (p1->colorTrgm < p2->colorTrgm)
2103 return -1;
2104 if (p1->colorTrgm > p2->colorTrgm)
2105 return 1;
2106 if (p1->targetState < p2->targetState)
2107 return -1;
2108 if (p1->targetState > p2->targetState)
2109 return 1;
2110 return 0;
2111}
static const FormData_pg_attribute a1
Definition: heap.c:143
static const FormData_pg_attribute a2
Definition: heap.c:156

References a1, a2, TrgmPackArcInfo::colorTrgm, p2, TrgmPackArcInfo::sourceState, and TrgmPackArcInfo::targetState.

Referenced by packGraph().

◆ packGraph()

static TrgmPackedGraph * packGraph ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1932 of file trgm_regexp.c.

1933{
1934 int snumber = 2,
1935 arcIndex,
1936 arcsCount;
1937 HASH_SEQ_STATUS scan_status;
1939 TrgmPackArcInfo *arcs;
1940 TrgmPackedArc *packedArcs;
1941 TrgmPackedGraph *result;
1942 int i,
1943 j;
1944
1945 /* Enumerate surviving states, giving init and fin reserved numbers */
1946 hash_seq_init(&scan_status, trgmNFA->states);
1947 while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1948 {
1949 while (state->parent)
1950 state = state->parent;
1951
1952 if (state->snumber < 0)
1953 {
1954 if (state->flags & TSTATE_INIT)
1955 state->snumber = 0;
1956 else if (state->flags & TSTATE_FIN)
1957 state->snumber = 1;
1958 else
1959 {
1960 state->snumber = snumber;
1961 snumber++;
1962 }
1963 }
1964 }
1965
1966 /* Collect array of all arcs */
1967 arcs = (TrgmPackArcInfo *)
1968 palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
1969 arcIndex = 0;
1970 hash_seq_init(&scan_status, trgmNFA->states);
1971 while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1972 {
1974 ListCell *cell;
1975
1976 while (source->parent)
1977 source = source->parent;
1978
1979 foreach(cell, state->arcs)
1980 {
1981 TrgmArc *arc = (TrgmArc *) lfirst(cell);
1982 TrgmState *target = arc->target;
1983
1984 while (target->parent)
1985 target = target->parent;
1986
1987 if (source->snumber != target->snumber)
1988 {
1989 ColorTrgmInfo *ctrgm;
1990
1991 ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
1992 trgmNFA->colorTrgms,
1993 trgmNFA->colorTrgmsCount,
1994 sizeof(ColorTrgmInfo),
1996 Assert(ctrgm != NULL);
1997 Assert(ctrgm->expanded);
1998
1999 arcs[arcIndex].sourceState = source->snumber;
2000 arcs[arcIndex].targetState = target->snumber;
2001 arcs[arcIndex].colorTrgm = ctrgm->cnumber;
2002 arcIndex++;
2003 }
2004 }
2005 }
2006
2007 /* Sort arcs to ease duplicate detection */
2008 qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
2009
2010 /* We could have duplicates because states were merged. Remove them. */
2011 if (arcIndex > 1)
2012 {
2013 /* p1 is probe point, p2 is last known non-duplicate. */
2014 TrgmPackArcInfo *p1,
2015 *p2;
2016
2017 p2 = arcs;
2018 for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2019 {
2020 if (packArcInfoCmp(p1, p2) > 0)
2021 {
2022 p2++;
2023 *p2 = *p1;
2024 }
2025 }
2026 arcsCount = (p2 - arcs) + 1;
2027 }
2028 else
2029 arcsCount = arcIndex;
2030
2031 /* Create packed representation */
2032 result = (TrgmPackedGraph *)
2033 MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
2034
2035 /* Pack color trigrams information */
2036 result->colorTrigramsCount = 0;
2037 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2038 {
2039 if (trgmNFA->colorTrgms[i].expanded)
2040 result->colorTrigramsCount++;
2041 }
2042 result->colorTrigramGroups = (int *)
2043 MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
2044 j = 0;
2045 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2046 {
2047 if (trgmNFA->colorTrgms[i].expanded)
2048 {
2049 result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
2050 j++;
2051 }
2052 }
2053
2054 /* Pack states and arcs information */
2055 result->statesCount = snumber;
2056 result->states = (TrgmPackedState *)
2057 MemoryContextAlloc(rcontext, snumber * sizeof(TrgmPackedState));
2058 packedArcs = (TrgmPackedArc *)
2059 MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
2060 j = 0;
2061 for (i = 0; i < snumber; i++)
2062 {
2063 int cnt = 0;
2064
2065 result->states[i].arcs = &packedArcs[j];
2066 while (j < arcsCount && arcs[j].sourceState == i)
2067 {
2068 packedArcs[j].targetState = arcs[j].targetState;
2069 packedArcs[j].colorTrgm = arcs[j].colorTrgm;
2070 cnt++;
2071 j++;
2072 }
2073 result->states[i].arcsCount = cnt;
2074 }
2075
2076 /* Allocate working memory for trigramsMatchGraph() */
2077 result->colorTrigramsActive = (bool *)
2078 MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
2079 result->statesActive = (bool *)
2080 MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
2081 result->statesQueue = (int *)
2082 MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
2083
2084 return result;
2085}
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
static rewind_source * source
Definition: pg_rewind.c:89
#define qsort(a, b, c, d)
Definition: port.h:474
TrgmPackedState * states
Definition: trgm_regexp.c:461
bool * statesActive
Definition: trgm_regexp.c:465
bool * colorTrigramsActive
Definition: trgm_regexp.c:464
int * colorTrigramGroups
Definition: trgm_regexp.c:454
TrgmPackedArc * arcs
Definition: trgm_regexp.c:438
#define TSTATE_INIT
Definition: trgm_regexp.c:331
static int packArcInfoCmp(const void *a1, const void *a2)
Definition: trgm_regexp.c:2093
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1894

References TrgmPackedState::arcs, TrgmNFA::arcsCount, TrgmPackedState::arcsCount, Assert, ColorTrgmInfo::cnumber, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, colorTrgmInfoCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, ColorTrgmInfo::count, ColorTrgmInfo::expanded, hash_seq_init(), hash_seq_search(), i, j, lfirst, MemoryContextAlloc(), p2, packArcInfoCmp(), palloc(), TrgmState::parent, qsort, TrgmState::snumber, source, TrgmPackArcInfo::sourceState, TrgmNFA::states, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, TrgmPackedArc::targetState, TrgmPackArcInfo::targetState, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

◆ prefixContains()

static bool prefixContains ( TrgmPrefix prefix1,
TrgmPrefix prefix2 
)
static

Definition at line 1416 of file trgm_regexp.c.

1417{
1418 if (prefix1->colors[1] == COLOR_UNKNOWN)
1419 {
1420 /* Fully ambiguous prefix contains everything */
1421 return true;
1422 }
1423 else if (prefix1->colors[0] == COLOR_UNKNOWN)
1424 {
1425 /*
1426 * Prefix with only first unknown color contains every prefix with
1427 * same second color.
1428 */
1429 if (prefix1->colors[1] == prefix2->colors[1])
1430 return true;
1431 else
1432 return false;
1433 }
1434 else
1435 {
1436 /* Exact prefix contains only the exact same prefix */
1437 if (prefix1->colors[0] == prefix2->colors[0] &&
1438 prefix1->colors[1] == prefix2->colors[1])
1439 return true;
1440 else
1441 return false;
1442 }
1443}

References COLOR_UNKNOWN, and TrgmPrefix::colors.

Referenced by addArc(), and addKey().

◆ processState()

static void processState ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 959 of file trgm_regexp.c.

960{
961 ListCell *lc;
962
963 /* keysQueue should be NIL already, but make sure */
964 trgmNFA->keysQueue = NIL;
965
966 /*
967 * Add state's own key, and then process all keys added to keysQueue until
968 * queue is finished. But we can quit if the state gets marked final.
969 */
970 addKey(trgmNFA, state, &state->stateKey);
971 foreach(lc, trgmNFA->keysQueue)
972 {
974
975 if (state->flags & TSTATE_FIN)
976 break;
977 addKey(trgmNFA, state, key);
978 }
979
980 /* Release keysQueue to clean up for next cycle */
981 list_free(trgmNFA->keysQueue);
982 trgmNFA->keysQueue = NIL;
983
984 /*
985 * Add outgoing arcs only if state isn't final (we have no interest in
986 * outgoing arcs if we already match)
987 */
988 if (!(state->flags & TSTATE_FIN))
989 addArcs(trgmNFA, state);
990}
void list_free(List *list)
Definition: list.c:1546
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
Definition: trgm_regexp.c:1009
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:1190

References addArcs(), addKey(), sort-test::key, TrgmNFA::keysQueue, lfirst, list_free(), NIL, and TSTATE_FIN.

Referenced by transformGraph().

◆ RE_compile()

static void RE_compile ( regex_t regex,
text text_re,
int  cflags,
Oid  collation 
)
static

Definition at line 721 of file trgm_regexp.c.

722{
723 int text_re_len = VARSIZE_ANY_EXHDR(text_re);
724 char *text_re_val = VARDATA_ANY(text_re);
725 pg_wchar *pattern;
726 int pattern_len;
727 int regcomp_result;
728 char errMsg[100];
729
730 /* Convert pattern string to wide characters */
731 pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
732 pattern_len = pg_mb2wchar_with_len(text_re_val,
733 pattern,
734 text_re_len);
735
736 /* Compile regex */
737 regcomp_result = pg_regcomp(regex,
738 pattern,
739 pattern_len,
740 cflags,
741 collation);
742
743 pfree(pattern);
744
745 if (regcomp_result != REG_OKAY)
746 {
747 /* re didn't compile (no need for pg_regfree, if so) */
748 pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
750 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
751 errmsg("invalid regular expression: %s", errMsg)));
752 }
753}
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:986
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
Definition: regcomp.c:372
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define REG_OKAY
Definition: regex.h:215
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by createTrgmNFA().

◆ selectColorTrigrams()

static bool selectColorTrigrams ( TrgmNFA trgmNFA)
static

Definition at line 1458 of file trgm_regexp.c.

1459{
1460 HASH_SEQ_STATUS scan_status;
1461 int arcsCount = trgmNFA->arcsCount,
1462 i;
1464 ColorTrgmInfo *colorTrgms;
1465 int64 totalTrgmCount;
1466 float4 totalTrgmPenalty;
1467 int cnumber;
1468
1469 /* Collect color trigrams from all arcs */
1470 colorTrgms = (ColorTrgmInfo *) palloc0(sizeof(ColorTrgmInfo) * arcsCount);
1471 trgmNFA->colorTrgms = colorTrgms;
1472
1473 i = 0;
1474 hash_seq_init(&scan_status, trgmNFA->states);
1475 while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1476 {
1477 ListCell *cell;
1478
1479 foreach(cell, state->arcs)
1480 {
1481 TrgmArc *arc = (TrgmArc *) lfirst(cell);
1482 TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
1483 ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1484
1485 arcInfo->source = state;
1486 arcInfo->target = arc->target;
1487 trgmInfo->ctrgm = arc->ctrgm;
1488 trgmInfo->cnumber = -1;
1489 /* count and penalty will be set below */
1490 trgmInfo->expanded = true;
1491 trgmInfo->arcs = list_make1(arcInfo);
1492 i++;
1493 }
1494 }
1495 Assert(i == arcsCount);
1496
1497 /* Remove duplicates, merging their arcs lists */
1498 if (arcsCount >= 2)
1499 {
1500 ColorTrgmInfo *p1,
1501 *p2;
1502
1503 /* Sort trigrams to ease duplicate detection */
1504 qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
1505
1506 /* p1 is probe point, p2 is last known non-duplicate. */
1507 p2 = colorTrgms;
1508 for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1509 {
1510 if (colorTrgmInfoCmp(p1, p2) > 0)
1511 {
1512 p2++;
1513 *p2 = *p1;
1514 }
1515 else
1516 {
1517 p2->arcs = list_concat(p2->arcs, p1->arcs);
1518 }
1519 }
1520 trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
1521 }
1522 else
1523 {
1524 trgmNFA->colorTrgmsCount = arcsCount;
1525 }
1526
1527 /*
1528 * Count number of simple trigrams generated by each color trigram, and
1529 * also compute a penalty value, which is the number of simple trigrams
1530 * times a multiplier that depends on its whitespace content.
1531 *
1532 * Note: per-color-trigram counts cannot overflow an int so long as
1533 * COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
1534 * 1290. However, the grand total totalTrgmCount might conceivably
1535 * overflow an int, so we use int64 for that within this routine. Also,
1536 * penalties are calculated in float4 arithmetic to avoid any overflow
1537 * worries.
1538 */
1539 totalTrgmCount = 0;
1540 totalTrgmPenalty = 0.0f;
1541 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1542 {
1543 ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1544 int j,
1545 count = 1,
1546 typeIndex = 0;
1547
1548 for (j = 0; j < 3; j++)
1549 {
1550 TrgmColor c = trgmInfo->ctrgm.colors[j];
1551
1552 typeIndex *= 2;
1553 if (c == COLOR_BLANK)
1554 typeIndex++;
1555 else
1556 count *= trgmNFA->colorInfo[c].wordCharsCount;
1557 }
1558 trgmInfo->count = count;
1559 totalTrgmCount += count;
1560 trgmInfo->penalty = penalties[typeIndex] * (float4) count;
1561 totalTrgmPenalty += trgmInfo->penalty;
1562 }
1563
1564 /* Sort color trigrams in descending order of their penalties */
1565 qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1567
1568 /*
1569 * Remove color trigrams from the graph so long as total penalty of color
1570 * trigrams exceeds WISH_TRGM_PENALTY. (If we fail to get down to
1571 * WISH_TRGM_PENALTY, it's OK so long as total count is no more than
1572 * MAX_TRGM_COUNT.) We prefer to remove color trigrams with higher
1573 * penalty, since those are the most promising for reducing the total
1574 * penalty. When removing a color trigram we have to merge states
1575 * connected by arcs labeled with that trigram. It's necessary to not
1576 * merge initial and final states, because our graph becomes useless if
1577 * that happens; so we cannot always remove the trigram we'd prefer to.
1578 */
1579 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1580 {
1581 ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1582 bool canRemove = true;
1583 ListCell *cell;
1584
1585 /* Done if we've reached the target */
1586 if (totalTrgmPenalty <= WISH_TRGM_PENALTY)
1587 break;
1588
1589#ifdef TRGM_REGEXP_DEBUG
1590 fprintf(stderr, "considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1591 trgmInfo->ctrgm.colors[0],
1592 trgmInfo->ctrgm.colors[1],
1593 trgmInfo->ctrgm.colors[2],
1594 trgmInfo->penalty,
1595 list_length(trgmInfo->arcs));
1596#endif
1597
1598 /*
1599 * Does any arc of this color trigram connect initial and final
1600 * states? If so we can't remove it.
1601 */
1602 foreach(cell, trgmInfo->arcs)
1603 {
1604 TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1605 TrgmState *source = arcInfo->source,
1606 *target = arcInfo->target;
1607 int source_flags,
1608 target_flags;
1609
1610#ifdef TRGM_REGEXP_DEBUG
1611 fprintf(stderr, "examining arc to s%d (%x) from s%d (%x)\n",
1612 -target->snumber, target->flags,
1613 -source->snumber, source->flags);
1614#endif
1615
1616 /* examine parent states, if any merging has already happened */
1617 while (source->parent)
1618 source = source->parent;
1619 while (target->parent)
1620 target = target->parent;
1621
1622#ifdef TRGM_REGEXP_DEBUG
1623 fprintf(stderr, " ... after completed merges: to s%d (%x) from s%d (%x)\n",
1624 -target->snumber, target->flags,
1625 -source->snumber, source->flags);
1626#endif
1627
1628 /* we must also consider merges we are planning right now */
1629 source_flags = source->flags | source->tentFlags;
1630 while (source->tentParent)
1631 {
1632 source = source->tentParent;
1633 source_flags |= source->flags | source->tentFlags;
1634 }
1635 target_flags = target->flags | target->tentFlags;
1636 while (target->tentParent)
1637 {
1638 target = target->tentParent;
1639 target_flags |= target->flags | target->tentFlags;
1640 }
1641
1642#ifdef TRGM_REGEXP_DEBUG
1643 fprintf(stderr, " ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1644 -target->snumber, target_flags,
1645 -source->snumber, source_flags);
1646#endif
1647
1648 /* would fully-merged state have both INIT and FIN set? */
1649 if (((source_flags | target_flags) & (TSTATE_INIT | TSTATE_FIN)) ==
1651 {
1652 canRemove = false;
1653 break;
1654 }
1655
1656 /* ok so far, so remember planned merge */
1657 if (source != target)
1658 {
1659#ifdef TRGM_REGEXP_DEBUG
1660 fprintf(stderr, " ... tentatively merging s%d into s%d\n",
1661 -target->snumber, -source->snumber);
1662#endif
1663 target->tentParent = source;
1664 source->tentFlags |= target_flags;
1665 }
1666 }
1667
1668 /*
1669 * We must reset all the tentFlags/tentParent fields before
1670 * continuing. tentFlags could only have become set in states that
1671 * are the source or parent or tentative parent of one of the current
1672 * arcs; likewise tentParent could only have become set in states that
1673 * are the target or parent or tentative parent of one of the current
1674 * arcs. There might be some overlap between those sets, but if we
1675 * clear tentFlags in target states as well as source states, we
1676 * should be okay even if we visit a state as target before visiting
1677 * it as a source.
1678 */
1679 foreach(cell, trgmInfo->arcs)
1680 {
1681 TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1682 TrgmState *source = arcInfo->source,
1683 *target = arcInfo->target;
1684 TrgmState *ttarget;
1685
1686 /* no need to touch previously-merged states */
1687 while (source->parent)
1688 source = source->parent;
1689 while (target->parent)
1690 target = target->parent;
1691
1692 while (source)
1693 {
1694 source->tentFlags = 0;
1695 source = source->tentParent;
1696 }
1697
1698 while ((ttarget = target->tentParent) != NULL)
1699 {
1700 target->tentParent = NULL;
1701 target->tentFlags = 0; /* in case it was also a source */
1702 target = ttarget;
1703 }
1704 }
1705
1706 /* Now, move on if we can't drop this trigram */
1707 if (!canRemove)
1708 {
1709#ifdef TRGM_REGEXP_DEBUG
1710 fprintf(stderr, " ... not ok to merge\n");
1711#endif
1712 continue;
1713 }
1714
1715 /* OK, merge states linked by each arc labeled by the trigram */
1716 foreach(cell, trgmInfo->arcs)
1717 {
1718 TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1719 TrgmState *source = arcInfo->source,
1720 *target = arcInfo->target;
1721
1722 while (source->parent)
1723 source = source->parent;
1724 while (target->parent)
1725 target = target->parent;
1726 if (source != target)
1727 {
1728#ifdef TRGM_REGEXP_DEBUG
1729 fprintf(stderr, "merging s%d into s%d\n",
1730 -target->snumber, -source->snumber);
1731#endif
1732 mergeStates(source, target);
1733 /* Assert we didn't merge initial and final states */
1734 Assert((source->flags & (TSTATE_INIT | TSTATE_FIN)) !=
1736 }
1737 }
1738
1739 /* Mark trigram unexpanded, and update totals */
1740 trgmInfo->expanded = false;
1741 totalTrgmCount -= trgmInfo->count;
1742 totalTrgmPenalty -= trgmInfo->penalty;
1743 }
1744
1745 /* Did we succeed in fitting into MAX_TRGM_COUNT? */
1746 if (totalTrgmCount > MAX_TRGM_COUNT)
1747 return false;
1748
1749 trgmNFA->totalTrgmCount = (int) totalTrgmCount;
1750
1751 /*
1752 * Sort color trigrams by colors (will be useful for bsearch in packGraph)
1753 * and enumerate the color trigrams that are expanded.
1754 */
1755 cnumber = 0;
1756 qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1758 for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1759 {
1760 if (colorTrgms[i].expanded)
1761 {
1762 colorTrgms[i].cnumber = cnumber;
1763 cnumber++;
1764 }
1765 }
1766
1767 return true;
1768}
int64_t int64
Definition: c.h:485
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
static int list_length(const List *l)
Definition: pg_list.h:152
#define list_make1(x1)
Definition: pg_list.h:212
float4 penalty
Definition: trgm_regexp.c:380
TrgmState * source
Definition: trgm_regexp.c:362
TrgmState * target
Definition: trgm_regexp.c:363
int TrgmColor
Definition: trgm_regexp.c:285
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1907
#define MAX_TRGM_COUNT
Definition: trgm_regexp.c:223
static const float4 penalties[8]
Definition: trgm_regexp.c:231
#define WISH_TRGM_PENALTY
Definition: trgm_regexp.c:224
static void mergeStates(TrgmState *state1, TrgmState *state2)
Definition: trgm_regexp.c:1877

References ColorTrgmInfo::arcs, TrgmNFA::arcsCount, Assert, ColorTrgmInfo::cnumber, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, colorTrgmInfoCmp(), colorTrgmInfoPenaltyCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::count, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fprintf, hash_seq_init(), hash_seq_search(), i, j, lfirst, list_concat(), list_length(), list_make1, MAX_TRGM_COUNT, mergeStates(), p2, palloc(), palloc0(), penalties, ColorTrgmInfo::penalty, qsort, TrgmArcInfo::source, source, TrgmNFA::states, TrgmArcInfo::target, TrgmState::tentParent, TrgmNFA::totalTrgmCount, TSTATE_FIN, TSTATE_INIT, WISH_TRGM_PENALTY, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ transformGraph()

static void transformGraph ( TrgmNFA trgmNFA)
static

Definition at line 895 of file trgm_regexp.c.

896{
897 HASHCTL hashCtl;
898 TrgmStateKey initkey;
899 TrgmState *initstate;
900 ListCell *lc;
901
902 /* Initialize this stage's workspace in trgmNFA struct */
903 trgmNFA->queue = NIL;
904 trgmNFA->keysQueue = NIL;
905 trgmNFA->arcsCount = 0;
906 trgmNFA->overflowed = false;
907
908 /* Create hashtable for states */
909 hashCtl.keysize = sizeof(TrgmStateKey);
910 hashCtl.entrysize = sizeof(TrgmState);
911 hashCtl.hcxt = CurrentMemoryContext;
912 trgmNFA->states = hash_create("Trigram NFA",
913 1024,
914 &hashCtl,
916 trgmNFA->nstates = 0;
917
918 /* Create initial state: ambiguous prefix, NFA's initial state */
919 MemSet(&initkey, 0, sizeof(initkey));
920 initkey.prefix.colors[0] = COLOR_UNKNOWN;
921 initkey.prefix.colors[1] = COLOR_UNKNOWN;
922 initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
923
924 initstate = getState(trgmNFA, &initkey);
925 initstate->flags |= TSTATE_INIT;
926 trgmNFA->initState = initstate;
927
928 /*
929 * Recursively build the expanded graph by processing queue of states
930 * (breadth-first search). getState already put initstate in the queue.
931 * Note that getState will append new states to the queue within the loop,
932 * too; this works as long as we don't do repeat fetches using the "lc"
933 * pointer.
934 */
935 foreach(lc, trgmNFA->queue)
936 {
937 TrgmState *state = (TrgmState *) lfirst(lc);
938
939 /*
940 * If we overflowed then just mark state as final. Otherwise do
941 * actual processing.
942 */
943 if (trgmNFA->overflowed)
944 state->flags |= TSTATE_FIN;
945 else
946 processState(trgmNFA, state);
947
948 /* Did we overflow? */
949 if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
951 trgmNFA->overflowed = true;
952 }
953}
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1341
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
MemoryContext hcxt
Definition: hsearch.h:86
bool overflowed
Definition: trgm_regexp.c:418
#define MAX_EXPANDED_STATES
Definition: trgm_regexp.c:221
#define MAX_EXPANDED_ARCS
Definition: trgm_regexp.c:222
struct TrgmState TrgmState
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:959

References TrgmNFA::arcsCount, COLOR_UNKNOWN, TrgmPrefix::colors, CurrentMemoryContext, HASHCTL::entrysize, TrgmState::flags, getState(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, hash_get_num_entries(), HASHCTL::hcxt, TrgmNFA::initState, HASHCTL::keysize, TrgmNFA::keysQueue, lfirst, MAX_EXPANDED_ARCS, MAX_EXPANDED_STATES, MemSet, NIL, TrgmStateKey::nstate, TrgmNFA::nstates, TrgmNFA::overflowed, pg_reg_getinitialstate(), TrgmStateKey::prefix, processState(), TrgmNFA::queue, TrgmNFA::regex, TrgmNFA::states, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

◆ trigramsMatchGraph()

bool trigramsMatchGraph ( TrgmPackedGraph graph,
bool *  check 
)

Definition at line 628 of file trgm_regexp.c.

629{
630 int i,
631 j,
632 k,
633 queueIn,
634 queueOut;
635
636 /*
637 * Reset temporary working areas.
638 */
639 memset(graph->colorTrigramsActive, 0,
640 sizeof(bool) * graph->colorTrigramsCount);
641 memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
642
643 /*
644 * Check which color trigrams were matched. A match for any simple
645 * trigram associated with a color trigram counts as a match of the color
646 * trigram.
647 */
648 j = 0;
649 for (i = 0; i < graph->colorTrigramsCount; i++)
650 {
651 int cnt = graph->colorTrigramGroups[i];
652
653 for (k = j; k < j + cnt; k++)
654 {
655 if (check[k])
656 {
657 /*
658 * Found one matched trigram in the group. Can skip the rest
659 * of them and go to the next group.
660 */
661 graph->colorTrigramsActive[i] = true;
662 break;
663 }
664 }
665 j = j + cnt;
666 }
667
668 /*
669 * Initialize the statesQueue to hold just the initial state. Note:
670 * statesQueue has room for statesCount entries, which is certainly enough
671 * since no state will be put in the queue more than once. The
672 * statesActive array marks which states have been queued.
673 */
674 graph->statesActive[0] = true;
675 graph->statesQueue[0] = 0;
676 queueIn = 0;
677 queueOut = 1;
678
679 /* Process queued states as long as there are any. */
680 while (queueIn < queueOut)
681 {
682 int stateno = graph->statesQueue[queueIn++];
683 TrgmPackedState *state = &graph->states[stateno];
684 int cnt = state->arcsCount;
685
686 /* Loop over state's out-arcs */
687 for (i = 0; i < cnt; i++)
688 {
689 TrgmPackedArc *arc = &state->arcs[i];
690
691 /*
692 * If corresponding color trigram is present then activate the
693 * corresponding state. We're done if that's the final state,
694 * otherwise queue the state if it's not been queued already.
695 */
696 if (graph->colorTrigramsActive[arc->colorTrgm])
697 {
698 int nextstate = arc->targetState;
699
700 if (nextstate == 1)
701 return true; /* success: final state is reachable */
702
703 if (!graph->statesActive[nextstate])
704 {
705 graph->statesActive[nextstate] = true;
706 graph->statesQueue[queueOut++] = nextstate;
707 }
708 }
709 }
710 }
711
712 /* Queue is empty, so match fails. */
713 return false;
714}

References TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, j, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, and TrgmPackedGraph::statesQueue.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

◆ validArcLabel()

static bool validArcLabel ( TrgmStateKey key,
TrgmColor  co 
)
static

Definition at line 1330 of file trgm_regexp.c.

1331{
1332 /*
1333 * We have to know full trigram in order to add outgoing arc. So we can't
1334 * do it if prefix is ambiguous.
1335 */
1336 if (key->prefix.colors[0] == COLOR_UNKNOWN)
1337 return false;
1338
1339 /* If key->prefix.colors[0] isn't unknown, its second color isn't either */
1340 Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
1341 /* And we should not be called with an unknown arc color anytime */
1342 Assert(co != COLOR_UNKNOWN);
1343
1344 /*
1345 * We don't bother with making arcs representing three non-word
1346 * characters, since that's useless for trigram extraction.
1347 */
1348 if (key->prefix.colors[0] == COLOR_BLANK &&
1349 key->prefix.colors[1] == COLOR_BLANK &&
1350 co == COLOR_BLANK)
1351 return false;
1352
1353 /*
1354 * We also reject nonblank-blank-anything. The nonblank-blank-nonblank
1355 * case doesn't correspond to any trigram the trigram extraction code
1356 * would make. The nonblank-blank-blank case is also not possible with
1357 * RPADDING = 1. (Note that in many cases we'd fail to generate such a
1358 * trigram even if it were valid, for example processing "foo bar" will
1359 * not result in considering the trigram "o ". So if you want to support
1360 * RPADDING = 2, there's more to do than just twiddle this test.)
1361 */
1362 if (key->prefix.colors[0] != COLOR_BLANK &&
1363 key->prefix.colors[1] == COLOR_BLANK)
1364 return false;
1365
1366 /*
1367 * Other combinations involving blank are valid, in particular we assume
1368 * blank-blank-nonblank is valid, which presumes that LPADDING is 2.
1369 *
1370 * Note: Using again the example "foo bar", we will not consider the
1371 * trigram " b", though this trigram would be found by the trigram
1372 * extraction code. Since we will find " ba", it doesn't seem worth
1373 * trying to hack the algorithm to generate the additional trigram.
1374 */
1375
1376 /* arc label is valid */
1377 return true;
1378}

References Assert, COLOR_BLANK, COLOR_UNKNOWN, and sort-test::key.

Referenced by addArc(), and addKey().

Variable Documentation

◆ penalties

const float4 penalties[8]
static
Initial value:
= {
1.0f,
3.5f,
0.0f,
0.0f,
4.2f,
2.1f,
25.0f,
0.0f
}

Definition at line 231 of file trgm_regexp.c.

Referenced by selectColorTrigrams().