PostgreSQL Source Code  git master
trgm_regexp.c File Reference
#include "postgres.h"
#include "regex/regexport.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "varatt.h"
Include dependency graph for trgm_regexp.c:

Go to the source code of this file.

Data Structures

struct  trgm_mb_char
 
struct  TrgmColorInfo
 
struct  TrgmPrefix
 
struct  ColorTrgm
 
struct  TrgmStateKey
 
struct  TrgmState
 
struct  TrgmArc
 
struct  TrgmArcInfo
 
struct  ColorTrgmInfo
 
struct  TrgmNFA
 
struct  TrgmPackedArc
 
struct  TrgmPackedState
 
struct  TrgmPackedGraph
 
struct  TrgmPackArcInfo
 

Macros

#define MAX_EXPANDED_STATES   128
 
#define MAX_EXPANDED_ARCS   1024
 
#define MAX_TRGM_COUNT   256
 
#define WISH_TRGM_PENALTY   16
 
#define COLOR_COUNT_LIMIT   256
 
#define COLOR_UNKNOWN   (-3)
 
#define COLOR_BLANK   (-4)
 
#define TSTATE_INIT   0x01 /* flag indicating this state is initial */
 
#define TSTATE_FIN   0x02 /* flag indicating this state is final */
 

Typedefs

typedef int TrgmColor
 
typedef struct TrgmState TrgmState
 

Functions

static TRGMcreateTrgmNFAInternal (regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
 
static void RE_compile (regex_t *regex, text *text_re, int cflags, Oid collation)
 
static void getColorInfo (regex_t *regex, TrgmNFA *trgmNFA)
 
static bool convertPgWchar (pg_wchar c, trgm_mb_char *result)
 
static void transformGraph (TrgmNFA *trgmNFA)
 
static void processState (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addKey (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
 
static void addKeyToQueue (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static void addArcs (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addArc (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
 
static bool validArcLabel (TrgmStateKey *key, TrgmColor co)
 
static TrgmStategetState (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static bool prefixContains (TrgmPrefix *prefix1, TrgmPrefix *prefix2)
 
static bool selectColorTrigrams (TrgmNFA *trgmNFA)
 
static TRGMexpandColorTrigrams (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static void fillTrgm (trgm *ptrgm, trgm_mb_char s[3])
 
static void mergeStates (TrgmState *state1, TrgmState *state2)
 
static int colorTrgmInfoCmp (const void *p1, const void *p2)
 
static int colorTrgmInfoPenaltyCmp (const void *p1, const void *p2)
 
static TrgmPackedGraphpackGraph (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static int packArcInfoCmp (const void *a1, const void *a2)
 
TRGMcreateTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
 
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)
 

Variables

static const float4 penalties [8]
 

Macro Definition Documentation

◆ COLOR_BLANK

#define COLOR_BLANK   (-4)

Definition at line 287 of file trgm_regexp.c.

◆ COLOR_COUNT_LIMIT

#define COLOR_COUNT_LIMIT   256

Definition at line 223 of file trgm_regexp.c.

◆ COLOR_UNKNOWN

#define COLOR_UNKNOWN   (-3)

Definition at line 286 of file trgm_regexp.c.

◆ MAX_EXPANDED_ARCS

#define MAX_EXPANDED_ARCS   1024

Definition at line 220 of file trgm_regexp.c.

◆ MAX_EXPANDED_STATES

#define MAX_EXPANDED_STATES   128

Definition at line 219 of file trgm_regexp.c.

◆ MAX_TRGM_COUNT

#define MAX_TRGM_COUNT   256

Definition at line 221 of file trgm_regexp.c.

◆ TSTATE_FIN

#define TSTATE_FIN   0x02 /* flag indicating this state is final */

Definition at line 330 of file trgm_regexp.c.

◆ TSTATE_INIT

#define TSTATE_INIT   0x01 /* flag indicating this state is initial */

Definition at line 329 of file trgm_regexp.c.

◆ WISH_TRGM_PENALTY

#define WISH_TRGM_PENALTY   16

Definition at line 222 of file trgm_regexp.c.

Typedef Documentation

◆ TrgmColor

typedef int TrgmColor

Definition at line 283 of file trgm_regexp.c.

◆ TrgmState

typedef struct TrgmState TrgmState

Function Documentation

◆ addArc()

static void addArc ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key,
TrgmColor  co,
TrgmStateKey destKey 
)
static

Definition at line 1286 of file trgm_regexp.c.

1288 {
1289  TrgmArc *arc;
1290  ListCell *cell;
1291 
1292  /* Do nothing if this wouldn't be a valid arc label trigram */
1293  if (!validArcLabel(key, co))
1294  return;
1295 
1296  /*
1297  * Check if we are going to reach key which is covered by a key which is
1298  * already listed in this state. If so arc is useless: the NFA can bypass
1299  * it through a path that doesn't require any predictable trigram, so
1300  * whether the arc's trigram is present or not doesn't really matter.
1301  */
1302  foreach(cell, state->enterKeys)
1303  {
1304  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1305 
1306  if (existingKey->nstate == destKey->nstate &&
1307  prefixContains(&existingKey->prefix, &destKey->prefix))
1308  return;
1309  }
1310 
1311  /* Checks were successful, add new arc */
1312  arc = (TrgmArc *) palloc(sizeof(TrgmArc));
1313  arc->target = getState(trgmNFA, destKey);
1314  arc->ctrgm.colors[0] = key->prefix.colors[0];
1315  arc->ctrgm.colors[1] = key->prefix.colors[1];
1316  arc->ctrgm.colors[2] = co;
1317 
1318  state->arcs = lappend(state->arcs, arc);
1319  trgmNFA->arcsCount++;
1320 }
List * lappend(List *list, void *datum)
Definition: list.c:339
void * palloc(Size size)
Definition: mcxt.c:1317
#define lfirst(lc)
Definition: pg_list.h:172
int arcsCount
Definition: trgm_regexp.c:415
TrgmPrefix prefix
Definition: trgm_regexp.c:310
Definition: regguts.h:296
Definition: regguts.h:323
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1328
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1383
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1414

References TrgmNFA::arcsCount, getState(), sort-test::key, lappend(), lfirst, TrgmStateKey::nstate, palloc(), TrgmStateKey::prefix, prefixContains(), and validArcLabel().

Referenced by addArcs().

◆ addArcs()

static void addArcs ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 1188 of file trgm_regexp.c.

1189 {
1190  TrgmStateKey destKey;
1191  ListCell *cell;
1192  regex_arc_t *arcs;
1193  int arcsCount,
1194  i;
1195 
1196  /*
1197  * Ensure any pad bytes in destKey are zero, since it may get used as a
1198  * hashtable key by getState.
1199  */
1200  MemSet(&destKey, 0, sizeof(destKey));
1201 
1202  /*
1203  * Iterate over enter keys associated with this expanded-graph state. This
1204  * includes both the state's own stateKey, and any enter keys we added to
1205  * it during addKey (which represent expanded-graph states that are not
1206  * distinguishable from this one by means of trigrams). For each such
1207  * enter key, examine all the out-arcs of the key's underlying NFA state,
1208  * and try to make a trigram arc leading to where the out-arc leads.
1209  * (addArc will deal with whether the arc is valid or not.)
1210  */
1211  foreach(cell, state->enterKeys)
1212  {
1213  TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
1214 
1215  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1216  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1217  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1218 
1219  for (i = 0; i < arcsCount; i++)
1220  {
1221  regex_arc_t *arc = &arcs[i];
1222  TrgmColorInfo *colorInfo;
1223 
1224  /*
1225  * Ignore non-expandable colors; addKey already handled the case.
1226  *
1227  * We need no special check for WHITE or begin/end pseudocolors
1228  * here. We don't need to do any processing for them, and they
1229  * will be marked non-expandable since the regex engine will have
1230  * reported them that way. We do have to watch out for RAINBOW,
1231  * which has a negative color number.
1232  */
1233  if (arc->co < 0)
1234  continue;
1235  Assert(arc->co < trgmNFA->ncolors);
1236 
1237  colorInfo = &trgmNFA->colorInfo[arc->co];
1238  if (!colorInfo->expandable)
1239  continue;
1240 
1241  if (colorInfo->containsNonWord)
1242  {
1243  /*
1244  * Color includes non-word character(s).
1245  *
1246  * Generate an arc, treating this transition as occurring on
1247  * BLANK. This allows word-ending trigrams to be manufactured
1248  * if possible.
1249  */
1250  destKey.prefix.colors[0] = key->prefix.colors[1];
1251  destKey.prefix.colors[1] = COLOR_BLANK;
1252  destKey.nstate = arc->to;
1253 
1254  addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
1255  }
1256 
1257  if (colorInfo->wordCharsCount > 0)
1258  {
1259  /*
1260  * Color includes word character(s).
1261  *
1262  * Generate an arc. Color is pushed into prefix of target
1263  * state.
1264  */
1265  destKey.prefix.colors[0] = key->prefix.colors[1];
1266  destKey.prefix.colors[1] = arc->co;
1267  destKey.nstate = arc->to;
1268 
1269  addArc(trgmNFA, state, key, arc->co, &destKey);
1270  }
1271  }
1272 
1273  pfree(arcs);
1274  }
1275 }
#define Assert(condition)
Definition: c.h:858
#define MemSet(start, val, len)
Definition: c.h:1020
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1521
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
bool containsNonWord
Definition: trgm_regexp.c:262
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:404
int ncolors
Definition: trgm_regexp.c:405
regex_t * regex
Definition: trgm_regexp.c:403
TrgmColor colors[2]
Definition: trgm_regexp.c:291
color co
Definition: regguts.h:298
struct state * to
Definition: regguts.h:300
#define COLOR_BLANK
Definition: trgm_regexp.c:287
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
Definition: trgm_regexp.c:1286

References addArc(), Assert, arc::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmColorInfo::expandable, i, sort-test::key, lfirst, MemSet, TrgmNFA::ncolors, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, TrgmNFA::regex, arc::to, and TrgmColorInfo::wordCharsCount.

Referenced by processState().

◆ addKey()

static void addKey ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key 
)
static

Definition at line 1007 of file trgm_regexp.c.

1008 {
1009  regex_arc_t *arcs;
1010  TrgmStateKey destKey;
1011  ListCell *cell;
1012  int i,
1013  arcsCount;
1014 
1015  /*
1016  * Ensure any pad bytes in destKey are zero, since it may get used as a
1017  * hashtable key by getState.
1018  */
1019  MemSet(&destKey, 0, sizeof(destKey));
1020 
1021  /*
1022  * Compare key to each existing enter key of the state to check for
1023  * redundancy. We can drop either old key(s) or the new key if we find
1024  * redundancy.
1025  */
1026  foreach(cell, state->enterKeys)
1027  {
1028  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1029 
1030  if (existingKey->nstate == key->nstate)
1031  {
1032  if (prefixContains(&existingKey->prefix, &key->prefix))
1033  {
1034  /* This old key already covers the new key. Nothing to do */
1035  return;
1036  }
1037  if (prefixContains(&key->prefix, &existingKey->prefix))
1038  {
1039  /*
1040  * The new key covers this old key. Remove the old key, it's
1041  * no longer needed once we add this key to the list.
1042  */
1043  state->enterKeys = foreach_delete_current(state->enterKeys,
1044  cell);
1045  }
1046  }
1047  }
1048 
1049  /* No redundancy, so add this key to the state's list */
1050  state->enterKeys = lappend(state->enterKeys, key);
1051 
1052  /* If state is now known final, mark it and we're done */
1053  if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
1054  {
1055  state->flags |= TSTATE_FIN;
1056  return;
1057  }
1058 
1059  /*
1060  * Loop through all outgoing arcs of the corresponding state in the
1061  * original NFA.
1062  */
1063  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1064  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1065  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1066 
1067  for (i = 0; i < arcsCount; i++)
1068  {
1069  regex_arc_t *arc = &arcs[i];
1070 
1071  if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
1072  {
1073  /*
1074  * Start of line/string (^). Trigram extraction treats start of
1075  * line same as start of word: double space prefix is added.
1076  * Hence, make an enter key showing we can reach the arc
1077  * destination with all-blank prefix.
1078  */
1079  destKey.prefix.colors[0] = COLOR_BLANK;
1080  destKey.prefix.colors[1] = COLOR_BLANK;
1081  destKey.nstate = arc->to;
1082 
1083  /* Add enter key to this state */
1084  addKeyToQueue(trgmNFA, &destKey);
1085  }
1086  else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
1087  {
1088  /*
1089  * End of line/string ($). We must consider this arc as a
1090  * transition that doesn't read anything. The reason for adding
1091  * this enter key to the state is that if the arc leads to the
1092  * NFA's final state, we must mark this expanded state as final.
1093  */
1094  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1095  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1096  destKey.nstate = arc->to;
1097 
1098  /* Add enter key to this state */
1099  addKeyToQueue(trgmNFA, &destKey);
1100  }
1101  else if (arc->co >= 0)
1102  {
1103  /* Regular color (including WHITE) */
1104  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1105 
1106  if (colorInfo->expandable)
1107  {
1108  if (colorInfo->containsNonWord &&
1110  {
1111  /*
1112  * We can reach the arc destination after reading a
1113  * non-word character, but the prefix is not something
1114  * that addArc will accept with COLOR_BLANK, so no trigram
1115  * arc can get made for this transition. We must make an
1116  * enter key to show that the arc destination is
1117  * reachable. Set it up with an all-blank prefix, since
1118  * that corresponds to what the trigram extraction code
1119  * will do at a word starting boundary.
1120  */
1121  destKey.prefix.colors[0] = COLOR_BLANK;
1122  destKey.prefix.colors[1] = COLOR_BLANK;
1123  destKey.nstate = arc->to;
1124  addKeyToQueue(trgmNFA, &destKey);
1125  }
1126 
1127  if (colorInfo->wordCharsCount > 0 &&
1128  !validArcLabel(key, arc->co))
1129  {
1130  /*
1131  * We can reach the arc destination after reading a word
1132  * character, but the prefix is not something that addArc
1133  * will accept, so no trigram arc can get made for this
1134  * transition. We must make an enter key to show that the
1135  * arc destination is reachable. The prefix for the enter
1136  * key should reflect the info we have for this arc.
1137  */
1138  destKey.prefix.colors[0] = key->prefix.colors[1];
1139  destKey.prefix.colors[1] = arc->co;
1140  destKey.nstate = arc->to;
1141  addKeyToQueue(trgmNFA, &destKey);
1142  }
1143  }
1144  else
1145  {
1146  /*
1147  * Unexpandable color. Add enter key with ambiguous prefix,
1148  * showing we can reach the destination from this state, but
1149  * the preceding colors will be uncertain. (We do not set the
1150  * first prefix color to key->prefix.colors[1], because a
1151  * prefix of known followed by unknown is invalid.)
1152  */
1153  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1154  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1155  destKey.nstate = arc->to;
1156  addKeyToQueue(trgmNFA, &destKey);
1157  }
1158  }
1159  else
1160  {
1161  /* RAINBOW: treat as unexpandable color */
1162  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1163  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1164  destKey.nstate = arc->to;
1165  addKeyToQueue(trgmNFA, &destKey);
1166  }
1167  }
1168 
1169  pfree(arcs);
1170 }
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:208
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:191
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1176
#define TSTATE_FIN
Definition: trgm_regexp.c:330
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:286

References addKeyToQueue(), arc::co, COLOR_BLANK, COLOR_UNKNOWN, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmColorInfo::expandable, foreach_delete_current, i, sort-test::key, lappend(), lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_colorisbegin(), pg_reg_colorisend(), pg_reg_getfinalstate(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, prefixContains(), TrgmNFA::regex, arc::to, TSTATE_FIN, validArcLabel(), and TrgmColorInfo::wordCharsCount.

Referenced by processState().

◆ addKeyToQueue()

static void addKeyToQueue ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1176 of file trgm_regexp.c.

1177 {
1178  TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
1179 
1180  memcpy(keyCopy, key, sizeof(TrgmStateKey));
1181  trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
1182 }
List * keysQueue
Definition: trgm_regexp.c:414

References sort-test::key, TrgmNFA::keysQueue, lappend(), and palloc().

Referenced by addKey().

◆ colorTrgmInfoCmp()

static int colorTrgmInfoCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1892 of file trgm_regexp.c.

1893 {
1894  const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
1895  const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
1896 
1897  return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
1898 }
ColorTrgm ctrgm
Definition: trgm_regexp.c:375

References ColorTrgmInfo::ctrgm, and p2.

Referenced by packGraph(), and selectColorTrigrams().

◆ colorTrgmInfoPenaltyCmp()

static int colorTrgmInfoPenaltyCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1905 of file trgm_regexp.c.

1906 {
1907  float4 penalty1 = ((const ColorTrgmInfo *) p1)->penalty;
1908  float4 penalty2 = ((const ColorTrgmInfo *) p2)->penalty;
1909 
1910  if (penalty1 < penalty2)
1911  return 1;
1912  else if (penalty1 == penalty2)
1913  return 0;
1914  else
1915  return -1;
1916 }
float float4
Definition: c.h:629

References p2.

Referenced by selectColorTrigrams().

◆ convertPgWchar()

static bool convertPgWchar ( pg_wchar  c,
trgm_mb_char result 
)
static

Definition at line 827 of file trgm_regexp.c.

828 {
829  /* "s" has enough space for a multibyte character and a trailing NUL */
830  char s[MAX_MULTIBYTE_CHAR_LEN + 1];
831 
832  /*
833  * We can ignore the NUL character, since it can never appear in a PG text
834  * string. This avoids the need for various special cases when
835  * reconstructing trigrams.
836  */
837  if (c == 0)
838  return false;
839 
840  /* Do the conversion, making sure the result is NUL-terminated */
841  memset(s, 0, sizeof(s));
842  pg_wchar2mb_with_len(&c, s, 1);
843 
844  /*
845  * In IGNORECASE mode, we can ignore uppercase characters. We assume that
846  * the regex engine generated both uppercase and lowercase equivalents
847  * within each color, since we used the REG_ICASE option; so there's no
848  * need to process the uppercase version.
849  *
850  * XXX this code is dependent on the assumption that lowerstr() works the
851  * same as the regex engine's internal case folding machinery. Might be
852  * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
853  * On the other hand, the trigrams in the index were created using
854  * lowerstr(), so we're probably screwed if there's any incompatibility
855  * anyway.
856  */
857 #ifdef IGNORECASE
858  {
859  char *lowerCased = lowerstr(s);
860 
861  if (strcmp(lowerCased, s) != 0)
862  {
863  pfree(lowerCased);
864  return false;
865  }
866  pfree(lowerCased);
867  }
868 #endif
869 
870  /* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
871  memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
872  return true;
873 }
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:1008
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:33
char * c
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:243
char * lowerstr(const char *str)
Definition: ts_locale.c:253

References trgm_mb_char::bytes, lowerstr(), MAX_MULTIBYTE_CHAR_LEN, pfree(), and pg_wchar2mb_with_len().

Referenced by getColorInfo().

◆ createTrgmNFA()

TRGM* createTrgmNFA ( text text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 522 of file trgm_regexp.c.

524 {
525  TRGM *trg;
526  regex_t regex;
527  MemoryContext tmpcontext;
528  MemoryContext oldcontext;
529 
530  /*
531  * This processing generates a great deal of cruft, which we'd like to
532  * clean up before returning (since this function may be called in a
533  * query-lifespan memory context). Make a temp context we can work in so
534  * that cleanup is easy.
535  */
537  "createTrgmNFA temporary context",
539  oldcontext = MemoryContextSwitchTo(tmpcontext);
540 
541  /*
542  * Stage 1: Compile the regexp into a NFA, using the regexp library.
543  */
544 #ifdef IGNORECASE
545  RE_compile(&regex, text_re,
546  REG_ADVANCED | REG_NOSUB | REG_ICASE, collation);
547 #else
548  RE_compile(&regex, text_re,
549  REG_ADVANCED | REG_NOSUB, collation);
550 #endif
551 
552  trg = createTrgmNFAInternal(&regex, graph, rcontext);
553 
554  /* Clean up all the cruft we created (including regex) */
555  MemoryContextSwitchTo(oldcontext);
556  MemoryContextDelete(tmpcontext);
557 
558  return trg;
559 }
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
MemoryContextSwitchTo(old_ctx)
#define REG_ICASE
Definition: regex.h:184
#define REG_ADVANCED
Definition: regex.h:181
#define REG_NOSUB
Definition: regex.h:185
#define regex_t
Definition: regex.h:245
Definition: trgm.h:67
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:565
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
Definition: trgm_regexp.c:719

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), RE_compile(), REG_ADVANCED, REG_ICASE, REG_NOSUB, and regex_t.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

◆ createTrgmNFAInternal()

static TRGM * createTrgmNFAInternal ( regex_t regex,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)
static

Definition at line 565 of file trgm_regexp.c.

567 {
568  TRGM *trg;
569  TrgmNFA trgmNFA;
570 
571  trgmNFA.regex = regex;
572 
573  /* Collect color information from the regex */
574  getColorInfo(regex, &trgmNFA);
575 
576 #ifdef TRGM_REGEXP_DEBUG
577  printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
578 #endif
579 
580  /*
581  * Stage 2: Create an expanded graph from the source NFA.
582  */
583  transformGraph(&trgmNFA);
584 
585 #ifdef TRGM_REGEXP_DEBUG
586  printTrgmNFA(&trgmNFA);
587 #endif
588 
589  /*
590  * Fail if we were unable to make a nontrivial graph, ie it is possible to
591  * get from the initial state to the final state without reading any
592  * predictable trigram.
593  */
594  if (trgmNFA.initState->flags & TSTATE_FIN)
595  return NULL;
596 
597  /*
598  * Stage 3: Select color trigrams to expand. Fail if too many trigrams.
599  */
600  if (!selectColorTrigrams(&trgmNFA))
601  return NULL;
602 
603  /*
604  * Stage 4: Expand color trigrams and pack graph into final
605  * representation.
606  */
607  trg = expandColorTrigrams(&trgmNFA, rcontext);
608 
609  *graph = packGraph(&trgmNFA, rcontext);
610 
611 #ifdef TRGM_REGEXP_DEBUG
612  printTrgmPackedGraph(*graph, trg);
613 #endif
614 
615  return trg;
616 }
TrgmState * initState
Definition: trgm_regexp.c:409
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:763
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:1456
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1775
static void transformGraph(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:893
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1930

References TrgmNFA::colorInfo, expandColorTrigrams(), TrgmState::flags, getColorInfo(), TrgmNFA::initState, TrgmNFA::ncolors, packGraph(), TrgmNFA::regex, selectColorTrigrams(), transformGraph(), and TSTATE_FIN.

Referenced by createTrgmNFA().

◆ expandColorTrigrams()

static TRGM * expandColorTrigrams ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1775 of file trgm_regexp.c.

1776 {
1777  TRGM *trg;
1778  trgm *p;
1779  int i;
1780  TrgmColorInfo blankColor;
1781  trgm_mb_char blankChar;
1782 
1783  /* Set up "blank" color structure containing a single zero character */
1784  memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
1785  blankColor.wordCharsCount = 1;
1786  blankColor.wordChars = &blankChar;
1787 
1788  /* Construct the trgm array */
1789  trg = (TRGM *)
1790  MemoryContextAllocZero(rcontext,
1791  TRGMHDRSIZE +
1792  trgmNFA->totalTrgmCount * sizeof(trgm));
1793  trg->flag = ARRKEY;
1794  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
1795  p = GETARR(trg);
1796  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1797  {
1798  ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
1799  TrgmColorInfo *c[3];
1800  trgm_mb_char s[3];
1801  int j,
1802  i1,
1803  i2,
1804  i3;
1805 
1806  /* Ignore any unexpanded trigrams ... */
1807  if (!colorTrgm->expanded)
1808  continue;
1809 
1810  /* Get colors, substituting the dummy struct for COLOR_BLANK */
1811  for (j = 0; j < 3; j++)
1812  {
1813  if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
1814  c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
1815  else
1816  c[j] = &blankColor;
1817  }
1818 
1819  /* Iterate over all possible combinations of colors' characters */
1820  for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
1821  {
1822  s[0] = c[0]->wordChars[i1];
1823  for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
1824  {
1825  s[1] = c[1]->wordChars[i2];
1826  for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
1827  {
1828  s[2] = c[2]->wordChars[i3];
1829  fillTrgm(p, s);
1830  p++;
1831  }
1832  }
1833  }
1834  }
1835 
1836  return trg;
1837 }
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:60
int j
Definition: isn.c:74
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1215
TrgmColor colors[3]
Definition: trgm_regexp.c:300
uint8 flag
Definition: trgm.h:69
trgm_mb_char * wordChars
Definition: trgm_regexp.c:264
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:419
int colorTrgmsCount
Definition: trgm_regexp.c:420
int totalTrgmCount
Definition: trgm_regexp.c:421
char trgm[3]
Definition: trgm.h:42
#define GETARR(x)
Definition: trgm.h:106
#define ARRKEY
Definition: trgm.h:96
#define TRGMHDRSIZE
Definition: trgm.h:73
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
Definition: trgm_regexp.c:1843
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References ARRKEY, trgm_mb_char::bytes, CALCGTSIZE, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fillTrgm(), TRGM::flag, GETARR, i, j, MemoryContextAllocZero(), SET_VARSIZE, TrgmNFA::totalTrgmCount, TRGMHDRSIZE, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ fillTrgm()

static void fillTrgm ( trgm ptrgm,
trgm_mb_char  s[3] 
)
static

Definition at line 1843 of file trgm_regexp.c.

1844 {
1845  char str[3 * MAX_MULTIBYTE_CHAR_LEN],
1846  *p;
1847  int i,
1848  j;
1849 
1850  /* Write multibyte string into "str" (we don't need null termination) */
1851  p = str;
1852 
1853  for (i = 0; i < 3; i++)
1854  {
1855  if (s[i].bytes[0] != 0)
1856  {
1857  for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
1858  *p++ = s[i].bytes[j];
1859  }
1860  else
1861  {
1862  /* Emit a space in place of COLOR_BLANK */
1863  *p++ = ' ';
1864  }
1865  }
1866 
1867  /* Convert "str" to a standard trigram (possibly hashing it) */
1868  compact_trigram(ptrgm, str, p - str);
1869 }
const char * str
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:199

References trgm_mb_char::bytes, compact_trigram(), i, j, MAX_MULTIBYTE_CHAR_LEN, and str.

Referenced by expandColorTrigrams().

◆ getColorInfo()

static void getColorInfo ( regex_t regex,
TrgmNFA trgmNFA 
)
static

Definition at line 763 of file trgm_regexp.c.

764 {
765  int colorsCount = pg_reg_getnumcolors(regex);
766  int i;
767 
768  trgmNFA->ncolors = colorsCount;
769  trgmNFA->colorInfo = (TrgmColorInfo *)
770  palloc0(colorsCount * sizeof(TrgmColorInfo));
771 
772  /*
773  * Loop over colors, filling TrgmColorInfo about each. Note we include
774  * WHITE (0) even though we know it'll be reported as non-expandable.
775  */
776  for (i = 0; i < colorsCount; i++)
777  {
778  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
779  int charsCount = pg_reg_getnumcharacters(regex, i);
780  pg_wchar *chars;
781  int j;
782 
783  if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
784  {
785  /* Non expandable, or too large to work with */
786  colorInfo->expandable = false;
787  continue;
788  }
789 
790  colorInfo->expandable = true;
791  colorInfo->containsNonWord = false;
792  colorInfo->wordChars = (trgm_mb_char *)
793  palloc(sizeof(trgm_mb_char) * charsCount);
794  colorInfo->wordCharsCount = 0;
795 
796  /* Extract all the chars in this color */
797  chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
798  pg_reg_getcharacters(regex, i, chars, charsCount);
799 
800  /*
801  * Convert characters back to multibyte form, and save only those that
802  * are word characters. Set "containsNonWord" if any non-word
803  * character. (Note: it'd probably be nicer to keep the chars in
804  * pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
805  */
806  for (j = 0; j < charsCount; j++)
807  {
808  trgm_mb_char c;
809 
810  if (!convertPgWchar(chars[j], &c))
811  continue; /* ok to ignore it altogether */
812  if (ISWORDCHR(c.bytes))
813  colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
814  else
815  colorInfo->containsNonWord = true;
816  }
817 
818  pfree(chars);
819  }
820 }
unsigned int pg_wchar
Definition: mbprint.c:31
void * palloc0(Size size)
Definition: mcxt.c:1347
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:174
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:266
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:230
#define ISWORDCHR(c)
Definition: trgm.h:55
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
Definition: trgm_regexp.c:827
#define COLOR_COUNT_LIMIT
Definition: trgm_regexp.c:223
static char chars[TZ_MAX_CHARS]
Definition: zic.c:401

References chars, COLOR_COUNT_LIMIT, TrgmNFA::colorInfo, TrgmColorInfo::containsNonWord, convertPgWchar(), TrgmColorInfo::expandable, i, ISWORDCHR, j, TrgmNFA::ncolors, palloc(), palloc0(), pfree(), pg_reg_getcharacters(), pg_reg_getnumcharacters(), pg_reg_getnumcolors(), TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ getState()

static TrgmState * getState ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1383 of file trgm_regexp.c.

1384 {
1385  TrgmState *state;
1386  bool found;
1387 
1388  state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
1389  &found);
1390  if (!found)
1391  {
1392  /* New state: initialize and queue it */
1393  state->arcs = NIL;
1394  state->enterKeys = NIL;
1395  state->flags = 0;
1396  /* states are initially given negative numbers */
1397  state->snumber = -(++trgmNFA->nstates);
1398  state->parent = NULL;
1399  state->tentFlags = 0;
1400  state->tentParent = NULL;
1401 
1402  trgmNFA->queue = lappend(trgmNFA->queue, state);
1403  }
1404  return state;
1405 }
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_ENTER
Definition: hsearch.h:114
#define NIL
Definition: pg_list.h:68
int nstates
Definition: trgm_regexp.c:410
List * queue
Definition: trgm_regexp.c:413
HTAB * states
Definition: trgm_regexp.c:408

References HASH_ENTER, hash_search(), sort-test::key, lappend(), NIL, TrgmNFA::nstates, TrgmNFA::queue, and TrgmNFA::states.

Referenced by addArc(), and transformGraph().

◆ mergeStates()

static void mergeStates ( TrgmState state1,
TrgmState state2 
)
static

Definition at line 1875 of file trgm_regexp.c.

1876 {
1877  Assert(state1 != state2);
1878  Assert(!state1->parent);
1879  Assert(!state2->parent);
1880 
1881  /* state1 absorbs state2's flags */
1882  state1->flags |= state2->flags;
1883 
1884  /* state2, and indirectly all its children, become children of state1 */
1885  state2->parent = state1;
1886 }
struct TrgmState * parent
Definition: trgm_regexp.c:339

References Assert, TrgmState::flags, and TrgmState::parent.

Referenced by selectColorTrigrams().

◆ packArcInfoCmp()

static int packArcInfoCmp ( const void *  a1,
const void *  a2 
)
static

Definition at line 2091 of file trgm_regexp.c.

2092 {
2093  const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
2094  const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
2095 
2096  if (p1->sourceState < p2->sourceState)
2097  return -1;
2098  if (p1->sourceState > p2->sourceState)
2099  return 1;
2100  if (p1->colorTrgm < p2->colorTrgm)
2101  return -1;
2102  if (p1->colorTrgm > p2->colorTrgm)
2103  return 1;
2104  if (p1->targetState < p2->targetState)
2105  return -1;
2106  if (p1->targetState > p2->targetState)
2107  return 1;
2108  return 0;
2109 }
static const FormData_pg_attribute a1
Definition: heap.c:142
static const FormData_pg_attribute a2
Definition: heap.c:156

References a1, a2, TrgmPackArcInfo::colorTrgm, p2, TrgmPackArcInfo::sourceState, and TrgmPackArcInfo::targetState.

Referenced by packGraph().

◆ packGraph()

static TrgmPackedGraph * packGraph ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1930 of file trgm_regexp.c.

1931 {
1932  int snumber = 2,
1933  arcIndex,
1934  arcsCount;
1935  HASH_SEQ_STATUS scan_status;
1936  TrgmState *state;
1937  TrgmPackArcInfo *arcs;
1938  TrgmPackedArc *packedArcs;
1939  TrgmPackedGraph *result;
1940  int i,
1941  j;
1942 
1943  /* Enumerate surviving states, giving init and fin reserved numbers */
1944  hash_seq_init(&scan_status, trgmNFA->states);
1945  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1946  {
1947  while (state->parent)
1948  state = state->parent;
1949 
1950  if (state->snumber < 0)
1951  {
1952  if (state->flags & TSTATE_INIT)
1953  state->snumber = 0;
1954  else if (state->flags & TSTATE_FIN)
1955  state->snumber = 1;
1956  else
1957  {
1958  state->snumber = snumber;
1959  snumber++;
1960  }
1961  }
1962  }
1963 
1964  /* Collect array of all arcs */
1965  arcs = (TrgmPackArcInfo *)
1966  palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
1967  arcIndex = 0;
1968  hash_seq_init(&scan_status, trgmNFA->states);
1969  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1970  {
1971  TrgmState *source = state;
1972  ListCell *cell;
1973 
1974  while (source->parent)
1975  source = source->parent;
1976 
1977  foreach(cell, state->arcs)
1978  {
1979  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1980  TrgmState *target = arc->target;
1981 
1982  while (target->parent)
1983  target = target->parent;
1984 
1985  if (source->snumber != target->snumber)
1986  {
1987  ColorTrgmInfo *ctrgm;
1988 
1989  ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
1990  trgmNFA->colorTrgms,
1991  trgmNFA->colorTrgmsCount,
1992  sizeof(ColorTrgmInfo),
1994  Assert(ctrgm != NULL);
1995  Assert(ctrgm->expanded);
1996 
1997  arcs[arcIndex].sourceState = source->snumber;
1998  arcs[arcIndex].targetState = target->snumber;
1999  arcs[arcIndex].colorTrgm = ctrgm->cnumber;
2000  arcIndex++;
2001  }
2002  }
2003  }
2004 
2005  /* Sort arcs to ease duplicate detection */
2006  qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
2007 
2008  /* We could have duplicates because states were merged. Remove them. */
2009  if (arcIndex > 1)
2010  {
2011  /* p1 is probe point, p2 is last known non-duplicate. */
2012  TrgmPackArcInfo *p1,
2013  *p2;
2014 
2015  p2 = arcs;
2016  for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2017  {
2018  if (packArcInfoCmp(p1, p2) > 0)
2019  {
2020  p2++;
2021  *p2 = *p1;
2022  }
2023  }
2024  arcsCount = (p2 - arcs) + 1;
2025  }
2026  else
2027  arcsCount = arcIndex;
2028 
2029  /* Create packed representation */
2030  result = (TrgmPackedGraph *)
2031  MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
2032 
2033  /* Pack color trigrams information */
2034  result->colorTrigramsCount = 0;
2035  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2036  {
2037  if (trgmNFA->colorTrgms[i].expanded)
2038  result->colorTrigramsCount++;
2039  }
2040  result->colorTrigramGroups = (int *)
2041  MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
2042  j = 0;
2043  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2044  {
2045  if (trgmNFA->colorTrgms[i].expanded)
2046  {
2047  result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
2048  j++;
2049  }
2050  }
2051 
2052  /* Pack states and arcs information */
2053  result->statesCount = snumber;
2054  result->states = (TrgmPackedState *)
2055  MemoryContextAlloc(rcontext, snumber * sizeof(TrgmPackedState));
2056  packedArcs = (TrgmPackedArc *)
2057  MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
2058  j = 0;
2059  for (i = 0; i < snumber; i++)
2060  {
2061  int cnt = 0;
2062 
2063  result->states[i].arcs = &packedArcs[j];
2064  while (j < arcsCount && arcs[j].sourceState == i)
2065  {
2066  packedArcs[j].targetState = arcs[j].targetState;
2067  packedArcs[j].colorTrgm = arcs[j].colorTrgm;
2068  cnt++;
2069  j++;
2070  }
2071  result->states[i].arcsCount = cnt;
2072  }
2073 
2074  /* Allocate working memory for trigramsMatchGraph() */
2075  result->colorTrigramsActive = (bool *)
2076  MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
2077  result->statesActive = (bool *)
2078  MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
2079  result->statesQueue = (int *)
2080  MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
2081 
2082  return result;
2083 }
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
static rewind_source * source
Definition: pg_rewind.c:89
#define qsort(a, b, c, d)
Definition: port.h:447
TrgmPackedState * states
Definition: trgm_regexp.c:459
bool * statesActive
Definition: trgm_regexp.c:463
bool * colorTrigramsActive
Definition: trgm_regexp.c:462
int * colorTrigramGroups
Definition: trgm_regexp.c:452
TrgmPackedArc * arcs
Definition: trgm_regexp.c:436
#define TSTATE_INIT
Definition: trgm_regexp.c:329
static int packArcInfoCmp(const void *a1, const void *a2)
Definition: trgm_regexp.c:2091
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1892

References TrgmPackedState::arcs, TrgmNFA::arcsCount, TrgmPackedState::arcsCount, Assert, ColorTrgmInfo::cnumber, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, colorTrgmInfoCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, ColorTrgmInfo::count, ColorTrgmInfo::expanded, hash_seq_init(), hash_seq_search(), i, j, lfirst, MemoryContextAlloc(), p2, packArcInfoCmp(), palloc(), TrgmState::parent, qsort, TrgmState::snumber, source, TrgmPackArcInfo::sourceState, TrgmNFA::states, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, TrgmPackedArc::targetState, TrgmPackArcInfo::targetState, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

◆ prefixContains()

static bool prefixContains ( TrgmPrefix prefix1,
TrgmPrefix prefix2 
)
static

Definition at line 1414 of file trgm_regexp.c.

1415 {
1416  if (prefix1->colors[1] == COLOR_UNKNOWN)
1417  {
1418  /* Fully ambiguous prefix contains everything */
1419  return true;
1420  }
1421  else if (prefix1->colors[0] == COLOR_UNKNOWN)
1422  {
1423  /*
1424  * Prefix with only first unknown color contains every prefix with
1425  * same second color.
1426  */
1427  if (prefix1->colors[1] == prefix2->colors[1])
1428  return true;
1429  else
1430  return false;
1431  }
1432  else
1433  {
1434  /* Exact prefix contains only the exact same prefix */
1435  if (prefix1->colors[0] == prefix2->colors[0] &&
1436  prefix1->colors[1] == prefix2->colors[1])
1437  return true;
1438  else
1439  return false;
1440  }
1441 }

References COLOR_UNKNOWN, and TrgmPrefix::colors.

Referenced by addArc(), and addKey().

◆ processState()

static void processState ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 957 of file trgm_regexp.c.

958 {
959  ListCell *lc;
960 
961  /* keysQueue should be NIL already, but make sure */
962  trgmNFA->keysQueue = NIL;
963 
964  /*
965  * Add state's own key, and then process all keys added to keysQueue until
966  * queue is finished. But we can quit if the state gets marked final.
967  */
968  addKey(trgmNFA, state, &state->stateKey);
969  foreach(lc, trgmNFA->keysQueue)
970  {
971  TrgmStateKey *key = (TrgmStateKey *) lfirst(lc);
972 
973  if (state->flags & TSTATE_FIN)
974  break;
975  addKey(trgmNFA, state, key);
976  }
977 
978  /* Release keysQueue to clean up for next cycle */
979  list_free(trgmNFA->keysQueue);
980  trgmNFA->keysQueue = NIL;
981 
982  /*
983  * Add outgoing arcs only if state isn't final (we have no interest in
984  * outgoing arcs if we already match)
985  */
986  if (!(state->flags & TSTATE_FIN))
987  addArcs(trgmNFA, state);
988 }
void list_free(List *list)
Definition: list.c:1546
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
Definition: trgm_regexp.c:1007
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:1188

References addArcs(), addKey(), sort-test::key, TrgmNFA::keysQueue, lfirst, list_free(), NIL, and TSTATE_FIN.

Referenced by transformGraph().

◆ RE_compile()

static void RE_compile ( regex_t regex,
text text_re,
int  cflags,
Oid  collation 
)
static

Definition at line 719 of file trgm_regexp.c.

720 {
721  int text_re_len = VARSIZE_ANY_EXHDR(text_re);
722  char *text_re_val = VARDATA_ANY(text_re);
723  pg_wchar *pattern;
724  int pattern_len;
725  int regcomp_result;
726  char errMsg[100];
727 
728  /* Convert pattern string to wide characters */
729  pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
730  pattern_len = pg_mb2wchar_with_len(text_re_val,
731  pattern,
732  text_re_len);
733 
734  /* Compile regex */
735  regcomp_result = pg_regcomp(regex,
736  pattern,
737  pattern_len,
738  cflags,
739  collation);
740 
741  pfree(pattern);
742 
743  if (regcomp_result != REG_OKAY)
744  {
745  /* re didn't compile (no need for pg_regfree, if so) */
746  pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
747  ereport(ERROR,
748  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
749  errmsg("invalid regular expression: %s", errMsg)));
750  }
751 }
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:986
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
Definition: regcomp.c:370
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define REG_OKAY
Definition: regex.h:215
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by createTrgmNFA().

◆ selectColorTrigrams()

static bool selectColorTrigrams ( TrgmNFA trgmNFA)
static

Definition at line 1456 of file trgm_regexp.c.

1457 {
1458  HASH_SEQ_STATUS scan_status;
1459  int arcsCount = trgmNFA->arcsCount,
1460  i;
1461  TrgmState *state;
1462  ColorTrgmInfo *colorTrgms;
1463  int64 totalTrgmCount;
1464  float4 totalTrgmPenalty;
1465  int cnumber;
1466 
1467  /* Collect color trigrams from all arcs */
1468  colorTrgms = (ColorTrgmInfo *) palloc0(sizeof(ColorTrgmInfo) * arcsCount);
1469  trgmNFA->colorTrgms = colorTrgms;
1470 
1471  i = 0;
1472  hash_seq_init(&scan_status, trgmNFA->states);
1473  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1474  {
1475  ListCell *cell;
1476 
1477  foreach(cell, state->arcs)
1478  {
1479  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1480  TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
1481  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1482 
1483  arcInfo->source = state;
1484  arcInfo->target = arc->target;
1485  trgmInfo->ctrgm = arc->ctrgm;
1486  trgmInfo->cnumber = -1;
1487  /* count and penalty will be set below */
1488  trgmInfo->expanded = true;
1489  trgmInfo->arcs = list_make1(arcInfo);
1490  i++;
1491  }
1492  }
1493  Assert(i == arcsCount);
1494 
1495  /* Remove duplicates, merging their arcs lists */
1496  if (arcsCount >= 2)
1497  {
1498  ColorTrgmInfo *p1,
1499  *p2;
1500 
1501  /* Sort trigrams to ease duplicate detection */
1502  qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
1503 
1504  /* p1 is probe point, p2 is last known non-duplicate. */
1505  p2 = colorTrgms;
1506  for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1507  {
1508  if (colorTrgmInfoCmp(p1, p2) > 0)
1509  {
1510  p2++;
1511  *p2 = *p1;
1512  }
1513  else
1514  {
1515  p2->arcs = list_concat(p2->arcs, p1->arcs);
1516  }
1517  }
1518  trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
1519  }
1520  else
1521  {
1522  trgmNFA->colorTrgmsCount = arcsCount;
1523  }
1524 
1525  /*
1526  * Count number of simple trigrams generated by each color trigram, and
1527  * also compute a penalty value, which is the number of simple trigrams
1528  * times a multiplier that depends on its whitespace content.
1529  *
1530  * Note: per-color-trigram counts cannot overflow an int so long as
1531  * COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
1532  * 1290. However, the grand total totalTrgmCount might conceivably
1533  * overflow an int, so we use int64 for that within this routine. Also,
1534  * penalties are calculated in float4 arithmetic to avoid any overflow
1535  * worries.
1536  */
1537  totalTrgmCount = 0;
1538  totalTrgmPenalty = 0.0f;
1539  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1540  {
1541  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1542  int j,
1543  count = 1,
1544  typeIndex = 0;
1545 
1546  for (j = 0; j < 3; j++)
1547  {
1548  TrgmColor c = trgmInfo->ctrgm.colors[j];
1549 
1550  typeIndex *= 2;
1551  if (c == COLOR_BLANK)
1552  typeIndex++;
1553  else
1554  count *= trgmNFA->colorInfo[c].wordCharsCount;
1555  }
1556  trgmInfo->count = count;
1557  totalTrgmCount += count;
1558  trgmInfo->penalty = penalties[typeIndex] * (float4) count;
1559  totalTrgmPenalty += trgmInfo->penalty;
1560  }
1561 
1562  /* Sort color trigrams in descending order of their penalties */
1563  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1565 
1566  /*
1567  * Remove color trigrams from the graph so long as total penalty of color
1568  * trigrams exceeds WISH_TRGM_PENALTY. (If we fail to get down to
1569  * WISH_TRGM_PENALTY, it's OK so long as total count is no more than
1570  * MAX_TRGM_COUNT.) We prefer to remove color trigrams with higher
1571  * penalty, since those are the most promising for reducing the total
1572  * penalty. When removing a color trigram we have to merge states
1573  * connected by arcs labeled with that trigram. It's necessary to not
1574  * merge initial and final states, because our graph becomes useless if
1575  * that happens; so we cannot always remove the trigram we'd prefer to.
1576  */
1577  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1578  {
1579  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1580  bool canRemove = true;
1581  ListCell *cell;
1582 
1583  /* Done if we've reached the target */
1584  if (totalTrgmPenalty <= WISH_TRGM_PENALTY)
1585  break;
1586 
1587 #ifdef TRGM_REGEXP_DEBUG
1588  fprintf(stderr, "considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1589  trgmInfo->ctrgm.colors[0],
1590  trgmInfo->ctrgm.colors[1],
1591  trgmInfo->ctrgm.colors[2],
1592  trgmInfo->penalty,
1593  list_length(trgmInfo->arcs));
1594 #endif
1595 
1596  /*
1597  * Does any arc of this color trigram connect initial and final
1598  * states? If so we can't remove it.
1599  */
1600  foreach(cell, trgmInfo->arcs)
1601  {
1602  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1603  TrgmState *source = arcInfo->source,
1604  *target = arcInfo->target;
1605  int source_flags,
1606  target_flags;
1607 
1608 #ifdef TRGM_REGEXP_DEBUG
1609  fprintf(stderr, "examining arc to s%d (%x) from s%d (%x)\n",
1610  -target->snumber, target->flags,
1611  -source->snumber, source->flags);
1612 #endif
1613 
1614  /* examine parent states, if any merging has already happened */
1615  while (source->parent)
1616  source = source->parent;
1617  while (target->parent)
1618  target = target->parent;
1619 
1620 #ifdef TRGM_REGEXP_DEBUG
1621  fprintf(stderr, " ... after completed merges: to s%d (%x) from s%d (%x)\n",
1622  -target->snumber, target->flags,
1623  -source->snumber, source->flags);
1624 #endif
1625 
1626  /* we must also consider merges we are planning right now */
1627  source_flags = source->flags | source->tentFlags;
1628  while (source->tentParent)
1629  {
1630  source = source->tentParent;
1631  source_flags |= source->flags | source->tentFlags;
1632  }
1633  target_flags = target->flags | target->tentFlags;
1634  while (target->tentParent)
1635  {
1636  target = target->tentParent;
1637  target_flags |= target->flags | target->tentFlags;
1638  }
1639 
1640 #ifdef TRGM_REGEXP_DEBUG
1641  fprintf(stderr, " ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1642  -target->snumber, target_flags,
1643  -source->snumber, source_flags);
1644 #endif
1645 
1646  /* would fully-merged state have both INIT and FIN set? */
1647  if (((source_flags | target_flags) & (TSTATE_INIT | TSTATE_FIN)) ==
1648  (TSTATE_INIT | TSTATE_FIN))
1649  {
1650  canRemove = false;
1651  break;
1652  }
1653 
1654  /* ok so far, so remember planned merge */
1655  if (source != target)
1656  {
1657 #ifdef TRGM_REGEXP_DEBUG
1658  fprintf(stderr, " ... tentatively merging s%d into s%d\n",
1659  -target->snumber, -source->snumber);
1660 #endif
1661  target->tentParent = source;
1662  source->tentFlags |= target_flags;
1663  }
1664  }
1665 
1666  /*
1667  * We must reset all the tentFlags/tentParent fields before
1668  * continuing. tentFlags could only have become set in states that
1669  * are the source or parent or tentative parent of one of the current
1670  * arcs; likewise tentParent could only have become set in states that
1671  * are the target or parent or tentative parent of one of the current
1672  * arcs. There might be some overlap between those sets, but if we
1673  * clear tentFlags in target states as well as source states, we
1674  * should be okay even if we visit a state as target before visiting
1675  * it as a source.
1676  */
1677  foreach(cell, trgmInfo->arcs)
1678  {
1679  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1680  TrgmState *source = arcInfo->source,
1681  *target = arcInfo->target;
1682  TrgmState *ttarget;
1683 
1684  /* no need to touch previously-merged states */
1685  while (source->parent)
1686  source = source->parent;
1687  while (target->parent)
1688  target = target->parent;
1689 
1690  while (source)
1691  {
1692  source->tentFlags = 0;
1693  source = source->tentParent;
1694  }
1695 
1696  while ((ttarget = target->tentParent) != NULL)
1697  {
1698  target->tentParent = NULL;
1699  target->tentFlags = 0; /* in case it was also a source */
1700  target = ttarget;
1701  }
1702  }
1703 
1704  /* Now, move on if we can't drop this trigram */
1705  if (!canRemove)
1706  {
1707 #ifdef TRGM_REGEXP_DEBUG
1708  fprintf(stderr, " ... not ok to merge\n");
1709 #endif
1710  continue;
1711  }
1712 
1713  /* OK, merge states linked by each arc labeled by the trigram */
1714  foreach(cell, trgmInfo->arcs)
1715  {
1716  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1717  TrgmState *source = arcInfo->source,
1718  *target = arcInfo->target;
1719 
1720  while (source->parent)
1721  source = source->parent;
1722  while (target->parent)
1723  target = target->parent;
1724  if (source != target)
1725  {
1726 #ifdef TRGM_REGEXP_DEBUG
1727  fprintf(stderr, "merging s%d into s%d\n",
1728  -target->snumber, -source->snumber);
1729 #endif
1730  mergeStates(source, target);
1731  /* Assert we didn't merge initial and final states */
1732  Assert((source->flags & (TSTATE_INIT | TSTATE_FIN)) !=
1733  (TSTATE_INIT | TSTATE_FIN));
1734  }
1735  }
1736 
1737  /* Mark trigram unexpanded, and update totals */
1738  trgmInfo->expanded = false;
1739  totalTrgmCount -= trgmInfo->count;
1740  totalTrgmPenalty -= trgmInfo->penalty;
1741  }
1742 
1743  /* Did we succeed in fitting into MAX_TRGM_COUNT? */
1744  if (totalTrgmCount > MAX_TRGM_COUNT)
1745  return false;
1746 
1747  trgmNFA->totalTrgmCount = (int) totalTrgmCount;
1748 
1749  /*
1750  * Sort color trigrams by colors (will be useful for bsearch in packGraph)
1751  * and enumerate the color trigrams that are expanded.
1752  */
1753  cnumber = 0;
1754  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1756  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1757  {
1758  if (colorTrgms[i].expanded)
1759  {
1760  colorTrgms[i].cnumber = cnumber;
1761  cnumber++;
1762  }
1763  }
1764 
1765  return true;
1766 }
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
static int list_length(const List *l)
Definition: pg_list.h:152
#define list_make1(x1)
Definition: pg_list.h:212
#define fprintf
Definition: port.h:242
float4 penalty
Definition: trgm_regexp.c:378
TrgmState * source
Definition: trgm_regexp.c:360
TrgmState * target
Definition: trgm_regexp.c:361
int TrgmColor
Definition: trgm_regexp.c:283
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1905
#define MAX_TRGM_COUNT
Definition: trgm_regexp.c:221
static const float4 penalties[8]
Definition: trgm_regexp.c:229
#define WISH_TRGM_PENALTY
Definition: trgm_regexp.c:222
static void mergeStates(TrgmState *state1, TrgmState *state2)
Definition: trgm_regexp.c:1875

References ColorTrgmInfo::arcs, TrgmNFA::arcsCount, Assert, ColorTrgmInfo::cnumber, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, colorTrgmInfoCmp(), colorTrgmInfoPenaltyCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::count, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fprintf, hash_seq_init(), hash_seq_search(), i, j, lfirst, list_concat(), list_length(), list_make1, MAX_TRGM_COUNT, mergeStates(), p2, palloc(), palloc0(), penalties, ColorTrgmInfo::penalty, qsort, TrgmArcInfo::source, source, TrgmNFA::states, TrgmArcInfo::target, TrgmState::tentParent, TrgmNFA::totalTrgmCount, TSTATE_FIN, TSTATE_INIT, WISH_TRGM_PENALTY, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ transformGraph()

static void transformGraph ( TrgmNFA trgmNFA)
static

Definition at line 893 of file trgm_regexp.c.

894 {
895  HASHCTL hashCtl;
896  TrgmStateKey initkey;
897  TrgmState *initstate;
898  ListCell *lc;
899 
900  /* Initialize this stage's workspace in trgmNFA struct */
901  trgmNFA->queue = NIL;
902  trgmNFA->keysQueue = NIL;
903  trgmNFA->arcsCount = 0;
904  trgmNFA->overflowed = false;
905 
906  /* Create hashtable for states */
907  hashCtl.keysize = sizeof(TrgmStateKey);
908  hashCtl.entrysize = sizeof(TrgmState);
909  hashCtl.hcxt = CurrentMemoryContext;
910  trgmNFA->states = hash_create("Trigram NFA",
911  1024,
912  &hashCtl,
914  trgmNFA->nstates = 0;
915 
916  /* Create initial state: ambiguous prefix, NFA's initial state */
917  MemSet(&initkey, 0, sizeof(initkey));
918  initkey.prefix.colors[0] = COLOR_UNKNOWN;
919  initkey.prefix.colors[1] = COLOR_UNKNOWN;
920  initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
921 
922  initstate = getState(trgmNFA, &initkey);
923  initstate->flags |= TSTATE_INIT;
924  trgmNFA->initState = initstate;
925 
926  /*
927  * Recursively build the expanded graph by processing queue of states
928  * (breadth-first search). getState already put initstate in the queue.
929  * Note that getState will append new states to the queue within the loop,
930  * too; this works as long as we don't do repeat fetches using the "lc"
931  * pointer.
932  */
933  foreach(lc, trgmNFA->queue)
934  {
935  TrgmState *state = (TrgmState *) lfirst(lc);
936 
937  /*
938  * If we overflowed then just mark state as final. Otherwise do
939  * actual processing.
940  */
941  if (trgmNFA->overflowed)
942  state->flags |= TSTATE_FIN;
943  else
944  processState(trgmNFA, state);
945 
946  /* Did we overflow? */
947  if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
949  trgmNFA->overflowed = true;
950  }
951 }
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1341
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
MemoryContext hcxt
Definition: hsearch.h:86
bool overflowed
Definition: trgm_regexp.c:416
#define MAX_EXPANDED_STATES
Definition: trgm_regexp.c:219
#define MAX_EXPANDED_ARCS
Definition: trgm_regexp.c:220
struct TrgmState TrgmState
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:957

References TrgmNFA::arcsCount, COLOR_UNKNOWN, TrgmPrefix::colors, CurrentMemoryContext, HASHCTL::entrysize, TrgmState::flags, getState(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, hash_get_num_entries(), HASHCTL::hcxt, TrgmNFA::initState, HASHCTL::keysize, TrgmNFA::keysQueue, lfirst, MAX_EXPANDED_ARCS, MAX_EXPANDED_STATES, MemSet, NIL, TrgmStateKey::nstate, TrgmNFA::nstates, TrgmNFA::overflowed, pg_reg_getinitialstate(), TrgmStateKey::prefix, processState(), TrgmNFA::queue, TrgmNFA::regex, TrgmNFA::states, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

◆ trigramsMatchGraph()

bool trigramsMatchGraph ( TrgmPackedGraph graph,
bool check 
)

Definition at line 626 of file trgm_regexp.c.

627 {
628  int i,
629  j,
630  k,
631  queueIn,
632  queueOut;
633 
634  /*
635  * Reset temporary working areas.
636  */
637  memset(graph->colorTrigramsActive, 0,
638  sizeof(bool) * graph->colorTrigramsCount);
639  memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
640 
641  /*
642  * Check which color trigrams were matched. A match for any simple
643  * trigram associated with a color trigram counts as a match of the color
644  * trigram.
645  */
646  j = 0;
647  for (i = 0; i < graph->colorTrigramsCount; i++)
648  {
649  int cnt = graph->colorTrigramGroups[i];
650 
651  for (k = j; k < j + cnt; k++)
652  {
653  if (check[k])
654  {
655  /*
656  * Found one matched trigram in the group. Can skip the rest
657  * of them and go to the next group.
658  */
659  graph->colorTrigramsActive[i] = true;
660  break;
661  }
662  }
663  j = j + cnt;
664  }
665 
666  /*
667  * Initialize the statesQueue to hold just the initial state. Note:
668  * statesQueue has room for statesCount entries, which is certainly enough
669  * since no state will be put in the queue more than once. The
670  * statesActive array marks which states have been queued.
671  */
672  graph->statesActive[0] = true;
673  graph->statesQueue[0] = 0;
674  queueIn = 0;
675  queueOut = 1;
676 
677  /* Process queued states as long as there are any. */
678  while (queueIn < queueOut)
679  {
680  int stateno = graph->statesQueue[queueIn++];
681  TrgmPackedState *state = &graph->states[stateno];
682  int cnt = state->arcsCount;
683 
684  /* Loop over state's out-arcs */
685  for (i = 0; i < cnt; i++)
686  {
687  TrgmPackedArc *arc = &state->arcs[i];
688 
689  /*
690  * If corresponding color trigram is present then activate the
691  * corresponding state. We're done if that's the final state,
692  * otherwise queue the state if it's not been queued already.
693  */
694  if (graph->colorTrigramsActive[arc->colorTrgm])
695  {
696  int nextstate = arc->targetState;
697 
698  if (nextstate == 1)
699  return true; /* success: final state is reachable */
700 
701  if (!graph->statesActive[nextstate])
702  {
703  graph->statesActive[nextstate] = true;
704  graph->statesQueue[queueOut++] = nextstate;
705  }
706  }
707  }
708  }
709 
710  /* Queue is empty, so match fails. */
711  return false;
712 }

References TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, j, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, and TrgmPackedGraph::statesQueue.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

◆ validArcLabel()

static bool validArcLabel ( TrgmStateKey key,
TrgmColor  co 
)
static

Definition at line 1328 of file trgm_regexp.c.

1329 {
1330  /*
1331  * We have to know full trigram in order to add outgoing arc. So we can't
1332  * do it if prefix is ambiguous.
1333  */
1334  if (key->prefix.colors[0] == COLOR_UNKNOWN)
1335  return false;
1336 
1337  /* If key->prefix.colors[0] isn't unknown, its second color isn't either */
1338  Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
1339  /* And we should not be called with an unknown arc color anytime */
1340  Assert(co != COLOR_UNKNOWN);
1341 
1342  /*
1343  * We don't bother with making arcs representing three non-word
1344  * characters, since that's useless for trigram extraction.
1345  */
1346  if (key->prefix.colors[0] == COLOR_BLANK &&
1347  key->prefix.colors[1] == COLOR_BLANK &&
1348  co == COLOR_BLANK)
1349  return false;
1350 
1351  /*
1352  * We also reject nonblank-blank-anything. The nonblank-blank-nonblank
1353  * case doesn't correspond to any trigram the trigram extraction code
1354  * would make. The nonblank-blank-blank case is also not possible with
1355  * RPADDING = 1. (Note that in many cases we'd fail to generate such a
1356  * trigram even if it were valid, for example processing "foo bar" will
1357  * not result in considering the trigram "o ". So if you want to support
1358  * RPADDING = 2, there's more to do than just twiddle this test.)
1359  */
1360  if (key->prefix.colors[0] != COLOR_BLANK &&
1361  key->prefix.colors[1] == COLOR_BLANK)
1362  return false;
1363 
1364  /*
1365  * Other combinations involving blank are valid, in particular we assume
1366  * blank-blank-nonblank is valid, which presumes that LPADDING is 2.
1367  *
1368  * Note: Using again the example "foo bar", we will not consider the
1369  * trigram " b", though this trigram would be found by the trigram
1370  * extraction code. Since we will find " ba", it doesn't seem worth
1371  * trying to hack the algorithm to generate the additional trigram.
1372  */
1373 
1374  /* arc label is valid */
1375  return true;
1376 }

References Assert, COLOR_BLANK, COLOR_UNKNOWN, and sort-test::key.

Referenced by addArc(), and addKey().

Variable Documentation

◆ penalties

const float4 penalties[8]
static
Initial value:
= {
1.0f,
3.5f,
0.0f,
0.0f,
4.2f,
2.1f,
25.0f,
0.0f
}

Definition at line 229 of file trgm_regexp.c.

Referenced by selectColorTrigrams().