PostgreSQL Source Code  git master
trgm_regexp.c File Reference
#include "postgres.h"
#include "regex/regexport.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for trgm_regexp.c:

Go to the source code of this file.

Data Structures

struct  trgm_mb_char
 
struct  TrgmColorInfo
 
struct  TrgmPrefix
 
struct  ColorTrgm
 
struct  TrgmStateKey
 
struct  TrgmState
 
struct  TrgmArc
 
struct  TrgmArcInfo
 
struct  ColorTrgmInfo
 
struct  TrgmNFA
 
struct  TrgmPackedArc
 
struct  TrgmPackedState
 
struct  TrgmPackedGraph
 
struct  TrgmPackArcInfo
 

Macros

#define MAX_EXPANDED_STATES   128
 
#define MAX_EXPANDED_ARCS   1024
 
#define MAX_TRGM_COUNT   256
 
#define WISH_TRGM_PENALTY   16
 
#define COLOR_COUNT_LIMIT   256
 
#define COLOR_UNKNOWN   (-3)
 
#define COLOR_BLANK   (-4)
 
#define TSTATE_INIT   0x01 /* flag indicating this state is initial */
 
#define TSTATE_FIN   0x02 /* flag indicating this state is final */
 

Typedefs

typedef int TrgmColor
 
typedef struct TrgmState TrgmState
 

Functions

static TRGMcreateTrgmNFAInternal (regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
 
static void RE_compile (regex_t *regex, text *text_re, int cflags, Oid collation)
 
static void getColorInfo (regex_t *regex, TrgmNFA *trgmNFA)
 
static bool convertPgWchar (pg_wchar c, trgm_mb_char *result)
 
static void transformGraph (TrgmNFA *trgmNFA)
 
static void processState (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addKey (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
 
static void addKeyToQueue (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static void addArcs (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addArc (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
 
static bool validArcLabel (TrgmStateKey *key, TrgmColor co)
 
static TrgmStategetState (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static bool prefixContains (TrgmPrefix *prefix1, TrgmPrefix *prefix2)
 
static bool selectColorTrigrams (TrgmNFA *trgmNFA)
 
static TRGMexpandColorTrigrams (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static void fillTrgm (trgm *ptrgm, trgm_mb_char s[3])
 
static void mergeStates (TrgmState *state1, TrgmState *state2)
 
static int colorTrgmInfoCmp (const void *p1, const void *p2)
 
static int colorTrgmInfoPenaltyCmp (const void *p1, const void *p2)
 
static TrgmPackedGraphpackGraph (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static int packArcInfoCmp (const void *a1, const void *a2)
 
TRGMcreateTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
 
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)
 

Variables

static const float4 penalties [8]
 

Macro Definition Documentation

◆ COLOR_BLANK

#define COLOR_BLANK   (-4)

Definition at line 286 of file trgm_regexp.c.

◆ COLOR_COUNT_LIMIT

#define COLOR_COUNT_LIMIT   256

Definition at line 222 of file trgm_regexp.c.

◆ COLOR_UNKNOWN

#define COLOR_UNKNOWN   (-3)

Definition at line 285 of file trgm_regexp.c.

◆ MAX_EXPANDED_ARCS

#define MAX_EXPANDED_ARCS   1024

Definition at line 219 of file trgm_regexp.c.

◆ MAX_EXPANDED_STATES

#define MAX_EXPANDED_STATES   128

Definition at line 218 of file trgm_regexp.c.

◆ MAX_TRGM_COUNT

#define MAX_TRGM_COUNT   256

Definition at line 220 of file trgm_regexp.c.

◆ TSTATE_FIN

#define TSTATE_FIN   0x02 /* flag indicating this state is final */

Definition at line 329 of file trgm_regexp.c.

◆ TSTATE_INIT

#define TSTATE_INIT   0x01 /* flag indicating this state is initial */

Definition at line 328 of file trgm_regexp.c.

◆ WISH_TRGM_PENALTY

#define WISH_TRGM_PENALTY   16

Definition at line 221 of file trgm_regexp.c.

Typedef Documentation

◆ TrgmColor

typedef int TrgmColor

Definition at line 282 of file trgm_regexp.c.

◆ TrgmState

typedef struct TrgmState TrgmState

Function Documentation

◆ addArc()

static void addArc ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key,
TrgmColor  co,
TrgmStateKey destKey 
)
static

Definition at line 1298 of file trgm_regexp.c.

1300 {
1301  TrgmArc *arc;
1302  ListCell *cell;
1303 
1304  /* Do nothing if this wouldn't be a valid arc label trigram */
1305  if (!validArcLabel(key, co))
1306  return;
1307 
1308  /*
1309  * Check if we are going to reach key which is covered by a key which is
1310  * already listed in this state. If so arc is useless: the NFA can bypass
1311  * it through a path that doesn't require any predictable trigram, so
1312  * whether the arc's trigram is present or not doesn't really matter.
1313  */
1314  foreach(cell, state->enterKeys)
1315  {
1316  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1317 
1318  if (existingKey->nstate == destKey->nstate &&
1319  prefixContains(&existingKey->prefix, &destKey->prefix))
1320  return;
1321  }
1322 
1323  /* Checks were successful, add new arc */
1324  arc = (TrgmArc *) palloc(sizeof(TrgmArc));
1325  arc->target = getState(trgmNFA, destKey);
1326  arc->ctrgm.colors[0] = key->prefix.colors[0];
1327  arc->ctrgm.colors[1] = key->prefix.colors[1];
1328  arc->ctrgm.colors[2] = co;
1329 
1330  state->arcs = lappend(state->arcs, arc);
1331  trgmNFA->arcsCount++;
1332 }
List * lappend(List *list, void *datum)
Definition: list.c:338
void * palloc(Size size)
Definition: mcxt.c:1199
#define lfirst(lc)
Definition: pg_list.h:170
int arcsCount
Definition: trgm_regexp.c:414
TrgmPrefix prefix
Definition: trgm_regexp.c:309
Definition: regguts.h:291
Definition: regguts.h:318
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1340
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1395
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1426

References TrgmNFA::arcsCount, getState(), sort-test::key, lappend(), lfirst, TrgmStateKey::nstate, palloc(), TrgmStateKey::prefix, prefixContains(), and validArcLabel().

Referenced by addArcs().

◆ addArcs()

static void addArcs ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 1200 of file trgm_regexp.c.

1201 {
1202  TrgmStateKey destKey;
1203  ListCell *cell;
1204  regex_arc_t *arcs;
1205  int arcsCount,
1206  i;
1207 
1208  /*
1209  * Ensure any pad bytes in destKey are zero, since it may get used as a
1210  * hashtable key by getState.
1211  */
1212  MemSet(&destKey, 0, sizeof(destKey));
1213 
1214  /*
1215  * Iterate over enter keys associated with this expanded-graph state. This
1216  * includes both the state's own stateKey, and any enter keys we added to
1217  * it during addKey (which represent expanded-graph states that are not
1218  * distinguishable from this one by means of trigrams). For each such
1219  * enter key, examine all the out-arcs of the key's underlying NFA state,
1220  * and try to make a trigram arc leading to where the out-arc leads.
1221  * (addArc will deal with whether the arc is valid or not.)
1222  */
1223  foreach(cell, state->enterKeys)
1224  {
1225  TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
1226 
1227  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1228  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1229  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1230 
1231  for (i = 0; i < arcsCount; i++)
1232  {
1233  regex_arc_t *arc = &arcs[i];
1234  TrgmColorInfo *colorInfo;
1235 
1236  /*
1237  * Ignore non-expandable colors; addKey already handled the case.
1238  *
1239  * We need no special check for WHITE or begin/end pseudocolors
1240  * here. We don't need to do any processing for them, and they
1241  * will be marked non-expandable since the regex engine will have
1242  * reported them that way. We do have to watch out for RAINBOW,
1243  * which has a negative color number.
1244  */
1245  if (arc->co < 0)
1246  continue;
1247  Assert(arc->co < trgmNFA->ncolors);
1248 
1249  colorInfo = &trgmNFA->colorInfo[arc->co];
1250  if (!colorInfo->expandable)
1251  continue;
1252 
1253  if (colorInfo->containsNonWord)
1254  {
1255  /*
1256  * Color includes non-word character(s).
1257  *
1258  * Generate an arc, treating this transition as occurring on
1259  * BLANK. This allows word-ending trigrams to be manufactured
1260  * if possible.
1261  */
1262  destKey.prefix.colors[0] = key->prefix.colors[1];
1263  destKey.prefix.colors[1] = COLOR_BLANK;
1264  destKey.nstate = arc->to;
1265 
1266  addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
1267  }
1268 
1269  if (colorInfo->wordCharsCount > 0)
1270  {
1271  /*
1272  * Color includes word character(s).
1273  *
1274  * Generate an arc. Color is pushed into prefix of target
1275  * state.
1276  */
1277  destKey.prefix.colors[0] = key->prefix.colors[1];
1278  destKey.prefix.colors[1] = arc->co;
1279  destKey.nstate = arc->to;
1280 
1281  addArc(trgmNFA, state, key, arc->co, &destKey);
1282  }
1283  }
1284 
1285  pfree(arcs);
1286  }
1287 }
#define MemSet(start, val, len)
Definition: c.h:953
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1306
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
bool containsNonWord
Definition: trgm_regexp.c:261
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:403
int ncolors
Definition: trgm_regexp.c:404
regex_t * regex
Definition: trgm_regexp.c:402
TrgmColor colors[2]
Definition: trgm_regexp.c:290
color co
Definition: regguts.h:293
struct state * to
Definition: regguts.h:295
#define COLOR_BLANK
Definition: trgm_regexp.c:286
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
Definition: trgm_regexp.c:1298

References addArc(), Assert(), arc::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmColorInfo::expandable, i, sort-test::key, lfirst, MemSet, TrgmNFA::ncolors, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, TrgmNFA::regex, arc::to, and TrgmColorInfo::wordCharsCount.

Referenced by processState().

◆ addKey()

static void addKey ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key 
)
static

Definition at line 1019 of file trgm_regexp.c.

1020 {
1021  regex_arc_t *arcs;
1022  TrgmStateKey destKey;
1023  ListCell *cell;
1024  int i,
1025  arcsCount;
1026 
1027  /*
1028  * Ensure any pad bytes in destKey are zero, since it may get used as a
1029  * hashtable key by getState.
1030  */
1031  MemSet(&destKey, 0, sizeof(destKey));
1032 
1033  /*
1034  * Compare key to each existing enter key of the state to check for
1035  * redundancy. We can drop either old key(s) or the new key if we find
1036  * redundancy.
1037  */
1038  foreach(cell, state->enterKeys)
1039  {
1040  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1041 
1042  if (existingKey->nstate == key->nstate)
1043  {
1044  if (prefixContains(&existingKey->prefix, &key->prefix))
1045  {
1046  /* This old key already covers the new key. Nothing to do */
1047  return;
1048  }
1049  if (prefixContains(&key->prefix, &existingKey->prefix))
1050  {
1051  /*
1052  * The new key covers this old key. Remove the old key, it's
1053  * no longer needed once we add this key to the list.
1054  */
1055  state->enterKeys = foreach_delete_current(state->enterKeys,
1056  cell);
1057  }
1058  }
1059  }
1060 
1061  /* No redundancy, so add this key to the state's list */
1062  state->enterKeys = lappend(state->enterKeys, key);
1063 
1064  /* If state is now known final, mark it and we're done */
1065  if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
1066  {
1067  state->flags |= TSTATE_FIN;
1068  return;
1069  }
1070 
1071  /*
1072  * Loop through all outgoing arcs of the corresponding state in the
1073  * original NFA.
1074  */
1075  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1076  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1077  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1078 
1079  for (i = 0; i < arcsCount; i++)
1080  {
1081  regex_arc_t *arc = &arcs[i];
1082 
1083  if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
1084  {
1085  /*
1086  * Start of line/string (^). Trigram extraction treats start of
1087  * line same as start of word: double space prefix is added.
1088  * Hence, make an enter key showing we can reach the arc
1089  * destination with all-blank prefix.
1090  */
1091  destKey.prefix.colors[0] = COLOR_BLANK;
1092  destKey.prefix.colors[1] = COLOR_BLANK;
1093  destKey.nstate = arc->to;
1094 
1095  /* Add enter key to this state */
1096  addKeyToQueue(trgmNFA, &destKey);
1097  }
1098  else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
1099  {
1100  /*
1101  * End of line/string ($). We must consider this arc as a
1102  * transition that doesn't read anything. The reason for adding
1103  * this enter key to the state is that if the arc leads to the
1104  * NFA's final state, we must mark this expanded state as final.
1105  */
1106  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1107  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1108  destKey.nstate = arc->to;
1109 
1110  /* Add enter key to this state */
1111  addKeyToQueue(trgmNFA, &destKey);
1112  }
1113  else if (arc->co >= 0)
1114  {
1115  /* Regular color (including WHITE) */
1116  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1117 
1118  if (colorInfo->expandable)
1119  {
1120  if (colorInfo->containsNonWord &&
1122  {
1123  /*
1124  * We can reach the arc destination after reading a
1125  * non-word character, but the prefix is not something
1126  * that addArc will accept with COLOR_BLANK, so no trigram
1127  * arc can get made for this transition. We must make an
1128  * enter key to show that the arc destination is
1129  * reachable. Set it up with an all-blank prefix, since
1130  * that corresponds to what the trigram extraction code
1131  * will do at a word starting boundary.
1132  */
1133  destKey.prefix.colors[0] = COLOR_BLANK;
1134  destKey.prefix.colors[1] = COLOR_BLANK;
1135  destKey.nstate = arc->to;
1136  addKeyToQueue(trgmNFA, &destKey);
1137  }
1138 
1139  if (colorInfo->wordCharsCount > 0 &&
1140  !validArcLabel(key, arc->co))
1141  {
1142  /*
1143  * We can reach the arc destination after reading a word
1144  * character, but the prefix is not something that addArc
1145  * will accept, so no trigram arc can get made for this
1146  * transition. We must make an enter key to show that the
1147  * arc destination is reachable. The prefix for the enter
1148  * key should reflect the info we have for this arc.
1149  */
1150  destKey.prefix.colors[0] = key->prefix.colors[1];
1151  destKey.prefix.colors[1] = arc->co;
1152  destKey.nstate = arc->to;
1153  addKeyToQueue(trgmNFA, &destKey);
1154  }
1155  }
1156  else
1157  {
1158  /*
1159  * Unexpandable color. Add enter key with ambiguous prefix,
1160  * showing we can reach the destination from this state, but
1161  * the preceding colors will be uncertain. (We do not set the
1162  * first prefix color to key->prefix.colors[1], because a
1163  * prefix of known followed by unknown is invalid.)
1164  */
1165  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1166  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1167  destKey.nstate = arc->to;
1168  addKeyToQueue(trgmNFA, &destKey);
1169  }
1170  }
1171  else
1172  {
1173  /* RAINBOW: treat as unexpandable color */
1174  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1175  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1176  destKey.nstate = arc->to;
1177  addKeyToQueue(trgmNFA, &destKey);
1178  }
1179  }
1180 
1181  pfree(arcs);
1182 }
#define foreach_delete_current(lst, cell)
Definition: pg_list.h:388
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:208
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:191
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1188
#define TSTATE_FIN
Definition: trgm_regexp.c:329
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:285

References addKeyToQueue(), arc::co, COLOR_BLANK, COLOR_UNKNOWN, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmColorInfo::expandable, foreach_delete_current, i, sort-test::key, lappend(), lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_colorisbegin(), pg_reg_colorisend(), pg_reg_getfinalstate(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, prefixContains(), TrgmNFA::regex, arc::to, TSTATE_FIN, validArcLabel(), and TrgmColorInfo::wordCharsCount.

Referenced by processState().

◆ addKeyToQueue()

static void addKeyToQueue ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1188 of file trgm_regexp.c.

1189 {
1190  TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
1191 
1192  memcpy(keyCopy, key, sizeof(TrgmStateKey));
1193  trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
1194 }
List * keysQueue
Definition: trgm_regexp.c:413

References sort-test::key, TrgmNFA::keysQueue, lappend(), and palloc().

Referenced by addKey().

◆ colorTrgmInfoCmp()

static int colorTrgmInfoCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1904 of file trgm_regexp.c.

1905 {
1906  const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
1907  const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
1908 
1909  return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
1910 }
ColorTrgm ctrgm
Definition: trgm_regexp.c:374

References ColorTrgmInfo::ctrgm, and p2.

Referenced by packGraph(), and selectColorTrigrams().

◆ colorTrgmInfoPenaltyCmp()

static int colorTrgmInfoPenaltyCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1917 of file trgm_regexp.c.

1918 {
1919  float4 penalty1 = ((const ColorTrgmInfo *) p1)->penalty;
1920  float4 penalty2 = ((const ColorTrgmInfo *) p2)->penalty;
1921 
1922  if (penalty1 < penalty2)
1923  return 1;
1924  else if (penalty1 == penalty2)
1925  return 0;
1926  else
1927  return -1;
1928 }
float float4
Definition: c.h:565

References p2.

Referenced by selectColorTrigrams().

◆ convertPgWchar()

static bool convertPgWchar ( pg_wchar  c,
trgm_mb_char result 
)
static

Definition at line 839 of file trgm_regexp.c.

840 {
841  /* "s" has enough space for a multibyte character and a trailing NUL */
842  char s[MAX_MULTIBYTE_CHAR_LEN + 1];
843 
844  /*
845  * We can ignore the NUL character, since it can never appear in a PG text
846  * string. This avoids the need for various special cases when
847  * reconstructing trigrams.
848  */
849  if (c == 0)
850  return false;
851 
852  /* Do the conversion, making sure the result is NUL-terminated */
853  memset(s, 0, sizeof(s));
854  pg_wchar2mb_with_len(&c, s, 1);
855 
856  /*
857  * In IGNORECASE mode, we can ignore uppercase characters. We assume that
858  * the regex engine generated both uppercase and lowercase equivalents
859  * within each color, since we used the REG_ICASE option; so there's no
860  * need to process the uppercase version.
861  *
862  * XXX this code is dependent on the assumption that lowerstr() works the
863  * same as the regex engine's internal case folding machinery. Might be
864  * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
865  * On the other hand, the trigrams in the index were created using
866  * lowerstr(), so we're probably screwed if there's any incompatibility
867  * anyway.
868  */
869 #ifdef IGNORECASE
870  {
871  char *lowerCased = lowerstr(s);
872 
873  if (strcmp(lowerCased, s) != 0)
874  {
875  pfree(lowerCased);
876  return false;
877  }
878  pfree(lowerCased);
879  }
880 #endif
881 
882  /* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
883  memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
884  return true;
885 }
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:951
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:32
char * c
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:242
char * lowerstr(const char *str)
Definition: ts_locale.c:260

References trgm_mb_char::bytes, lowerstr(), MAX_MULTIBYTE_CHAR_LEN, pfree(), and pg_wchar2mb_with_len().

Referenced by getColorInfo().

◆ createTrgmNFA()

TRGM* createTrgmNFA ( text text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 521 of file trgm_regexp.c.

523 {
524  TRGM *trg;
525  regex_t regex;
526  MemoryContext tmpcontext;
527  MemoryContext oldcontext;
528 
529  /*
530  * This processing generates a great deal of cruft, which we'd like to
531  * clean up before returning (since this function may be called in a
532  * query-lifespan memory context). Make a temp context we can work in so
533  * that cleanup is easy.
534  */
536  "createTrgmNFA temporary context",
538  oldcontext = MemoryContextSwitchTo(tmpcontext);
539 
540  /*
541  * Stage 1: Compile the regexp into a NFA, using the regexp library.
542  */
543 #ifdef IGNORECASE
544  RE_compile(&regex, text_re,
545  REG_ADVANCED | REG_NOSUB | REG_ICASE, collation);
546 #else
547  RE_compile(&regex, text_re,
548  REG_ADVANCED | REG_NOSUB, collation);
549 #endif
550 
551  /*
552  * Since the regexp library allocates its internal data structures with
553  * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
554  * done even if there's an error.
555  */
556  PG_TRY();
557  {
558  trg = createTrgmNFAInternal(&regex, graph, rcontext);
559  }
560  PG_FINALLY();
561  {
562  pg_regfree(&regex);
563  }
564  PG_END_TRY();
565 
566  /* Clean up all the cruft we created */
567  MemoryContextSwitchTo(oldcontext);
568  MemoryContextDelete(tmpcontext);
569 
570  return trg;
571 }
#define PG_TRY(...)
Definition: elog.h:309
#define PG_END_TRY(...)
Definition: elog.h:334
#define PG_FINALLY(...)
Definition: elog.h:326
MemoryContext CurrentMemoryContext
Definition: mcxt.c:124
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:376
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:135
#define REG_ICASE
Definition: regex.h:106
#define REG_ADVANCED
Definition: regex.h:103
#define REG_NOSUB
Definition: regex.h:107
void pg_regfree(regex_t *re)
Definition: regfree.c:49
Definition: trgm.h:67
Definition: regex.h:56
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:577
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
Definition: trgm_regexp.c:731

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_END_TRY, PG_FINALLY, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, REG_ICASE, and REG_NOSUB.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

◆ createTrgmNFAInternal()

static TRGM * createTrgmNFAInternal ( regex_t regex,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)
static

Definition at line 577 of file trgm_regexp.c.

579 {
580  TRGM *trg;
581  TrgmNFA trgmNFA;
582 
583  trgmNFA.regex = regex;
584 
585  /* Collect color information from the regex */
586  getColorInfo(regex, &trgmNFA);
587 
588 #ifdef TRGM_REGEXP_DEBUG
589  printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
590 #endif
591 
592  /*
593  * Stage 2: Create an expanded graph from the source NFA.
594  */
595  transformGraph(&trgmNFA);
596 
597 #ifdef TRGM_REGEXP_DEBUG
598  printTrgmNFA(&trgmNFA);
599 #endif
600 
601  /*
602  * Fail if we were unable to make a nontrivial graph, ie it is possible to
603  * get from the initial state to the final state without reading any
604  * predictable trigram.
605  */
606  if (trgmNFA.initState->flags & TSTATE_FIN)
607  return NULL;
608 
609  /*
610  * Stage 3: Select color trigrams to expand. Fail if too many trigrams.
611  */
612  if (!selectColorTrigrams(&trgmNFA))
613  return NULL;
614 
615  /*
616  * Stage 4: Expand color trigrams and pack graph into final
617  * representation.
618  */
619  trg = expandColorTrigrams(&trgmNFA, rcontext);
620 
621  *graph = packGraph(&trgmNFA, rcontext);
622 
623 #ifdef TRGM_REGEXP_DEBUG
624  printTrgmPackedGraph(*graph, trg);
625 #endif
626 
627  return trg;
628 }
TrgmState * initState
Definition: trgm_regexp.c:408
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:775
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:1468
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1787
static void transformGraph(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:905
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1942

References TrgmNFA::colorInfo, expandColorTrigrams(), TrgmState::flags, getColorInfo(), TrgmNFA::initState, TrgmNFA::ncolors, packGraph(), TrgmNFA::regex, selectColorTrigrams(), transformGraph(), and TSTATE_FIN.

Referenced by createTrgmNFA().

◆ expandColorTrigrams()

static TRGM * expandColorTrigrams ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1787 of file trgm_regexp.c.

1788 {
1789  TRGM *trg;
1790  trgm *p;
1791  int i;
1792  TrgmColorInfo blankColor;
1793  trgm_mb_char blankChar;
1794 
1795  /* Set up "blank" color structure containing a single zero character */
1796  memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
1797  blankColor.wordCharsCount = 1;
1798  blankColor.wordChars = &blankChar;
1799 
1800  /* Construct the trgm array */
1801  trg = (TRGM *)
1802  MemoryContextAllocZero(rcontext,
1803  TRGMHDRSIZE +
1804  trgmNFA->totalTrgmCount * sizeof(trgm));
1805  trg->flag = ARRKEY;
1806  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
1807  p = GETARR(trg);
1808  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1809  {
1810  ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
1811  TrgmColorInfo *c[3];
1812  trgm_mb_char s[3];
1813  int j,
1814  i1,
1815  i2,
1816  i3;
1817 
1818  /* Ignore any unexpanded trigrams ... */
1819  if (!colorTrgm->expanded)
1820  continue;
1821 
1822  /* Get colors, substituting the dummy struct for COLOR_BLANK */
1823  for (j = 0; j < 3; j++)
1824  {
1825  if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
1826  c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
1827  else
1828  c[j] = &blankColor;
1829  }
1830 
1831  /* Iterate over all possible combinations of colors' characters */
1832  for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
1833  {
1834  s[0] = c[0]->wordChars[i1];
1835  for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
1836  {
1837  s[1] = c[1]->wordChars[i2];
1838  for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
1839  {
1840  s[2] = c[2]->wordChars[i3];
1841  fillTrgm(p, s);
1842  p++;
1843  }
1844  }
1845  }
1846  }
1847 
1848  return trg;
1849 }
#define CALCGTSIZE(flag, siglen)
Definition: hstore_gist.c:59
int j
Definition: isn.c:74
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1037
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:343
TrgmColor colors[3]
Definition: trgm_regexp.c:299
uint8 flag
Definition: trgm.h:69
trgm_mb_char * wordChars
Definition: trgm_regexp.c:263
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:418
int colorTrgmsCount
Definition: trgm_regexp.c:419
int totalTrgmCount
Definition: trgm_regexp.c:420
char trgm[3]
Definition: trgm.h:42
#define GETARR(x)
Definition: trgm.h:106
#define ARRKEY
Definition: trgm.h:96
#define TRGMHDRSIZE
Definition: trgm.h:73
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
Definition: trgm_regexp.c:1855

References ARRKEY, trgm_mb_char::bytes, CALCGTSIZE, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fillTrgm(), TRGM::flag, GETARR, i, j, MemoryContextAllocZero(), SET_VARSIZE, TrgmNFA::totalTrgmCount, TRGMHDRSIZE, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ fillTrgm()

static void fillTrgm ( trgm ptrgm,
trgm_mb_char  s[3] 
)
static

Definition at line 1855 of file trgm_regexp.c.

1856 {
1857  char str[3 * MAX_MULTIBYTE_CHAR_LEN],
1858  *p;
1859  int i,
1860  j;
1861 
1862  /* Write multibyte string into "str" (we don't need null termination) */
1863  p = str;
1864 
1865  for (i = 0; i < 3; i++)
1866  {
1867  if (s[i].bytes[0] != 0)
1868  {
1869  for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
1870  *p++ = s[i].bytes[j];
1871  }
1872  else
1873  {
1874  /* Emit a space in place of COLOR_BLANK */
1875  *p++ = ' ';
1876  }
1877  }
1878 
1879  /* Convert "str" to a standard trigram (possibly hashing it) */
1880  compact_trigram(ptrgm, str, p - str);
1881 }
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:198

References trgm_mb_char::bytes, compact_trigram(), i, j, MAX_MULTIBYTE_CHAR_LEN, and generate_unaccent_rules::str.

Referenced by expandColorTrigrams().

◆ getColorInfo()

static void getColorInfo ( regex_t regex,
TrgmNFA trgmNFA 
)
static

Definition at line 775 of file trgm_regexp.c.

776 {
777  int colorsCount = pg_reg_getnumcolors(regex);
778  int i;
779 
780  trgmNFA->ncolors = colorsCount;
781  trgmNFA->colorInfo = (TrgmColorInfo *)
782  palloc0(colorsCount * sizeof(TrgmColorInfo));
783 
784  /*
785  * Loop over colors, filling TrgmColorInfo about each. Note we include
786  * WHITE (0) even though we know it'll be reported as non-expandable.
787  */
788  for (i = 0; i < colorsCount; i++)
789  {
790  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
791  int charsCount = pg_reg_getnumcharacters(regex, i);
792  pg_wchar *chars;
793  int j;
794 
795  if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
796  {
797  /* Non expandable, or too large to work with */
798  colorInfo->expandable = false;
799  continue;
800  }
801 
802  colorInfo->expandable = true;
803  colorInfo->containsNonWord = false;
804  colorInfo->wordChars = (trgm_mb_char *)
805  palloc(sizeof(trgm_mb_char) * charsCount);
806  colorInfo->wordCharsCount = 0;
807 
808  /* Extract all the chars in this color */
809  chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
810  pg_reg_getcharacters(regex, i, chars, charsCount);
811 
812  /*
813  * Convert characters back to multibyte form, and save only those that
814  * are word characters. Set "containsNonWord" if any non-word
815  * character. (Note: it'd probably be nicer to keep the chars in
816  * pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
817  */
818  for (j = 0; j < charsCount; j++)
819  {
820  trgm_mb_char c;
821 
822  if (!convertPgWchar(chars[j], &c))
823  continue; /* ok to ignore it altogether */
824  if (ISWORDCHR(c.bytes))
825  colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
826  else
827  colorInfo->containsNonWord = true;
828  }
829 
830  pfree(chars);
831  }
832 }
unsigned int pg_wchar
Definition: mbprint.c:31
void * palloc0(Size size)
Definition: mcxt.c:1230
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:174
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:266
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:230
#define ISWORDCHR(c)
Definition: trgm.h:55
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
Definition: trgm_regexp.c:839
#define COLOR_COUNT_LIMIT
Definition: trgm_regexp.c:222
static char chars[TZ_MAX_CHARS]
Definition: zic.c:401

References chars, COLOR_COUNT_LIMIT, TrgmNFA::colorInfo, TrgmColorInfo::containsNonWord, convertPgWchar(), TrgmColorInfo::expandable, i, ISWORDCHR, j, TrgmNFA::ncolors, palloc(), palloc0(), pfree(), pg_reg_getcharacters(), pg_reg_getnumcharacters(), pg_reg_getnumcolors(), TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ getState()

static TrgmState * getState ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1395 of file trgm_regexp.c.

1396 {
1397  TrgmState *state;
1398  bool found;
1399 
1400  state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
1401  &found);
1402  if (!found)
1403  {
1404  /* New state: initialize and queue it */
1405  state->arcs = NIL;
1406  state->enterKeys = NIL;
1407  state->flags = 0;
1408  /* states are initially given negative numbers */
1409  state->snumber = -(++trgmNFA->nstates);
1410  state->parent = NULL;
1411  state->tentFlags = 0;
1412  state->tentParent = NULL;
1413 
1414  trgmNFA->queue = lappend(trgmNFA->queue, state);
1415  }
1416  return state;
1417 }
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:953
@ HASH_ENTER
Definition: hsearch.h:114
#define NIL
Definition: pg_list.h:66
int nstates
Definition: trgm_regexp.c:409
List * queue
Definition: trgm_regexp.c:412
HTAB * states
Definition: trgm_regexp.c:407

References HASH_ENTER, hash_search(), sort-test::key, lappend(), NIL, TrgmNFA::nstates, TrgmNFA::queue, and TrgmNFA::states.

Referenced by addArc(), and transformGraph().

◆ mergeStates()

static void mergeStates ( TrgmState state1,
TrgmState state2 
)
static

Definition at line 1887 of file trgm_regexp.c.

1888 {
1889  Assert(state1 != state2);
1890  Assert(!state1->parent);
1891  Assert(!state2->parent);
1892 
1893  /* state1 absorbs state2's flags */
1894  state1->flags |= state2->flags;
1895 
1896  /* state2, and indirectly all its children, become children of state1 */
1897  state2->parent = state1;
1898 }
struct TrgmState * parent
Definition: trgm_regexp.c:338

References Assert(), TrgmState::flags, and TrgmState::parent.

Referenced by selectColorTrigrams().

◆ packArcInfoCmp()

static int packArcInfoCmp ( const void *  a1,
const void *  a2 
)
static

Definition at line 2097 of file trgm_regexp.c.

2098 {
2099  const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
2100  const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
2101 
2102  if (p1->sourceState < p2->sourceState)
2103  return -1;
2104  if (p1->sourceState > p2->sourceState)
2105  return 1;
2106  if (p1->colorTrgm < p2->colorTrgm)
2107  return -1;
2108  if (p1->colorTrgm > p2->colorTrgm)
2109  return 1;
2110  if (p1->targetState < p2->targetState)
2111  return -1;
2112  if (p1->targetState > p2->targetState)
2113  return 1;
2114  return 0;
2115 }
static const FormData_pg_attribute a1
Definition: heap.c:141
static const FormData_pg_attribute a2
Definition: heap.c:155

References a1, a2, TrgmPackArcInfo::colorTrgm, p2, TrgmPackArcInfo::sourceState, and TrgmPackArcInfo::targetState.

Referenced by packGraph().

◆ packGraph()

static TrgmPackedGraph * packGraph ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1942 of file trgm_regexp.c.

1943 {
1944  int snumber = 2,
1945  arcIndex,
1946  arcsCount;
1947  HASH_SEQ_STATUS scan_status;
1948  TrgmState *state;
1949  TrgmPackArcInfo *arcs,
1950  *p1,
1951  *p2;
1952  TrgmPackedArc *packedArcs;
1953  TrgmPackedGraph *result;
1954  int i,
1955  j;
1956 
1957  /* Enumerate surviving states, giving init and fin reserved numbers */
1958  hash_seq_init(&scan_status, trgmNFA->states);
1959  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1960  {
1961  while (state->parent)
1962  state = state->parent;
1963 
1964  if (state->snumber < 0)
1965  {
1966  if (state->flags & TSTATE_INIT)
1967  state->snumber = 0;
1968  else if (state->flags & TSTATE_FIN)
1969  state->snumber = 1;
1970  else
1971  {
1972  state->snumber = snumber;
1973  snumber++;
1974  }
1975  }
1976  }
1977 
1978  /* Collect array of all arcs */
1979  arcs = (TrgmPackArcInfo *)
1980  palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
1981  arcIndex = 0;
1982  hash_seq_init(&scan_status, trgmNFA->states);
1983  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1984  {
1985  TrgmState *source = state;
1986  ListCell *cell;
1987 
1988  while (source->parent)
1989  source = source->parent;
1990 
1991  foreach(cell, state->arcs)
1992  {
1993  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1994  TrgmState *target = arc->target;
1995 
1996  while (target->parent)
1997  target = target->parent;
1998 
1999  if (source->snumber != target->snumber)
2000  {
2001  ColorTrgmInfo *ctrgm;
2002 
2003  ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
2004  trgmNFA->colorTrgms,
2005  trgmNFA->colorTrgmsCount,
2006  sizeof(ColorTrgmInfo),
2008  Assert(ctrgm != NULL);
2009  Assert(ctrgm->expanded);
2010 
2011  arcs[arcIndex].sourceState = source->snumber;
2012  arcs[arcIndex].targetState = target->snumber;
2013  arcs[arcIndex].colorTrgm = ctrgm->cnumber;
2014  arcIndex++;
2015  }
2016  }
2017  }
2018 
2019  /* Sort arcs to ease duplicate detection */
2020  qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
2021 
2022  /* We could have duplicates because states were merged. Remove them. */
2023  /* p1 is probe point, p2 is last known non-duplicate. */
2024  p2 = arcs;
2025  for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2026  {
2027  if (packArcInfoCmp(p1, p2) > 0)
2028  {
2029  p2++;
2030  *p2 = *p1;
2031  }
2032  }
2033  arcsCount = (p2 - arcs) + 1;
2034 
2035  /* Create packed representation */
2036  result = (TrgmPackedGraph *)
2037  MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
2038 
2039  /* Pack color trigrams information */
2040  result->colorTrigramsCount = 0;
2041  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2042  {
2043  if (trgmNFA->colorTrgms[i].expanded)
2044  result->colorTrigramsCount++;
2045  }
2046  result->colorTrigramGroups = (int *)
2047  MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
2048  j = 0;
2049  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2050  {
2051  if (trgmNFA->colorTrgms[i].expanded)
2052  {
2053  result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
2054  j++;
2055  }
2056  }
2057 
2058  /* Pack states and arcs information */
2059  result->statesCount = snumber;
2060  result->states = (TrgmPackedState *)
2061  MemoryContextAlloc(rcontext, snumber * sizeof(TrgmPackedState));
2062  packedArcs = (TrgmPackedArc *)
2063  MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
2064  j = 0;
2065  for (i = 0; i < snumber; i++)
2066  {
2067  int cnt = 0;
2068 
2069  result->states[i].arcs = &packedArcs[j];
2070  while (j < arcsCount && arcs[j].sourceState == i)
2071  {
2072  packedArcs[j].targetState = arcs[j].targetState;
2073  packedArcs[j].colorTrgm = arcs[j].colorTrgm;
2074  cnt++;
2075  j++;
2076  }
2077  result->states[i].arcsCount = cnt;
2078  }
2079 
2080  /* Allocate working memory for trigramsMatchGraph() */
2081  result->colorTrigramsActive = (bool *)
2082  MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
2083  result->statesActive = (bool *)
2084  MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
2085  result->statesQueue = (int *)
2086  MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
2087 
2088  return result;
2089 }
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1431
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1421
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:994
static rewind_source * source
Definition: pg_rewind.c:81
#define qsort(a, b, c, d)
Definition: port.h:445
TrgmPackedState * states
Definition: trgm_regexp.c:458
bool * statesActive
Definition: trgm_regexp.c:462
bool * colorTrigramsActive
Definition: trgm_regexp.c:461
int * colorTrigramGroups
Definition: trgm_regexp.c:451
TrgmPackedArc * arcs
Definition: trgm_regexp.c:435
#define TSTATE_INIT
Definition: trgm_regexp.c:328
static int packArcInfoCmp(const void *a1, const void *a2)
Definition: trgm_regexp.c:2097
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1904

References TrgmPackedState::arcs, TrgmNFA::arcsCount, TrgmPackedState::arcsCount, Assert(), ColorTrgmInfo::cnumber, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, colorTrgmInfoCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, ColorTrgmInfo::count, ColorTrgmInfo::expanded, hash_seq_init(), hash_seq_search(), i, j, lfirst, MemoryContextAlloc(), p2, packArcInfoCmp(), palloc(), TrgmState::parent, qsort, TrgmState::snumber, source, TrgmPackArcInfo::sourceState, TrgmNFA::states, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, TrgmPackedArc::targetState, TrgmPackArcInfo::targetState, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

◆ prefixContains()

static bool prefixContains ( TrgmPrefix prefix1,
TrgmPrefix prefix2 
)
static

Definition at line 1426 of file trgm_regexp.c.

1427 {
1428  if (prefix1->colors[1] == COLOR_UNKNOWN)
1429  {
1430  /* Fully ambiguous prefix contains everything */
1431  return true;
1432  }
1433  else if (prefix1->colors[0] == COLOR_UNKNOWN)
1434  {
1435  /*
1436  * Prefix with only first unknown color contains every prefix with
1437  * same second color.
1438  */
1439  if (prefix1->colors[1] == prefix2->colors[1])
1440  return true;
1441  else
1442  return false;
1443  }
1444  else
1445  {
1446  /* Exact prefix contains only the exact same prefix */
1447  if (prefix1->colors[0] == prefix2->colors[0] &&
1448  prefix1->colors[1] == prefix2->colors[1])
1449  return true;
1450  else
1451  return false;
1452  }
1453 }

References COLOR_UNKNOWN, and TrgmPrefix::colors.

Referenced by addArc(), and addKey().

◆ processState()

static void processState ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 969 of file trgm_regexp.c.

970 {
971  ListCell *lc;
972 
973  /* keysQueue should be NIL already, but make sure */
974  trgmNFA->keysQueue = NIL;
975 
976  /*
977  * Add state's own key, and then process all keys added to keysQueue until
978  * queue is finished. But we can quit if the state gets marked final.
979  */
980  addKey(trgmNFA, state, &state->stateKey);
981  foreach(lc, trgmNFA->keysQueue)
982  {
983  TrgmStateKey *key = (TrgmStateKey *) lfirst(lc);
984 
985  if (state->flags & TSTATE_FIN)
986  break;
987  addKey(trgmNFA, state, key);
988  }
989 
990  /* Release keysQueue to clean up for next cycle */
991  list_free(trgmNFA->keysQueue);
992  trgmNFA->keysQueue = NIL;
993 
994  /*
995  * Add outgoing arcs only if state isn't final (we have no interest in
996  * outgoing arcs if we already match)
997  */
998  if (!(state->flags & TSTATE_FIN))
999  addArcs(trgmNFA, state);
1000 }
void list_free(List *list)
Definition: list.c:1545
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
Definition: trgm_regexp.c:1019
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:1200

References addArcs(), addKey(), sort-test::key, TrgmNFA::keysQueue, lfirst, list_free(), NIL, and TSTATE_FIN.

Referenced by transformGraph().

◆ RE_compile()

static void RE_compile ( regex_t regex,
text text_re,
int  cflags,
Oid  collation 
)
static

Definition at line 731 of file trgm_regexp.c.

732 {
733  int text_re_len = VARSIZE_ANY_EXHDR(text_re);
734  char *text_re_val = VARDATA_ANY(text_re);
735  pg_wchar *pattern;
736  int pattern_len;
737  int regcomp_result;
738  char errMsg[100];
739 
740  /* Convert pattern string to wide characters */
741  pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
742  pattern_len = pg_mb2wchar_with_len(text_re_val,
743  pattern,
744  text_re_len);
745 
746  /* Compile regex */
747  regcomp_result = pg_regcomp(regex,
748  pattern,
749  pattern_len,
750  cflags,
751  collation);
752 
753  pfree(pattern);
754 
755  if (regcomp_result != REG_OKAY)
756  {
757  /* re didn't compile (no need for pg_regfree, if so) */
758  pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
759  ereport(ERROR,
760  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
761  errmsg("invalid regular expression: %s", errMsg)));
762  }
763 }
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:929
#define VARDATA_ANY(PTR)
Definition: postgres.h:362
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:355
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
Definition: regcomp.c:372
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define REG_OKAY
Definition: regex.h:137

References ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by createTrgmNFA().

◆ selectColorTrigrams()

static bool selectColorTrigrams ( TrgmNFA trgmNFA)
static

Definition at line 1468 of file trgm_regexp.c.

1469 {
1470  HASH_SEQ_STATUS scan_status;
1471  int arcsCount = trgmNFA->arcsCount,
1472  i;
1473  TrgmState *state;
1474  ColorTrgmInfo *colorTrgms;
1475  int64 totalTrgmCount;
1476  float4 totalTrgmPenalty;
1477  int cnumber;
1478 
1479  /* Collect color trigrams from all arcs */
1480  colorTrgms = (ColorTrgmInfo *) palloc0(sizeof(ColorTrgmInfo) * arcsCount);
1481  trgmNFA->colorTrgms = colorTrgms;
1482 
1483  i = 0;
1484  hash_seq_init(&scan_status, trgmNFA->states);
1485  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1486  {
1487  ListCell *cell;
1488 
1489  foreach(cell, state->arcs)
1490  {
1491  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1492  TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
1493  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1494 
1495  arcInfo->source = state;
1496  arcInfo->target = arc->target;
1497  trgmInfo->ctrgm = arc->ctrgm;
1498  trgmInfo->cnumber = -1;
1499  /* count and penalty will be set below */
1500  trgmInfo->expanded = true;
1501  trgmInfo->arcs = list_make1(arcInfo);
1502  i++;
1503  }
1504  }
1505  Assert(i == arcsCount);
1506 
1507  /* Remove duplicates, merging their arcs lists */
1508  if (arcsCount >= 2)
1509  {
1510  ColorTrgmInfo *p1,
1511  *p2;
1512 
1513  /* Sort trigrams to ease duplicate detection */
1514  qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
1515 
1516  /* p1 is probe point, p2 is last known non-duplicate. */
1517  p2 = colorTrgms;
1518  for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1519  {
1520  if (colorTrgmInfoCmp(p1, p2) > 0)
1521  {
1522  p2++;
1523  *p2 = *p1;
1524  }
1525  else
1526  {
1527  p2->arcs = list_concat(p2->arcs, p1->arcs);
1528  }
1529  }
1530  trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
1531  }
1532  else
1533  {
1534  trgmNFA->colorTrgmsCount = arcsCount;
1535  }
1536 
1537  /*
1538  * Count number of simple trigrams generated by each color trigram, and
1539  * also compute a penalty value, which is the number of simple trigrams
1540  * times a multiplier that depends on its whitespace content.
1541  *
1542  * Note: per-color-trigram counts cannot overflow an int so long as
1543  * COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
1544  * 1290. However, the grand total totalTrgmCount might conceivably
1545  * overflow an int, so we use int64 for that within this routine. Also,
1546  * penalties are calculated in float4 arithmetic to avoid any overflow
1547  * worries.
1548  */
1549  totalTrgmCount = 0;
1550  totalTrgmPenalty = 0.0f;
1551  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1552  {
1553  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1554  int j,
1555  count = 1,
1556  typeIndex = 0;
1557 
1558  for (j = 0; j < 3; j++)
1559  {
1560  TrgmColor c = trgmInfo->ctrgm.colors[j];
1561 
1562  typeIndex *= 2;
1563  if (c == COLOR_BLANK)
1564  typeIndex++;
1565  else
1566  count *= trgmNFA->colorInfo[c].wordCharsCount;
1567  }
1568  trgmInfo->count = count;
1569  totalTrgmCount += count;
1570  trgmInfo->penalty = penalties[typeIndex] * (float4) count;
1571  totalTrgmPenalty += trgmInfo->penalty;
1572  }
1573 
1574  /* Sort color trigrams in descending order of their penalties */
1575  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1577 
1578  /*
1579  * Remove color trigrams from the graph so long as total penalty of color
1580  * trigrams exceeds WISH_TRGM_PENALTY. (If we fail to get down to
1581  * WISH_TRGM_PENALTY, it's OK so long as total count is no more than
1582  * MAX_TRGM_COUNT.) We prefer to remove color trigrams with higher
1583  * penalty, since those are the most promising for reducing the total
1584  * penalty. When removing a color trigram we have to merge states
1585  * connected by arcs labeled with that trigram. It's necessary to not
1586  * merge initial and final states, because our graph becomes useless if
1587  * that happens; so we cannot always remove the trigram we'd prefer to.
1588  */
1589  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1590  {
1591  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1592  bool canRemove = true;
1593  ListCell *cell;
1594 
1595  /* Done if we've reached the target */
1596  if (totalTrgmPenalty <= WISH_TRGM_PENALTY)
1597  break;
1598 
1599 #ifdef TRGM_REGEXP_DEBUG
1600  fprintf(stderr, "considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1601  trgmInfo->ctrgm.colors[0],
1602  trgmInfo->ctrgm.colors[1],
1603  trgmInfo->ctrgm.colors[2],
1604  trgmInfo->penalty,
1605  list_length(trgmInfo->arcs));
1606 #endif
1607 
1608  /*
1609  * Does any arc of this color trigram connect initial and final
1610  * states? If so we can't remove it.
1611  */
1612  foreach(cell, trgmInfo->arcs)
1613  {
1614  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1615  TrgmState *source = arcInfo->source,
1616  *target = arcInfo->target;
1617  int source_flags,
1618  target_flags;
1619 
1620 #ifdef TRGM_REGEXP_DEBUG
1621  fprintf(stderr, "examining arc to s%d (%x) from s%d (%x)\n",
1622  -target->snumber, target->flags,
1623  -source->snumber, source->flags);
1624 #endif
1625 
1626  /* examine parent states, if any merging has already happened */
1627  while (source->parent)
1628  source = source->parent;
1629  while (target->parent)
1630  target = target->parent;
1631 
1632 #ifdef TRGM_REGEXP_DEBUG
1633  fprintf(stderr, " ... after completed merges: to s%d (%x) from s%d (%x)\n",
1634  -target->snumber, target->flags,
1635  -source->snumber, source->flags);
1636 #endif
1637 
1638  /* we must also consider merges we are planning right now */
1639  source_flags = source->flags | source->tentFlags;
1640  while (source->tentParent)
1641  {
1642  source = source->tentParent;
1643  source_flags |= source->flags | source->tentFlags;
1644  }
1645  target_flags = target->flags | target->tentFlags;
1646  while (target->tentParent)
1647  {
1648  target = target->tentParent;
1649  target_flags |= target->flags | target->tentFlags;
1650  }
1651 
1652 #ifdef TRGM_REGEXP_DEBUG
1653  fprintf(stderr, " ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1654  -target->snumber, target_flags,
1655  -source->snumber, source_flags);
1656 #endif
1657 
1658  /* would fully-merged state have both INIT and FIN set? */
1659  if (((source_flags | target_flags) & (TSTATE_INIT | TSTATE_FIN)) ==
1660  (TSTATE_INIT | TSTATE_FIN))
1661  {
1662  canRemove = false;
1663  break;
1664  }
1665 
1666  /* ok so far, so remember planned merge */
1667  if (source != target)
1668  {
1669 #ifdef TRGM_REGEXP_DEBUG
1670  fprintf(stderr, " ... tentatively merging s%d into s%d\n",
1671  -target->snumber, -source->snumber);
1672 #endif
1673  target->tentParent = source;
1674  source->tentFlags |= target_flags;
1675  }
1676  }
1677 
1678  /*
1679  * We must reset all the tentFlags/tentParent fields before
1680  * continuing. tentFlags could only have become set in states that
1681  * are the source or parent or tentative parent of one of the current
1682  * arcs; likewise tentParent could only have become set in states that
1683  * are the target or parent or tentative parent of one of the current
1684  * arcs. There might be some overlap between those sets, but if we
1685  * clear tentFlags in target states as well as source states, we
1686  * should be okay even if we visit a state as target before visiting
1687  * it as a source.
1688  */
1689  foreach(cell, trgmInfo->arcs)
1690  {
1691  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1692  TrgmState *source = arcInfo->source,
1693  *target = arcInfo->target;
1694  TrgmState *ttarget;
1695 
1696  /* no need to touch previously-merged states */
1697  while (source->parent)
1698  source = source->parent;
1699  while (target->parent)
1700  target = target->parent;
1701 
1702  while (source)
1703  {
1704  source->tentFlags = 0;
1705  source = source->tentParent;
1706  }
1707 
1708  while ((ttarget = target->tentParent) != NULL)
1709  {
1710  target->tentParent = NULL;
1711  target->tentFlags = 0; /* in case it was also a source */
1712  target = ttarget;
1713  }
1714  }
1715 
1716  /* Now, move on if we can't drop this trigram */
1717  if (!canRemove)
1718  {
1719 #ifdef TRGM_REGEXP_DEBUG
1720  fprintf(stderr, " ... not ok to merge\n");
1721 #endif
1722  continue;
1723  }
1724 
1725  /* OK, merge states linked by each arc labeled by the trigram */
1726  foreach(cell, trgmInfo->arcs)
1727  {
1728  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1729  TrgmState *source = arcInfo->source,
1730  *target = arcInfo->target;
1731 
1732  while (source->parent)
1733  source = source->parent;
1734  while (target->parent)
1735  target = target->parent;
1736  if (source != target)
1737  {
1738 #ifdef TRGM_REGEXP_DEBUG
1739  fprintf(stderr, "merging s%d into s%d\n",
1740  -target->snumber, -source->snumber);
1741 #endif
1742  mergeStates(source, target);
1743  /* Assert we didn't merge initial and final states */
1744  Assert((source->flags & (TSTATE_INIT | TSTATE_FIN)) !=
1745  (TSTATE_INIT | TSTATE_FIN));
1746  }
1747  }
1748 
1749  /* Mark trigram unexpanded, and update totals */
1750  trgmInfo->expanded = false;
1751  totalTrgmCount -= trgmInfo->count;
1752  totalTrgmPenalty -= trgmInfo->penalty;
1753  }
1754 
1755  /* Did we succeed in fitting into MAX_TRGM_COUNT? */
1756  if (totalTrgmCount > MAX_TRGM_COUNT)
1757  return false;
1758 
1759  trgmNFA->totalTrgmCount = (int) totalTrgmCount;
1760 
1761  /*
1762  * Sort color trigrams by colors (will be useful for bsearch in packGraph)
1763  * and enumerate the color trigrams that are expanded.
1764  */
1765  cnumber = 0;
1766  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1768  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1769  {
1770  if (colorTrgms[i].expanded)
1771  {
1772  colorTrgms[i].cnumber = cnumber;
1773  cnumber++;
1774  }
1775  }
1776 
1777  return true;
1778 }
List * list_concat(List *list1, const List *list2)
Definition: list.c:560
static int list_length(const List *l)
Definition: pg_list.h:150
#define list_make1(x1)
Definition: pg_list.h:210
#define fprintf
Definition: port.h:242
float4 penalty
Definition: trgm_regexp.c:377
TrgmState * source
Definition: trgm_regexp.c:359
TrgmState * target
Definition: trgm_regexp.c:360
int TrgmColor
Definition: trgm_regexp.c:282
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1917
#define MAX_TRGM_COUNT
Definition: trgm_regexp.c:220
static const float4 penalties[8]
Definition: trgm_regexp.c:228
#define WISH_TRGM_PENALTY
Definition: trgm_regexp.c:221
static void mergeStates(TrgmState *state1, TrgmState *state2)
Definition: trgm_regexp.c:1887

References ColorTrgmInfo::arcs, TrgmNFA::arcsCount, Assert(), ColorTrgmInfo::cnumber, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, colorTrgmInfoCmp(), colorTrgmInfoPenaltyCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::count, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fprintf, hash_seq_init(), hash_seq_search(), i, j, lfirst, list_concat(), list_length(), list_make1, MAX_TRGM_COUNT, mergeStates(), p2, palloc(), palloc0(), penalties, ColorTrgmInfo::penalty, qsort, TrgmArcInfo::source, source, TrgmNFA::states, TrgmArcInfo::target, TrgmState::tentParent, TrgmNFA::totalTrgmCount, TSTATE_FIN, TSTATE_INIT, WISH_TRGM_PENALTY, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

◆ transformGraph()

static void transformGraph ( TrgmNFA trgmNFA)
static

Definition at line 905 of file trgm_regexp.c.

906 {
907  HASHCTL hashCtl;
908  TrgmStateKey initkey;
909  TrgmState *initstate;
910  ListCell *lc;
911 
912  /* Initialize this stage's workspace in trgmNFA struct */
913  trgmNFA->queue = NIL;
914  trgmNFA->keysQueue = NIL;
915  trgmNFA->arcsCount = 0;
916  trgmNFA->overflowed = false;
917 
918  /* Create hashtable for states */
919  hashCtl.keysize = sizeof(TrgmStateKey);
920  hashCtl.entrysize = sizeof(TrgmState);
921  hashCtl.hcxt = CurrentMemoryContext;
922  trgmNFA->states = hash_create("Trigram NFA",
923  1024,
924  &hashCtl,
926  trgmNFA->nstates = 0;
927 
928  /* Create initial state: ambiguous prefix, NFA's initial state */
929  MemSet(&initkey, 0, sizeof(initkey));
930  initkey.prefix.colors[0] = COLOR_UNKNOWN;
931  initkey.prefix.colors[1] = COLOR_UNKNOWN;
932  initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
933 
934  initstate = getState(trgmNFA, &initkey);
935  initstate->flags |= TSTATE_INIT;
936  trgmNFA->initState = initstate;
937 
938  /*
939  * Recursively build the expanded graph by processing queue of states
940  * (breadth-first search). getState already put initstate in the queue.
941  * Note that getState will append new states to the queue within the loop,
942  * too; this works as long as we don't do repeat fetches using the "lc"
943  * pointer.
944  */
945  foreach(lc, trgmNFA->queue)
946  {
947  TrgmState *state = (TrgmState *) lfirst(lc);
948 
949  /*
950  * If we overflowed then just mark state as final. Otherwise do
951  * actual processing.
952  */
953  if (trgmNFA->overflowed)
954  state->flags |= TSTATE_FIN;
955  else
956  processState(trgmNFA, state);
957 
958  /* Did we overflow? */
959  if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
961  trgmNFA->overflowed = true;
962  }
963 }
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:350
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1377
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
MemoryContext hcxt
Definition: hsearch.h:86
bool overflowed
Definition: trgm_regexp.c:415
#define MAX_EXPANDED_STATES
Definition: trgm_regexp.c:218
#define MAX_EXPANDED_ARCS
Definition: trgm_regexp.c:219
struct TrgmState TrgmState
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:969

References TrgmNFA::arcsCount, COLOR_UNKNOWN, TrgmPrefix::colors, CurrentMemoryContext, HASHCTL::entrysize, TrgmState::flags, getState(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, hash_get_num_entries(), HASHCTL::hcxt, TrgmNFA::initState, HASHCTL::keysize, TrgmNFA::keysQueue, lfirst, MAX_EXPANDED_ARCS, MAX_EXPANDED_STATES, MemSet, NIL, TrgmStateKey::nstate, TrgmNFA::nstates, TrgmNFA::overflowed, pg_reg_getinitialstate(), TrgmStateKey::prefix, processState(), TrgmNFA::queue, TrgmNFA::regex, TrgmNFA::states, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

◆ trigramsMatchGraph()

bool trigramsMatchGraph ( TrgmPackedGraph graph,
bool check 
)

Definition at line 638 of file trgm_regexp.c.

639 {
640  int i,
641  j,
642  k,
643  queueIn,
644  queueOut;
645 
646  /*
647  * Reset temporary working areas.
648  */
649  memset(graph->colorTrigramsActive, 0,
650  sizeof(bool) * graph->colorTrigramsCount);
651  memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
652 
653  /*
654  * Check which color trigrams were matched. A match for any simple
655  * trigram associated with a color trigram counts as a match of the color
656  * trigram.
657  */
658  j = 0;
659  for (i = 0; i < graph->colorTrigramsCount; i++)
660  {
661  int cnt = graph->colorTrigramGroups[i];
662 
663  for (k = j; k < j + cnt; k++)
664  {
665  if (check[k])
666  {
667  /*
668  * Found one matched trigram in the group. Can skip the rest
669  * of them and go to the next group.
670  */
671  graph->colorTrigramsActive[i] = true;
672  break;
673  }
674  }
675  j = j + cnt;
676  }
677 
678  /*
679  * Initialize the statesQueue to hold just the initial state. Note:
680  * statesQueue has room for statesCount entries, which is certainly enough
681  * since no state will be put in the queue more than once. The
682  * statesActive array marks which states have been queued.
683  */
684  graph->statesActive[0] = true;
685  graph->statesQueue[0] = 0;
686  queueIn = 0;
687  queueOut = 1;
688 
689  /* Process queued states as long as there are any. */
690  while (queueIn < queueOut)
691  {
692  int stateno = graph->statesQueue[queueIn++];
693  TrgmPackedState *state = &graph->states[stateno];
694  int cnt = state->arcsCount;
695 
696  /* Loop over state's out-arcs */
697  for (i = 0; i < cnt; i++)
698  {
699  TrgmPackedArc *arc = &state->arcs[i];
700 
701  /*
702  * If corresponding color trigram is present then activate the
703  * corresponding state. We're done if that's the final state,
704  * otherwise queue the state if it's not been queued already.
705  */
706  if (graph->colorTrigramsActive[arc->colorTrgm])
707  {
708  int nextstate = arc->targetState;
709 
710  if (nextstate == 1)
711  return true; /* success: final state is reachable */
712 
713  if (!graph->statesActive[nextstate])
714  {
715  graph->statesActive[nextstate] = true;
716  graph->statesQueue[queueOut++] = nextstate;
717  }
718  }
719  }
720  }
721 
722  /* Queue is empty, so match fails. */
723  return false;
724 }

References TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, j, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, and TrgmPackedGraph::statesQueue.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

◆ validArcLabel()

static bool validArcLabel ( TrgmStateKey key,
TrgmColor  co 
)
static

Definition at line 1340 of file trgm_regexp.c.

1341 {
1342  /*
1343  * We have to know full trigram in order to add outgoing arc. So we can't
1344  * do it if prefix is ambiguous.
1345  */
1346  if (key->prefix.colors[0] == COLOR_UNKNOWN)
1347  return false;
1348 
1349  /* If key->prefix.colors[0] isn't unknown, its second color isn't either */
1350  Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
1351  /* And we should not be called with an unknown arc color anytime */
1352  Assert(co != COLOR_UNKNOWN);
1353 
1354  /*
1355  * We don't bother with making arcs representing three non-word
1356  * characters, since that's useless for trigram extraction.
1357  */
1358  if (key->prefix.colors[0] == COLOR_BLANK &&
1359  key->prefix.colors[1] == COLOR_BLANK &&
1360  co == COLOR_BLANK)
1361  return false;
1362 
1363  /*
1364  * We also reject nonblank-blank-anything. The nonblank-blank-nonblank
1365  * case doesn't correspond to any trigram the trigram extraction code
1366  * would make. The nonblank-blank-blank case is also not possible with
1367  * RPADDING = 1. (Note that in many cases we'd fail to generate such a
1368  * trigram even if it were valid, for example processing "foo bar" will
1369  * not result in considering the trigram "o ". So if you want to support
1370  * RPADDING = 2, there's more to do than just twiddle this test.)
1371  */
1372  if (key->prefix.colors[0] != COLOR_BLANK &&
1373  key->prefix.colors[1] == COLOR_BLANK)
1374  return false;
1375 
1376  /*
1377  * Other combinations involving blank are valid, in particular we assume
1378  * blank-blank-nonblank is valid, which presumes that LPADDING is 2.
1379  *
1380  * Note: Using again the example "foo bar", we will not consider the
1381  * trigram " b", though this trigram would be found by the trigram
1382  * extraction code. Since we will find " ba", it doesn't seem worth
1383  * trying to hack the algorithm to generate the additional trigram.
1384  */
1385 
1386  /* arc label is valid */
1387  return true;
1388 }

References Assert(), COLOR_BLANK, COLOR_UNKNOWN, and sort-test::key.

Referenced by addArc(), and addKey().

Variable Documentation

◆ penalties

const float4 penalties[8]
static
Initial value:
= {
1.0f,
3.5f,
0.0f,
0.0f,
4.2f,
2.1f,
25.0f,
0.0f
}

Definition at line 228 of file trgm_regexp.c.

Referenced by selectColorTrigrams().