PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
trgm_regexp.c File Reference
#include "postgres.h"
#include "trgm.h"
#include "regex/regexport.h"
#include "tsearch/ts_locale.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for trgm_regexp.c:

Go to the source code of this file.

Data Structures

struct  trgm_mb_char
 
struct  TrgmColorInfo
 
struct  TrgmPrefix
 
struct  ColorTrgm
 
struct  TrgmStateKey
 
struct  TrgmState
 
struct  TrgmArc
 
struct  TrgmArcInfo
 
struct  ColorTrgmInfo
 
struct  TrgmNFA
 
struct  TrgmPackedArc
 
struct  TrgmPackedState
 
struct  TrgmPackedGraph
 
struct  TrgmPackArcInfo
 

Macros

#define MAX_EXPANDED_STATES   128
 
#define MAX_EXPANDED_ARCS   1024
 
#define MAX_TRGM_COUNT   256
 
#define WISH_TRGM_PENALTY   16
 
#define COLOR_COUNT_LIMIT   256
 
#define COLOR_UNKNOWN   (-1)
 
#define COLOR_BLANK   (-2)
 
#define TSTATE_INIT   0x01 /* flag indicating this state is initial */
 
#define TSTATE_FIN   0x02 /* flag indicating this state is final */
 

Typedefs

typedef int TrgmColor
 
typedef struct TrgmState TrgmState
 

Functions

static TRGMcreateTrgmNFAInternal (regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
 
static void RE_compile (regex_t *regex, text *text_re, int cflags, Oid collation)
 
static void getColorInfo (regex_t *regex, TrgmNFA *trgmNFA)
 
static bool convertPgWchar (pg_wchar c, trgm_mb_char *result)
 
static void transformGraph (TrgmNFA *trgmNFA)
 
static void processState (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addKey (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
 
static void addKeyToQueue (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static void addArcs (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addArc (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
 
static bool validArcLabel (TrgmStateKey *key, TrgmColor co)
 
static TrgmStategetState (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static bool prefixContains (TrgmPrefix *prefix1, TrgmPrefix *prefix2)
 
static bool selectColorTrigrams (TrgmNFA *trgmNFA)
 
static TRGMexpandColorTrigrams (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static void fillTrgm (trgm *ptrgm, trgm_mb_char s[3])
 
static void mergeStates (TrgmState *state1, TrgmState *state2)
 
static int colorTrgmInfoCmp (const void *p1, const void *p2)
 
static int colorTrgmInfoPenaltyCmp (const void *p1, const void *p2)
 
static TrgmPackedGraphpackGraph (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static int packArcInfoCmp (const void *a1, const void *a2)
 
TRGMcreateTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
 
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)
 

Variables

const float4 penalties [8]
 

Macro Definition Documentation

#define COLOR_BLANK   (-2)
#define COLOR_COUNT_LIMIT   256

Definition at line 223 of file trgm_regexp.c.

Referenced by getColorInfo().

#define COLOR_UNKNOWN   (-1)

Definition at line 286 of file trgm_regexp.c.

Referenced by addKey(), prefixContains(), transformGraph(), and validArcLabel().

#define MAX_EXPANDED_ARCS   1024

Definition at line 220 of file trgm_regexp.c.

Referenced by transformGraph().

#define MAX_EXPANDED_STATES   128

Definition at line 219 of file trgm_regexp.c.

Referenced by transformGraph().

#define MAX_TRGM_COUNT   256

Definition at line 221 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

#define TSTATE_FIN   0x02 /* flag indicating this state is final */
#define TSTATE_INIT   0x01 /* flag indicating this state is initial */

Definition at line 326 of file trgm_regexp.c.

Referenced by packGraph(), selectColorTrigrams(), and transformGraph().

#define WISH_TRGM_PENALTY   16

Definition at line 222 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

Typedef Documentation

Definition at line 283 of file trgm_regexp.c.

Function Documentation

static void addArc ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key,
TrgmColor  co,
TrgmStateKey destKey 
)
static

Definition at line 1280 of file trgm_regexp.c.

References TrgmState::arcs, TrgmNFA::arcsCount, TrgmPrefix::colors, ColorTrgm::colors, TrgmArc::ctrgm, TrgmState::enterKeys, getState(), lappend(), lfirst, TrgmStateKey::nstate, palloc(), TrgmStateKey::prefix, prefixContains(), TrgmArc::target, and validArcLabel().

Referenced by addArcs().

1282 {
1283  TrgmArc *arc;
1284  ListCell *cell;
1285 
1286  /* Do nothing if this wouldn't be a valid arc label trigram */
1287  if (!validArcLabel(key, co))
1288  return;
1289 
1290  /*
1291  * Check if we are going to reach key which is covered by a key which is
1292  * already listed in this state. If so arc is useless: the NFA can bypass
1293  * it through a path that doesn't require any predictable trigram, so
1294  * whether the arc's trigram is present or not doesn't really matter.
1295  */
1296  foreach(cell, state->enterKeys)
1297  {
1298  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1299 
1300  if (existingKey->nstate == destKey->nstate &&
1301  prefixContains(&existingKey->prefix, &destKey->prefix))
1302  return;
1303  }
1304 
1305  /* Checks were successful, add new arc */
1306  arc = (TrgmArc *) palloc(sizeof(TrgmArc));
1307  arc->target = getState(trgmNFA, destKey);
1308  arc->ctrgm.colors[0] = key->prefix.colors[0];
1309  arc->ctrgm.colors[1] = key->prefix.colors[1];
1310  arc->ctrgm.colors[2] = co;
1311 
1312  state->arcs = lappend(state->arcs, arc);
1313  trgmNFA->arcsCount++;
1314 }
ColorTrgm ctrgm
Definition: trgm_regexp.c:345
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1406
Definition: regguts.h:276
TrgmColor colors[3]
Definition: trgm_regexp.c:300
TrgmColor colors[2]
Definition: trgm_regexp.c:291
List * enterKeys
Definition: trgm_regexp.c:333
TrgmState * target
Definition: trgm_regexp.c:346
List * arcs
Definition: trgm_regexp.c:332
List * lappend(List *list, void *datum)
Definition: list.c:128
int arcsCount
Definition: trgm_regexp.c:410
#define lfirst(lc)
Definition: pg_list.h:106
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1322
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1377
TrgmPrefix prefix
Definition: trgm_regexp.c:310
void * palloc(Size size)
Definition: mcxt.c:891
static void addArcs ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 1188 of file trgm_regexp.c.

References addArc(), regex_arc_t::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, i, lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, TrgmNFA::regex, regex_arc_t::to, and TrgmColorInfo::wordCharsCount.

Referenced by processState().

1189 {
1190  TrgmStateKey destKey;
1191  ListCell *cell;
1192  regex_arc_t *arcs;
1193  int arcsCount,
1194  i;
1195 
1196  /*
1197  * Ensure any pad bytes in destKey are zero, since it may get used as a
1198  * hashtable key by getState.
1199  */
1200  MemSet(&destKey, 0, sizeof(destKey));
1201 
1202  /*
1203  * Iterate over enter keys associated with this expanded-graph state. This
1204  * includes both the state's own stateKey, and any enter keys we added to
1205  * it during addKey (which represent expanded-graph states that are not
1206  * distinguishable from this one by means of trigrams). For each such
1207  * enter key, examine all the out-arcs of the key's underlying NFA state,
1208  * and try to make a trigram arc leading to where the out-arc leads.
1209  * (addArc will deal with whether the arc is valid or not.)
1210  */
1211  foreach(cell, state->enterKeys)
1212  {
1213  TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
1214 
1215  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1216  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1217  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1218 
1219  for (i = 0; i < arcsCount; i++)
1220  {
1221  regex_arc_t *arc = &arcs[i];
1222  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1223 
1224  /*
1225  * Ignore non-expandable colors; addKey already handled the case.
1226  *
1227  * We need no special check for begin/end pseudocolors here. We
1228  * don't need to do any processing for them, and they will be
1229  * marked non-expandable since the regex engine will have reported
1230  * them that way.
1231  */
1232  if (!colorInfo->expandable)
1233  continue;
1234 
1235  if (colorInfo->containsNonWord)
1236  {
1237  /*
1238  * Color includes non-word character(s).
1239  *
1240  * Generate an arc, treating this transition as occurring on
1241  * BLANK. This allows word-ending trigrams to be manufactured
1242  * if possible.
1243  */
1244  destKey.prefix.colors[0] = key->prefix.colors[1];
1245  destKey.prefix.colors[1] = COLOR_BLANK;
1246  destKey.nstate = arc->to;
1247 
1248  addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
1249  }
1250 
1251  if (colorInfo->wordCharsCount > 0)
1252  {
1253  /*
1254  * Color includes word character(s).
1255  *
1256  * Generate an arc. Color is pushed into prefix of target
1257  * state.
1258  */
1259  destKey.prefix.colors[0] = key->prefix.colors[1];
1260  destKey.prefix.colors[1] = arc->co;
1261  destKey.nstate = arc->to;
1262 
1263  addArc(trgmNFA, state, key, arc->co, &destKey);
1264  }
1265  }
1266 
1267  pfree(arcs);
1268  }
1269 }
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:106
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
Definition: trgm_regexp.c:1280
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:400
regex_t * regex
Definition: trgm_regexp.c:399
Definition: regguts.h:276
#define MemSet(start, val, len)
Definition: c.h:853
bool containsNonWord
Definition: trgm_regexp.c:262
void pfree(void *pointer)
Definition: mcxt.c:992
TrgmColor colors[2]
Definition: trgm_regexp.c:291
List * enterKeys
Definition: trgm_regexp.c:333
#define COLOR_BLANK
Definition: trgm_regexp.c:287
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:80
#define lfirst(lc)
Definition: pg_list.h:106
TrgmPrefix prefix
Definition: trgm_regexp.c:310
void * palloc(Size size)
Definition: mcxt.c:891
int i
static void addKey ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key 
)
static

Definition at line 1005 of file trgm_regexp.c.

References addKeyToQueue(), regex_arc_t::co, COLOR_BLANK, COLOR_UNKNOWN, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, TrgmState::flags, i, lappend(), lfirst, list_delete_cell(), list_head(), lnext, MemSet, next, TrgmStateKey::nstate, NULL, palloc(), pfree(), pg_reg_colorisbegin(), pg_reg_colorisend(), pg_reg_getfinalstate(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, prefixContains(), TrgmNFA::regex, regex_arc_t::to, TSTATE_FIN, validArcLabel(), and TrgmColorInfo::wordCharsCount.

Referenced by processState().

1006 {
1007  regex_arc_t *arcs;
1008  TrgmStateKey destKey;
1009  ListCell *cell,
1010  *prev,
1011  *next;
1012  int i,
1013  arcsCount;
1014 
1015  /*
1016  * Ensure any pad bytes in destKey are zero, since it may get used as a
1017  * hashtable key by getState.
1018  */
1019  MemSet(&destKey, 0, sizeof(destKey));
1020 
1021  /*
1022  * Compare key to each existing enter key of the state to check for
1023  * redundancy. We can drop either old key(s) or the new key if we find
1024  * redundancy.
1025  */
1026  prev = NULL;
1027  cell = list_head(state->enterKeys);
1028  while (cell)
1029  {
1030  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1031 
1032  next = lnext(cell);
1033  if (existingKey->nstate == key->nstate)
1034  {
1035  if (prefixContains(&existingKey->prefix, &key->prefix))
1036  {
1037  /* This old key already covers the new key. Nothing to do */
1038  return;
1039  }
1040  if (prefixContains(&key->prefix, &existingKey->prefix))
1041  {
1042  /*
1043  * The new key covers this old key. Remove the old key, it's
1044  * no longer needed once we add this key to the list.
1045  */
1046  state->enterKeys = list_delete_cell(state->enterKeys,
1047  cell, prev);
1048  }
1049  else
1050  prev = cell;
1051  }
1052  else
1053  prev = cell;
1054  cell = next;
1055  }
1056 
1057  /* No redundancy, so add this key to the state's list */
1058  state->enterKeys = lappend(state->enterKeys, key);
1059 
1060  /* If state is now known final, mark it and we're done */
1061  if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
1062  {
1063  state->flags |= TSTATE_FIN;
1064  return;
1065  }
1066 
1067  /*
1068  * Loop through all outgoing arcs of the corresponding state in the
1069  * original NFA.
1070  */
1071  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1072  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1073  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1074 
1075  for (i = 0; i < arcsCount; i++)
1076  {
1077  regex_arc_t *arc = &arcs[i];
1078 
1079  if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
1080  {
1081  /*
1082  * Start of line/string (^). Trigram extraction treats start of
1083  * line same as start of word: double space prefix is added.
1084  * Hence, make an enter key showing we can reach the arc
1085  * destination with all-blank prefix.
1086  */
1087  destKey.prefix.colors[0] = COLOR_BLANK;
1088  destKey.prefix.colors[1] = COLOR_BLANK;
1089  destKey.nstate = arc->to;
1090 
1091  /* Add enter key to this state */
1092  addKeyToQueue(trgmNFA, &destKey);
1093  }
1094  else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
1095  {
1096  /*
1097  * End of line/string ($). We must consider this arc as a
1098  * transition that doesn't read anything. The reason for adding
1099  * this enter key to the state is that if the arc leads to the
1100  * NFA's final state, we must mark this expanded state as final.
1101  */
1102  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1103  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1104  destKey.nstate = arc->to;
1105 
1106  /* Add enter key to this state */
1107  addKeyToQueue(trgmNFA, &destKey);
1108  }
1109  else
1110  {
1111  /* Regular color */
1112  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1113 
1114  if (colorInfo->expandable)
1115  {
1116  if (colorInfo->containsNonWord &&
1117  !validArcLabel(key, COLOR_BLANK))
1118  {
1119  /*
1120  * We can reach the arc destination after reading a
1121  * non-word character, but the prefix is not something
1122  * that addArc will accept with COLOR_BLANK, so no trigram
1123  * arc can get made for this transition. We must make an
1124  * enter key to show that the arc destination is
1125  * reachable. Set it up with an all-blank prefix, since
1126  * that corresponds to what the trigram extraction code
1127  * will do at a word starting boundary.
1128  */
1129  destKey.prefix.colors[0] = COLOR_BLANK;
1130  destKey.prefix.colors[1] = COLOR_BLANK;
1131  destKey.nstate = arc->to;
1132  addKeyToQueue(trgmNFA, &destKey);
1133  }
1134 
1135  if (colorInfo->wordCharsCount > 0 &&
1136  !validArcLabel(key, arc->co))
1137  {
1138  /*
1139  * We can reach the arc destination after reading a word
1140  * character, but the prefix is not something that addArc
1141  * will accept, so no trigram arc can get made for this
1142  * transition. We must make an enter key to show that the
1143  * arc destination is reachable. The prefix for the enter
1144  * key should reflect the info we have for this arc.
1145  */
1146  destKey.prefix.colors[0] = key->prefix.colors[1];
1147  destKey.prefix.colors[1] = arc->co;
1148  destKey.nstate = arc->to;
1149  addKeyToQueue(trgmNFA, &destKey);
1150  }
1151  }
1152  else
1153  {
1154  /*
1155  * Unexpandable color. Add enter key with ambiguous prefix,
1156  * showing we can reach the destination from this state, but
1157  * the preceding colors will be uncertain. (We do not set the
1158  * first prefix color to key->prefix.colors[1], because a
1159  * prefix of known followed by unknown is invalid.)
1160  */
1161  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1162  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1163  destKey.nstate = arc->to;
1164  addKeyToQueue(trgmNFA, &destKey);
1165  }
1166  }
1167  }
1168 
1169  pfree(arcs);
1170 }
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:106
static int32 next
Definition: blutils.c:210
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1406
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:400
regex_t * regex
Definition: trgm_regexp.c:399
Definition: regguts.h:276
#define MemSet(start, val, len)
Definition: c.h:853
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:151
#define TSTATE_FIN
Definition: trgm_regexp.c:327
bool containsNonWord
Definition: trgm_regexp.c:262
void pfree(void *pointer)
Definition: mcxt.c:992
TrgmColor colors[2]
Definition: trgm_regexp.c:291
List * enterKeys
Definition: trgm_regexp.c:333
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
#define lnext(lc)
Definition: pg_list.h:105
List * lappend(List *list, void *datum)
Definition: list.c:128
#define COLOR_BLANK
Definition: trgm_regexp.c:287
List * list_delete_cell(List *list, ListCell *cell, ListCell *prev)
Definition: list.c:528
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1176
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:168
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:80
#define NULL
Definition: c.h:226
#define lfirst(lc)
Definition: pg_list.h:106
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1322
TrgmPrefix prefix
Definition: trgm_regexp.c:310
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:286
void * palloc(Size size)
Definition: mcxt.c:891
int i
static void addKeyToQueue ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1176 of file trgm_regexp.c.

References TrgmNFA::keysQueue, lappend(), and palloc().

Referenced by addKey().

1177 {
1178  TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
1179 
1180  memcpy(keyCopy, key, sizeof(TrgmStateKey));
1181  trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
1182 }
List * lappend(List *list, void *datum)
Definition: list.c:128
void * palloc(Size size)
Definition: mcxt.c:891
List * keysQueue
Definition: trgm_regexp.c:409
static int colorTrgmInfoCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1817 of file trgm_regexp.c.

References ColorTrgmInfo::ctrgm.

Referenced by packGraph(), and selectColorTrigrams().

1818 {
1819  const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
1820  const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
1821 
1822  return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
1823 }
ColorTrgm ctrgm
Definition: trgm_regexp.c:371
static int colorTrgmInfoPenaltyCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1830 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

1831 {
1832  float4 penalty1 = ((const ColorTrgmInfo *) p1)->penalty;
1833  float4 penalty2 = ((const ColorTrgmInfo *) p2)->penalty;
1834 
1835  if (penalty1 < penalty2)
1836  return 1;
1837  else if (penalty1 == penalty2)
1838  return 0;
1839  else
1840  return -1;
1841 }
float float4
Definition: c.h:377
static bool convertPgWchar ( pg_wchar  c,
trgm_mb_char result 
)
static

Definition at line 835 of file trgm_regexp.c.

References trgm_mb_char::bytes, lowerstr(), MAX_MULTIBYTE_CHAR_LEN, pfree(), and pg_wchar2mb_with_len().

Referenced by getColorInfo().

836 {
837  /* "s" has enough space for a multibyte character and a trailing NUL */
838  char s[MAX_MULTIBYTE_CHAR_LEN + 1];
839 
840  /*
841  * We can ignore the NUL character, since it can never appear in a PG text
842  * string. This avoids the need for various special cases when
843  * reconstructing trigrams.
844  */
845  if (c == 0)
846  return false;
847 
848  /* Do the conversion, making sure the result is NUL-terminated */
849  memset(s, 0, sizeof(s));
850  pg_wchar2mb_with_len(&c, s, 1);
851 
852  /*
853  * In IGNORECASE mode, we can ignore uppercase characters. We assume that
854  * the regex engine generated both uppercase and lowercase equivalents
855  * within each color, since we used the REG_ICASE option; so there's no
856  * need to process the uppercase version.
857  *
858  * XXX this code is dependent on the assumption that lowerstr() works the
859  * same as the regex engine's internal case folding machinery. Might be
860  * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
861  * On the other hand, the trigrams in the index were created using
862  * lowerstr(), so we're probably screwed if there's any incompatibility
863  * anyway.
864  */
865 #ifdef IGNORECASE
866  {
867  char *lowerCased = lowerstr(s);
868 
869  if (strcmp(lowerCased, s) != 0)
870  {
871  pfree(lowerCased);
872  return false;
873  }
874  pfree(lowerCased);
875  }
876 #endif
877 
878  /* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
879  memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
880  return true;
881 }
char * lowerstr(const char *str)
Definition: ts_locale.c:231
void pfree(void *pointer)
Definition: mcxt.c:992
char * c
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:30
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:243
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:756
TRGM* createTrgmNFA ( text text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 517 of file trgm_regexp.c.

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate(), createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_CATCH, PG_END_TRY, PG_RE_THROW, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, and REG_ICASE.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

519 {
520  TRGM *trg;
521  regex_t regex;
522  MemoryContext tmpcontext;
523  MemoryContext oldcontext;
524 
525  /*
526  * This processing generates a great deal of cruft, which we'd like to
527  * clean up before returning (since this function may be called in a
528  * query-lifespan memory context). Make a temp context we can work in so
529  * that cleanup is easy.
530  */
532  "createTrgmNFA temporary context",
534  oldcontext = MemoryContextSwitchTo(tmpcontext);
535 
536  /*
537  * Stage 1: Compile the regexp into a NFA, using the regexp library.
538  */
539 #ifdef IGNORECASE
540  RE_compile(&regex, text_re, REG_ADVANCED | REG_ICASE, collation);
541 #else
542  RE_compile(&regex, text_re, REG_ADVANCED, collation);
543 #endif
544 
545  /*
546  * Since the regexp library allocates its internal data structures with
547  * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
548  * done even if there's an error.
549  */
550  PG_TRY();
551  {
552  trg = createTrgmNFAInternal(&regex, graph, rcontext);
553  }
554  PG_CATCH();
555  {
556  pg_regfree(&regex);
557  PG_RE_THROW();
558  }
559  PG_END_TRY();
560 
561  pg_regfree(&regex);
562 
563  /* Clean up all the cruft we created */
564  MemoryContextSwitchTo(oldcontext);
565  MemoryContextDelete(tmpcontext);
566 
567  return trg;
568 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:200
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:574
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define REG_ICASE
Definition: regex.h:106
Definition: trgm.h:63
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:145
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
#define REG_ADVANCED
Definition: regex.h:103
MemoryContext AllocSetContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: aset.c:440
#define PG_CATCH()
Definition: elog.h:293
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
Definition: trgm_regexp.c:728
#define PG_RE_THROW()
Definition: elog.h:314
#define PG_TRY()
Definition: elog.h:284
void pg_regfree(regex_t *re)
Definition: regfree.c:49
Definition: regex.h:55
#define PG_END_TRY()
Definition: elog.h:300
static TRGM * createTrgmNFAInternal ( regex_t regex,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)
static

Definition at line 574 of file trgm_regexp.c.

References TrgmNFA::colorInfo, expandColorTrigrams(), TrgmState::flags, getColorInfo(), TrgmNFA::initState, TrgmNFA::ncolors, NULL, packGraph(), TrgmNFA::regex, selectColorTrigrams(), transformGraph(), and TSTATE_FIN.

Referenced by createTrgmNFA().

576 {
577  TRGM *trg;
578  TrgmNFA trgmNFA;
579 
580  trgmNFA.regex = regex;
581 
582  /* Collect color information from the regex */
583  getColorInfo(regex, &trgmNFA);
584 
585 #ifdef TRGM_REGEXP_DEBUG
586  printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
587 #endif
588 
589  /*
590  * Stage 2: Create an expanded graph from the source NFA.
591  */
592  transformGraph(&trgmNFA);
593 
594 #ifdef TRGM_REGEXP_DEBUG
595  printTrgmNFA(&trgmNFA);
596 #endif
597 
598  /*
599  * Fail if we were unable to make a nontrivial graph, ie it is possible to
600  * get from the initial state to the final state without reading any
601  * predictable trigram.
602  */
603  if (trgmNFA.initState->flags & TSTATE_FIN)
604  return NULL;
605 
606  /*
607  * Stage 3: Select color trigrams to expand. Fail if too many trigrams.
608  */
609  if (!selectColorTrigrams(&trgmNFA))
610  return NULL;
611 
612  /*
613  * Stage 4: Expand color trigrams and pack graph into final
614  * representation.
615  */
616  trg = expandColorTrigrams(&trgmNFA, rcontext);
617 
618  *graph = packGraph(&trgmNFA, rcontext);
619 
620 #ifdef TRGM_REGEXP_DEBUG
621  printTrgmPackedGraph(*graph, trg);
622 #endif
623 
624  return trg;
625 }
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:772
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:400
regex_t * regex
Definition: trgm_regexp.c:399
TrgmState * initState
Definition: trgm_regexp.c:405
#define TSTATE_FIN
Definition: trgm_regexp.c:327
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:1448
Definition: trgm.h:63
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1855
#define NULL
Definition: c.h:226
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1700
static void transformGraph(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:901
int ncolors
Definition: trgm_regexp.c:401
static TRGM * expandColorTrigrams ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1700 of file trgm_regexp.c.

References ARRKEY, trgm_mb_char::bytes, CALCGTSIZE, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fillTrgm(), TRGM::flag, GETARR, i, MemoryContextAllocZero(), SET_VARSIZE, TrgmNFA::totalTrgmCount, TRGMHDRSIZE, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

1701 {
1702  TRGM *trg;
1703  trgm *p;
1704  int i;
1705  TrgmColorInfo blankColor;
1706  trgm_mb_char blankChar;
1707 
1708  /* Set up "blank" color structure containing a single zero character */
1709  memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
1710  blankColor.wordCharsCount = 1;
1711  blankColor.wordChars = &blankChar;
1712 
1713  /* Construct the trgm array */
1714  trg = (TRGM *)
1715  MemoryContextAllocZero(rcontext,
1716  TRGMHDRSIZE +
1717  trgmNFA->totalTrgmCount * sizeof(trgm));
1718  trg->flag = ARRKEY;
1719  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
1720  p = GETARR(trg);
1721  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1722  {
1723  ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
1724  TrgmColorInfo *c[3];
1725  trgm_mb_char s[3];
1726  int j,
1727  i1,
1728  i2,
1729  i3;
1730 
1731  /* Ignore any unexpanded trigrams ... */
1732  if (!colorTrgm->expanded)
1733  continue;
1734 
1735  /* Get colors, substituting the dummy struct for COLOR_BLANK */
1736  for (j = 0; j < 3; j++)
1737  {
1738  if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
1739  c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
1740  else
1741  c[j] = &blankColor;
1742  }
1743 
1744  /* Iterate over all possible combinations of colors' characters */
1745  for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
1746  {
1747  s[0] = c[0]->wordChars[i1];
1748  for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
1749  {
1750  s[1] = c[1]->wordChars[i2];
1751  for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
1752  {
1753  s[2] = c[2]->wordChars[i3];
1754  fillTrgm(p, s);
1755  p++;
1756  }
1757  }
1758  }
1759  }
1760 
1761  return trg;
1762 }
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:400
TrgmColor colors[3]
Definition: trgm_regexp.c:300
#define ARRKEY
Definition: trgm.h:94
int totalTrgmCount
Definition: trgm_regexp.c:416
#define GETARR(x)
Definition: trgm.h:104
Definition: trgm.h:63
char * c
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:414
#define TRGMHDRSIZE
Definition: trgm.h:70
#define COLOR_BLANK
Definition: trgm_regexp.c:287
uint8 flag
Definition: trgm.h:66
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:784
int colorTrgmsCount
Definition: trgm_regexp.c:415
char trgm[3]
Definition: trgm.h:38
#define CALCGTSIZE(flag)
Definition: hstore_gist.c:52
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:243
int i
ColorTrgm ctrgm
Definition: trgm_regexp.c:371
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:330
trgm_mb_char * wordChars
Definition: trgm_regexp.c:264
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
Definition: trgm_regexp.c:1768
static void fillTrgm ( trgm ptrgm,
trgm_mb_char  s[3] 
)
static

Definition at line 1768 of file trgm_regexp.c.

References trgm_mb_char::bytes, compact_trigram(), i, and MAX_MULTIBYTE_CHAR_LEN.

Referenced by expandColorTrigrams().

1769 {
1770  char str[3 * MAX_MULTIBYTE_CHAR_LEN],
1771  *p;
1772  int i,
1773  j;
1774 
1775  /* Write multibyte string into "str" (we don't need null termination) */
1776  p = str;
1777 
1778  for (i = 0; i < 3; i++)
1779  {
1780  if (s[i].bytes[0] != 0)
1781  {
1782  for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
1783  *p++ = s[i].bytes[j];
1784  }
1785  else
1786  {
1787  /* Emit a space in place of COLOR_BLANK */
1788  *p++ = ' ';
1789  }
1790  }
1791 
1792  /* Convert "str" to a standard trigram (possibly hashing it) */
1793  compact_trigram(ptrgm, str, p - str);
1794 }
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:30
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:166
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:243
int i
static void getColorInfo ( regex_t regex,
TrgmNFA trgmNFA 
)
static

Definition at line 772 of file trgm_regexp.c.

References trgm_mb_char::bytes, chars, COLOR_COUNT_LIMIT, TrgmNFA::colorInfo, TrgmColorInfo::containsNonWord, convertPgWchar(), TrgmColorInfo::expandable, i, ISWORDCHR, TrgmNFA::ncolors, palloc(), palloc0(), pfree(), pg_reg_getcharacters(), pg_reg_getnumcharacters(), pg_reg_getnumcolors(), TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

773 {
774  int colorsCount = pg_reg_getnumcolors(regex);
775  int i;
776 
777  trgmNFA->ncolors = colorsCount;
778  trgmNFA->colorInfo = (TrgmColorInfo *)
779  palloc0(colorsCount * sizeof(TrgmColorInfo));
780 
781  /*
782  * Loop over colors, filling TrgmColorInfo about each.
783  */
784  for (i = 0; i < colorsCount; i++)
785  {
786  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
787  int charsCount = pg_reg_getnumcharacters(regex, i);
788  pg_wchar *chars;
789  int j;
790 
791  if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
792  {
793  /* Non expandable, or too large to work with */
794  colorInfo->expandable = false;
795  continue;
796  }
797 
798  colorInfo->expandable = true;
799  colorInfo->containsNonWord = false;
800  colorInfo->wordChars = (trgm_mb_char *)
801  palloc(sizeof(trgm_mb_char) * charsCount);
802  colorInfo->wordCharsCount = 0;
803 
804  /* Extract all the chars in this color */
805  chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
806  pg_reg_getcharacters(regex, i, chars, charsCount);
807 
808  /*
809  * Convert characters back to multibyte form, and save only those that
810  * are word characters. Set "containsNonWord" if any non-word
811  * character. (Note: it'd probably be nicer to keep the chars in
812  * pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
813  */
814  for (j = 0; j < charsCount; j++)
815  {
816  trgm_mb_char c;
817 
818  if (!convertPgWchar(chars[j], &c))
819  continue; /* ok to ignore it altogether */
820  if (ISWORDCHR(c.bytes))
821  colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
822  else
823  colorInfo->containsNonWord = true;
824  }
825 
826  pfree(chars);
827  }
828 }
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:400
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:189
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:225
#define ISWORDCHR(c)
Definition: trgm.h:51
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:134
bool containsNonWord
Definition: trgm_regexp.c:262
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
Definition: trgm_regexp.c:835
void pfree(void *pointer)
Definition: mcxt.c:992
char * c
#define COLOR_COUNT_LIMIT
Definition: trgm_regexp.c:223
unsigned int pg_wchar
Definition: mbprint.c:31
void * palloc0(Size size)
Definition: mcxt.c:920
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:243
void * palloc(Size size)
Definition: mcxt.c:891
int i
trgm_mb_char * wordChars
Definition: trgm_regexp.c:264
static char chars[TZ_MAX_CHARS]
Definition: zic.c:382
int ncolors
Definition: trgm_regexp.c:401
static TrgmState * getState ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1377 of file trgm_regexp.c.

References TrgmState::arcs, TrgmState::enterKeys, TrgmState::flags, HASH_ENTER, hash_search(), lappend(), NIL, NULL, TrgmState::number, TrgmState::parent, TrgmNFA::queue, TrgmNFA::states, and TrgmState::tentParent.

Referenced by addArc(), and transformGraph().

1378 {
1379  TrgmState *state;
1380  bool found;
1381 
1382  state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
1383  &found);
1384  if (!found)
1385  {
1386  /* New state: initialize and queue it */
1387  state->arcs = NIL;
1388  state->enterKeys = NIL;
1389  state->flags = 0;
1390  state->parent = NULL;
1391  state->tentParent = NULL;
1392  state->number = -1;
1393 
1394  trgmNFA->queue = lappend(trgmNFA->queue, state);
1395  }
1396  return state;
1397 }
#define NIL
Definition: pg_list.h:69
struct TrgmState * tentParent
Definition: trgm_regexp.c:336
HTAB * states
Definition: trgm_regexp.c:404
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:885
List * enterKeys
Definition: trgm_regexp.c:333
List * arcs
Definition: trgm_regexp.c:332
List * lappend(List *list, void *datum)
Definition: list.c:128
List * queue
Definition: trgm_regexp.c:408
#define NULL
Definition: c.h:226
Definition: regguts.h:298
struct TrgmState * parent
Definition: trgm_regexp.c:335
static void mergeStates ( TrgmState state1,
TrgmState state2 
)
static

Definition at line 1800 of file trgm_regexp.c.

References Assert, TrgmState::flags, and TrgmState::parent.

Referenced by selectColorTrigrams().

1801 {
1802  Assert(state1 != state2);
1803  Assert(!state1->parent);
1804  Assert(!state2->parent);
1805 
1806  /* state1 absorbs state2's flags */
1807  state1->flags |= state2->flags;
1808 
1809  /* state2, and indirectly all its children, become children of state1 */
1810  state2->parent = state1;
1811 }
#define Assert(condition)
Definition: c.h:671
struct TrgmState * parent
Definition: trgm_regexp.c:335
static int packArcInfoCmp ( const void *  a1,
const void *  a2 
)
static

Definition at line 2010 of file trgm_regexp.c.

References TrgmPackArcInfo::colorTrgm, TrgmPackArcInfo::sourceState, and TrgmPackArcInfo::targetState.

Referenced by packGraph().

2011 {
2012  const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
2013  const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
2014 
2015  if (p1->sourceState < p2->sourceState)
2016  return -1;
2017  if (p1->sourceState > p2->sourceState)
2018  return 1;
2019  if (p1->colorTrgm < p2->colorTrgm)
2020  return -1;
2021  if (p1->colorTrgm > p2->colorTrgm)
2022  return 1;
2023  if (p1->targetState < p2->targetState)
2024  return -1;
2025  if (p1->targetState > p2->targetState)
2026  return 1;
2027  return 0;
2028 }
static FormData_pg_attribute a1
Definition: heap.c:142
static FormData_pg_attribute a2
Definition: heap.c:148
static TrgmPackedGraph * packGraph ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1855 of file trgm_regexp.c.

References TrgmState::arcs, TrgmPackedState::arcs, TrgmNFA::arcsCount, TrgmPackedState::arcsCount, Assert, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, colorTrgmInfoCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::expanded, TrgmState::flags, hash_seq_init(), hash_seq_search(), i, lfirst, MemoryContextAlloc(), NULL, TrgmState::number, ColorTrgmInfo::number, packArcInfoCmp(), palloc(), TrgmState::parent, qsort, TrgmPackArcInfo::sourceState, TrgmNFA::states, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, TrgmArc::target, TrgmPackedArc::targetState, TrgmPackArcInfo::targetState, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

1856 {
1857  int number = 2,
1858  arcIndex,
1859  arcsCount;
1860  HASH_SEQ_STATUS scan_status;
1861  TrgmState *state;
1862  TrgmPackArcInfo *arcs,
1863  *p1,
1864  *p2;
1865  TrgmPackedArc *packedArcs;
1866  TrgmPackedGraph *result;
1867  int i,
1868  j;
1869 
1870  /* Enumerate surviving states, giving init and fin reserved numbers */
1871  hash_seq_init(&scan_status, trgmNFA->states);
1872  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1873  {
1874  while (state->parent)
1875  state = state->parent;
1876 
1877  if (state->number < 0)
1878  {
1879  if (state->flags & TSTATE_INIT)
1880  state->number = 0;
1881  else if (state->flags & TSTATE_FIN)
1882  state->number = 1;
1883  else
1884  {
1885  state->number = number;
1886  number++;
1887  }
1888  }
1889  }
1890 
1891  /* Collect array of all arcs */
1892  arcs = (TrgmPackArcInfo *)
1893  palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
1894  arcIndex = 0;
1895  hash_seq_init(&scan_status, trgmNFA->states);
1896  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1897  {
1898  TrgmState *source = state;
1899  ListCell *cell;
1900 
1901  while (source->parent)
1902  source = source->parent;
1903 
1904  foreach(cell, state->arcs)
1905  {
1906  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1907  TrgmState *target = arc->target;
1908 
1909  while (target->parent)
1910  target = target->parent;
1911 
1912  if (source->number != target->number)
1913  {
1914  ColorTrgmInfo *ctrgm;
1915 
1916  ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
1917  trgmNFA->colorTrgms,
1918  trgmNFA->colorTrgmsCount,
1919  sizeof(ColorTrgmInfo),
1921  Assert(ctrgm != NULL);
1922  Assert(ctrgm->expanded);
1923 
1924  arcs[arcIndex].sourceState = source->number;
1925  arcs[arcIndex].targetState = target->number;
1926  arcs[arcIndex].colorTrgm = ctrgm->number;
1927  arcIndex++;
1928  }
1929  }
1930  }
1931 
1932  /* Sort arcs to ease duplicate detection */
1933  qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
1934 
1935  /* We could have duplicates because states were merged. Remove them. */
1936  /* p1 is probe point, p2 is last known non-duplicate. */
1937  p2 = arcs;
1938  for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
1939  {
1940  if (packArcInfoCmp(p1, p2) > 0)
1941  {
1942  p2++;
1943  *p2 = *p1;
1944  }
1945  }
1946  arcsCount = (p2 - arcs) + 1;
1947 
1948  /* Create packed representation */
1949  result = (TrgmPackedGraph *)
1950  MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
1951 
1952  /* Pack color trigrams information */
1953  result->colorTrigramsCount = 0;
1954  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1955  {
1956  if (trgmNFA->colorTrgms[i].expanded)
1957  result->colorTrigramsCount++;
1958  }
1959  result->colorTrigramGroups = (int *)
1960  MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
1961  j = 0;
1962  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1963  {
1964  if (trgmNFA->colorTrgms[i].expanded)
1965  {
1966  result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
1967  j++;
1968  }
1969  }
1970 
1971  /* Pack states and arcs information */
1972  result->statesCount = number;
1973  result->states = (TrgmPackedState *)
1974  MemoryContextAlloc(rcontext, number * sizeof(TrgmPackedState));
1975  packedArcs = (TrgmPackedArc *)
1976  MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
1977  j = 0;
1978  for (i = 0; i < number; i++)
1979  {
1980  int cnt = 0;
1981 
1982  result->states[i].arcs = &packedArcs[j];
1983  while (j < arcsCount && arcs[j].sourceState == i)
1984  {
1985  packedArcs[j].targetState = arcs[j].targetState;
1986  packedArcs[j].colorTrgm = arcs[j].colorTrgm;
1987  cnt++;
1988  j++;
1989  }
1990  result->states[i].arcsCount = cnt;
1991  }
1992 
1993  /* Allocate working memory for trigramsMatchGraph() */
1994  result->colorTrigramsActive = (bool *)
1995  MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
1996  result->statesActive = (bool *)
1997  MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
1998  result->statesQueue = (int *)
1999  MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
2000 
2001  return result;
2002 }
bool * colorTrigramsActive
Definition: trgm_regexp.c:457
ColorTrgm ctrgm
Definition: trgm_regexp.c:345
Definition: regguts.h:276
HTAB * states
Definition: trgm_regexp.c:404
TrgmPackedState * states
Definition: trgm_regexp.c:454
#define TSTATE_FIN
Definition: trgm_regexp.c:327
#define TSTATE_INIT
Definition: trgm_regexp.c:326
TrgmPackedArc * arcs
Definition: trgm_regexp.c:431
static int packArcInfoCmp(const void *a1, const void *a2)
Definition: trgm_regexp.c:2010
bool * statesActive
Definition: trgm_regexp.c:458
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:414
TrgmState * target
Definition: trgm_regexp.c:346
List * arcs
Definition: trgm_regexp.c:332
int arcsCount
Definition: trgm_regexp.c:410
int colorTrgmsCount
Definition: trgm_regexp.c:415
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1353
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1343
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1817
struct TrgmState * parent
Definition: trgm_regexp.c:335
void * palloc(Size size)
Definition: mcxt.c:891
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:749
int i
#define qsort(a, b, c, d)
Definition: port.h:440
int * colorTrigramGroups
Definition: trgm_regexp.c:447
static bool prefixContains ( TrgmPrefix prefix1,
TrgmPrefix prefix2 
)
static

Definition at line 1406 of file trgm_regexp.c.

References COLOR_UNKNOWN, and TrgmPrefix::colors.

Referenced by addArc(), and addKey().

1407 {
1408  if (prefix1->colors[1] == COLOR_UNKNOWN)
1409  {
1410  /* Fully ambiguous prefix contains everything */
1411  return true;
1412  }
1413  else if (prefix1->colors[0] == COLOR_UNKNOWN)
1414  {
1415  /*
1416  * Prefix with only first unknown color contains every prefix with
1417  * same second color.
1418  */
1419  if (prefix1->colors[1] == prefix2->colors[1])
1420  return true;
1421  else
1422  return false;
1423  }
1424  else
1425  {
1426  /* Exact prefix contains only the exact same prefix */
1427  if (prefix1->colors[0] == prefix2->colors[0] &&
1428  prefix1->colors[1] == prefix2->colors[1])
1429  return true;
1430  else
1431  return false;
1432  }
1433 }
TrgmColor colors[2]
Definition: trgm_regexp.c:291
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:286
static void processState ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 962 of file trgm_regexp.c.

References addArcs(), addKey(), TrgmState::flags, TrgmNFA::keysQueue, linitial, list_delete_first(), NIL, TrgmState::stateKey, and TSTATE_FIN.

Referenced by transformGraph().

963 {
964  /* keysQueue should be NIL already, but make sure */
965  trgmNFA->keysQueue = NIL;
966 
967  /*
968  * Add state's own key, and then process all keys added to keysQueue until
969  * queue is empty. But we can quit if the state gets marked final.
970  */
971  addKey(trgmNFA, state, &state->stateKey);
972  while (trgmNFA->keysQueue != NIL && !(state->flags & TSTATE_FIN))
973  {
974  TrgmStateKey *key = (TrgmStateKey *) linitial(trgmNFA->keysQueue);
975 
976  trgmNFA->keysQueue = list_delete_first(trgmNFA->keysQueue);
977  addKey(trgmNFA, state, key);
978  }
979 
980  /*
981  * Add outgoing arcs only if state isn't final (we have no interest in
982  * outgoing arcs if we already match)
983  */
984  if (!(state->flags & TSTATE_FIN))
985  addArcs(trgmNFA, state);
986 }
#define NIL
Definition: pg_list.h:69
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
Definition: trgm_regexp.c:1005
#define TSTATE_FIN
Definition: trgm_regexp.c:327
#define linitial(l)
Definition: pg_list.h:110
TrgmStateKey stateKey
Definition: trgm_regexp.c:331
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:1188
List * keysQueue
Definition: trgm_regexp.c:409
List * list_delete_first(List *list)
Definition: list.c:666
static void RE_compile ( regex_t regex,
text text_re,
int  cflags,
Oid  collation 
)
static

Definition at line 728 of file trgm_regexp.c.

References ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by createTrgmNFA().

729 {
730  int text_re_len = VARSIZE_ANY_EXHDR(text_re);
731  char *text_re_val = VARDATA_ANY(text_re);
732  pg_wchar *pattern;
733  int pattern_len;
734  int regcomp_result;
735  char errMsg[100];
736 
737  /* Convert pattern string to wide characters */
738  pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
739  pattern_len = pg_mb2wchar_with_len(text_re_val,
740  pattern,
741  text_re_len);
742 
743  /* Compile regex */
744  regcomp_result = pg_regcomp(regex,
745  pattern,
746  pattern_len,
747  cflags,
748  collation);
749 
750  pfree(pattern);
751 
752  if (regcomp_result != REG_OKAY)
753  {
754  /* re didn't compile (no need for pg_regfree, if so) */
755  pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
756  ereport(ERROR,
757  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
758  errmsg("invalid regular expression: %s", errMsg)));
759  }
760 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:349
int errcode(int sqlerrcode)
Definition: elog.c:575
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
Definition: regcomp.c:316
void pfree(void *pointer)
Definition: mcxt.c:992
#define REG_OKAY
Definition: regex.h:137
#define ERROR
Definition: elog.h:43
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define ereport(elevel, rest)
Definition: elog.h:122
unsigned int pg_wchar
Definition: mbprint.c:31
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:734
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:342
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
static bool selectColorTrigrams ( TrgmNFA trgmNFA)
static

Definition at line 1448 of file trgm_regexp.c.

References TrgmState::arcs, ColorTrgmInfo::arcs, TrgmNFA::arcsCount, Assert, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, colorTrgmInfoCmp(), colorTrgmInfoPenaltyCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, TrgmState::flags, hash_seq_init(), hash_seq_search(), i, lfirst, list_concat(), list_make1, MAX_TRGM_COUNT, mergeStates(), NULL, ColorTrgmInfo::number, palloc(), TrgmState::parent, penalties, ColorTrgmInfo::penalty, qsort, TrgmArcInfo::source, TrgmNFA::states, TrgmArc::target, TrgmArcInfo::target, TrgmState::tentParent, TrgmNFA::totalTrgmCount, TSTATE_FIN, TSTATE_INIT, WISH_TRGM_PENALTY, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

1449 {
1450  HASH_SEQ_STATUS scan_status;
1451  int arcsCount = trgmNFA->arcsCount,
1452  i;
1453  TrgmState *state;
1454  ColorTrgmInfo *colorTrgms;
1455  int64 totalTrgmCount;
1456  float4 totalTrgmPenalty;
1457  int number;
1458 
1459  /* Collect color trigrams from all arcs */
1460  colorTrgms = (ColorTrgmInfo *) palloc(sizeof(ColorTrgmInfo) * arcsCount);
1461  trgmNFA->colorTrgms = colorTrgms;
1462 
1463  i = 0;
1464  hash_seq_init(&scan_status, trgmNFA->states);
1465  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1466  {
1467  ListCell *cell;
1468 
1469  foreach(cell, state->arcs)
1470  {
1471  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1472  TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
1473 
1474  arcInfo->source = state;
1475  arcInfo->target = arc->target;
1476  colorTrgms[i].arcs = list_make1(arcInfo);
1477  colorTrgms[i].expanded = true;
1478  colorTrgms[i].ctrgm = arc->ctrgm;
1479  i++;
1480  }
1481  }
1482  Assert(i == arcsCount);
1483 
1484  /* Remove duplicates, merging their arcs lists */
1485  if (arcsCount >= 2)
1486  {
1487  ColorTrgmInfo *p1,
1488  *p2;
1489 
1490  /* Sort trigrams to ease duplicate detection */
1491  qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
1492 
1493  /* p1 is probe point, p2 is last known non-duplicate. */
1494  p2 = colorTrgms;
1495  for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1496  {
1497  if (colorTrgmInfoCmp(p1, p2) > 0)
1498  {
1499  p2++;
1500  *p2 = *p1;
1501  }
1502  else
1503  {
1504  p2->arcs = list_concat(p2->arcs, p1->arcs);
1505  }
1506  }
1507  trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
1508  }
1509  else
1510  {
1511  trgmNFA->colorTrgmsCount = arcsCount;
1512  }
1513 
1514  /*
1515  * Count number of simple trigrams generated by each color trigram, and
1516  * also compute a penalty value, which is the number of simple trigrams
1517  * times a multiplier that depends on its whitespace content.
1518  *
1519  * Note: per-color-trigram counts cannot overflow an int so long as
1520  * COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
1521  * 1290. However, the grand total totalTrgmCount might conceivably
1522  * overflow an int, so we use int64 for that within this routine. Also,
1523  * penalties are calculated in float4 arithmetic to avoid any overflow
1524  * worries.
1525  */
1526  totalTrgmCount = 0;
1527  totalTrgmPenalty = 0.0f;
1528  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1529  {
1530  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1531  int j,
1532  count = 1,
1533  typeIndex = 0;
1534 
1535  for (j = 0; j < 3; j++)
1536  {
1537  TrgmColor c = trgmInfo->ctrgm.colors[j];
1538 
1539  typeIndex *= 2;
1540  if (c == COLOR_BLANK)
1541  typeIndex++;
1542  else
1543  count *= trgmNFA->colorInfo[c].wordCharsCount;
1544  }
1545  trgmInfo->count = count;
1546  totalTrgmCount += count;
1547  trgmInfo->penalty = penalties[typeIndex] * (float4) count;
1548  totalTrgmPenalty += trgmInfo->penalty;
1549  }
1550 
1551  /* Sort color trigrams in descending order of their penalties */
1552  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1554 
1555  /*
1556  * Remove color trigrams from the graph so long as total penalty of color
1557  * trigrams exceeds WISH_TRGM_PENALTY. (If we fail to get down to
1558  * WISH_TRGM_PENALTY, it's OK so long as total count is no more than
1559  * MAX_TRGM_COUNT.) We prefer to remove color trigrams with higher
1560  * penalty, since those are the most promising for reducing the total
1561  * penalty. When removing a color trigram we have to merge states
1562  * connected by arcs labeled with that trigram. It's necessary to not
1563  * merge initial and final states, because our graph becomes useless if
1564  * that happens; so we cannot always remove the trigram we'd prefer to.
1565  */
1566  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1567  {
1568  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1569  bool canRemove = true;
1570  ListCell *cell;
1571 
1572  /* Done if we've reached the target */
1573  if (totalTrgmPenalty <= WISH_TRGM_PENALTY)
1574  break;
1575 
1576  /*
1577  * Does any arc of this color trigram connect initial and final
1578  * states? If so we can't remove it.
1579  */
1580  foreach(cell, trgmInfo->arcs)
1581  {
1582  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1583  TrgmState *source = arcInfo->source,
1584  *target = arcInfo->target;
1585  int source_flags,
1586  target_flags;
1587 
1588  /* examine parent states, if any merging has already happened */
1589  while (source->parent)
1590  source = source->parent;
1591  while (target->parent)
1592  target = target->parent;
1593 
1594  /* we must also consider merges we are planning right now */
1595  source_flags = source->flags;
1596  while (source->tentParent)
1597  {
1598  source = source->tentParent;
1599  source_flags |= source->flags;
1600  }
1601  target_flags = target->flags;
1602  while (target->tentParent)
1603  {
1604  target = target->tentParent;
1605  target_flags |= target->flags;
1606  }
1607 
1608  /* would fully-merged state have both INIT and FIN set? */
1609  if (((source_flags | target_flags) & (TSTATE_INIT | TSTATE_FIN)) ==
1610  (TSTATE_INIT | TSTATE_FIN))
1611  {
1612  canRemove = false;
1613  break;
1614  }
1615 
1616  /* ok so far, so remember planned merge */
1617  if (source != target)
1618  target->tentParent = source;
1619  }
1620 
1621  /* We must clear all the tentParent fields before continuing */
1622  foreach(cell, trgmInfo->arcs)
1623  {
1624  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1625  TrgmState *target = arcInfo->target;
1626  TrgmState *ttarget;
1627 
1628  while (target->parent)
1629  target = target->parent;
1630 
1631  while ((ttarget = target->tentParent) != NULL)
1632  {
1633  target->tentParent = NULL;
1634  target = ttarget;
1635  }
1636  }
1637 
1638  /* Now, move on if we can't drop this trigram */
1639  if (!canRemove)
1640  continue;
1641 
1642  /* OK, merge states linked by each arc labeled by the trigram */
1643  foreach(cell, trgmInfo->arcs)
1644  {
1645  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1646  TrgmState *source = arcInfo->source,
1647  *target = arcInfo->target;
1648 
1649  while (source->parent)
1650  source = source->parent;
1651  while (target->parent)
1652  target = target->parent;
1653  if (source != target)
1654  {
1655  mergeStates(source, target);
1656  /* Assert we didn't merge initial and final states */
1657  Assert((source->flags & (TSTATE_INIT | TSTATE_FIN)) !=
1658  (TSTATE_INIT | TSTATE_FIN));
1659  }
1660  }
1661 
1662  /* Mark trigram unexpanded, and update totals */
1663  trgmInfo->expanded = false;
1664  totalTrgmCount -= trgmInfo->count;
1665  totalTrgmPenalty -= trgmInfo->penalty;
1666  }
1667 
1668  /* Did we succeed in fitting into MAX_TRGM_COUNT? */
1669  if (totalTrgmCount > MAX_TRGM_COUNT)
1670  return false;
1671 
1672  trgmNFA->totalTrgmCount = (int) totalTrgmCount;
1673 
1674  /*
1675  * Sort color trigrams by colors (will be useful for bsearch in packGraph)
1676  * and enumerate the color trigrams that are expanded.
1677  */
1678  number = 0;
1679  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1681  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1682  {
1683  if (colorTrgms[i].expanded)
1684  {
1685  colorTrgms[i].number = number;
1686  number++;
1687  }
1688  }
1689 
1690  return true;
1691 }
static void mergeStates(TrgmState *state1, TrgmState *state2)
Definition: trgm_regexp.c:1800
ColorTrgm ctrgm
Definition: trgm_regexp.c:345
struct TrgmState * tentParent
Definition: trgm_regexp.c:336
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:400
float4 penalty
Definition: trgm_regexp.c:374
Definition: regguts.h:276
TrgmColor colors[3]
Definition: trgm_regexp.c:300
HTAB * states
Definition: trgm_regexp.c:404
#define WISH_TRGM_PENALTY
Definition: trgm_regexp.c:222
List * list_concat(List *list1, List *list2)
Definition: list.c:321
#define TSTATE_FIN
Definition: trgm_regexp.c:327
#define TSTATE_INIT
Definition: trgm_regexp.c:326
int totalTrgmCount
Definition: trgm_regexp.c:416
#define list_make1(x1)
Definition: pg_list.h:133
char * c
TrgmState * target
Definition: trgm_regexp.c:357
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:414
TrgmState * target
Definition: trgm_regexp.c:346
List * arcs
Definition: trgm_regexp.c:332
TrgmState * source
Definition: trgm_regexp.c:356
int arcsCount
Definition: trgm_regexp.c:410
#define COLOR_BLANK
Definition: trgm_regexp.c:287
float float4
Definition: c.h:377
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1830
int colorTrgmsCount
Definition: trgm_regexp.c:415
#define MAX_TRGM_COUNT
Definition: trgm_regexp.c:221
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1353
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1343
const float4 penalties[8]
Definition: trgm_regexp.c:229
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1817
struct TrgmState * parent
Definition: trgm_regexp.c:335
void * palloc(Size size)
Definition: mcxt.c:891
int i
ColorTrgm ctrgm
Definition: trgm_regexp.c:371
#define qsort(a, b, c, d)
Definition: port.h:440
int TrgmColor
Definition: trgm_regexp.c:283
static void transformGraph ( TrgmNFA trgmNFA)
static

Definition at line 901 of file trgm_regexp.c.

References TrgmNFA::arcsCount, COLOR_UNKNOWN, TrgmPrefix::colors, CurrentMemoryContext, HASHCTL::entrysize, TrgmState::flags, getState(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, hash_get_num_entries(), HASHCTL::hcxt, TrgmNFA::initState, HASHCTL::keysize, TrgmNFA::keysQueue, linitial, list_delete_first(), MAX_EXPANDED_ARCS, MAX_EXPANDED_STATES, MemSet, NIL, TrgmStateKey::nstate, TrgmNFA::overflowed, pg_reg_getinitialstate(), TrgmStateKey::prefix, processState(), TrgmNFA::queue, TrgmNFA::regex, TrgmNFA::states, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

902 {
903  HASHCTL hashCtl;
904  TrgmStateKey initkey;
905  TrgmState *initstate;
906 
907  /* Initialize this stage's workspace in trgmNFA struct */
908  trgmNFA->queue = NIL;
909  trgmNFA->keysQueue = NIL;
910  trgmNFA->arcsCount = 0;
911  trgmNFA->overflowed = false;
912 
913  /* Create hashtable for states */
914  hashCtl.keysize = sizeof(TrgmStateKey);
915  hashCtl.entrysize = sizeof(TrgmState);
916  hashCtl.hcxt = CurrentMemoryContext;
917  trgmNFA->states = hash_create("Trigram NFA",
918  1024,
919  &hashCtl,
921 
922  /* Create initial state: ambiguous prefix, NFA's initial state */
923  MemSet(&initkey, 0, sizeof(initkey));
924  initkey.prefix.colors[0] = COLOR_UNKNOWN;
925  initkey.prefix.colors[1] = COLOR_UNKNOWN;
926  initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
927 
928  initstate = getState(trgmNFA, &initkey);
929  initstate->flags |= TSTATE_INIT;
930  trgmNFA->initState = initstate;
931 
932  /*
933  * Recursively build the expanded graph by processing queue of states
934  * (breadth-first search). getState already put initstate in the queue.
935  */
936  while (trgmNFA->queue != NIL)
937  {
938  TrgmState *state = (TrgmState *) linitial(trgmNFA->queue);
939 
940  trgmNFA->queue = list_delete_first(trgmNFA->queue);
941 
942  /*
943  * If we overflowed then just mark state as final. Otherwise do
944  * actual processing.
945  */
946  if (trgmNFA->overflowed)
947  state->flags |= TSTATE_FIN;
948  else
949  processState(trgmNFA, state);
950 
951  /* Did we overflow? */
952  if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
954  trgmNFA->overflowed = true;
955  }
956 }
#define MAX_EXPANDED_ARCS
Definition: trgm_regexp.c:220
#define NIL
Definition: pg_list.h:69
struct TrgmState TrgmState
#define HASH_CONTEXT
Definition: hsearch.h:93
#define HASH_ELEM
Definition: hsearch.h:87
MemoryContext hcxt
Definition: hsearch.h:78
regex_t * regex
Definition: trgm_regexp.c:399
HTAB * states
Definition: trgm_regexp.c:404
Size entrysize
Definition: hsearch.h:73
#define MemSet(start, val, len)
Definition: c.h:853
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1297
TrgmState * initState
Definition: trgm_regexp.c:405
#define TSTATE_FIN
Definition: trgm_regexp.c:327
#define TSTATE_INIT
Definition: trgm_regexp.c:326
bool overflowed
Definition: trgm_regexp.c:411
#define linitial(l)
Definition: pg_list.h:110
TrgmColor colors[2]
Definition: trgm_regexp.c:291
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
int arcsCount
Definition: trgm_regexp.c:410
List * queue
Definition: trgm_regexp.c:408
#define HASH_BLOBS
Definition: hsearch.h:88
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:301
Size keysize
Definition: hsearch.h:72
Definition: regguts.h:298
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1377
TrgmPrefix prefix
Definition: trgm_regexp.c:310
#define MAX_EXPANDED_STATES
Definition: trgm_regexp.c:219
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:286
List * keysQueue
Definition: trgm_regexp.c:409
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:962
List * list_delete_first(List *list)
Definition: list.c:666
bool trigramsMatchGraph ( TrgmPackedGraph graph,
bool check 
)

Definition at line 635 of file trgm_regexp.c.

References TrgmPackedState::arcs, TrgmPackedState::arcsCount, TrgmPackedArc::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, and TrgmPackedArc::targetState.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

636 {
637  int i,
638  j,
639  k,
640  queueIn,
641  queueOut;
642 
643  /*
644  * Reset temporary working areas.
645  */
646  memset(graph->colorTrigramsActive, 0,
647  sizeof(bool) * graph->colorTrigramsCount);
648  memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
649 
650  /*
651  * Check which color trigrams were matched. A match for any simple
652  * trigram associated with a color trigram counts as a match of the color
653  * trigram.
654  */
655  j = 0;
656  for (i = 0; i < graph->colorTrigramsCount; i++)
657  {
658  int cnt = graph->colorTrigramGroups[i];
659 
660  for (k = j; k < j + cnt; k++)
661  {
662  if (check[k])
663  {
664  /*
665  * Found one matched trigram in the group. Can skip the rest
666  * of them and go to the next group.
667  */
668  graph->colorTrigramsActive[i] = true;
669  break;
670  }
671  }
672  j = j + cnt;
673  }
674 
675  /*
676  * Initialize the statesQueue to hold just the initial state. Note:
677  * statesQueue has room for statesCount entries, which is certainly enough
678  * since no state will be put in the queue more than once. The
679  * statesActive array marks which states have been queued.
680  */
681  graph->statesActive[0] = true;
682  graph->statesQueue[0] = 0;
683  queueIn = 0;
684  queueOut = 1;
685 
686  /* Process queued states as long as there are any. */
687  while (queueIn < queueOut)
688  {
689  int stateno = graph->statesQueue[queueIn++];
690  TrgmPackedState *state = &graph->states[stateno];
691  int cnt = state->arcsCount;
692 
693  /* Loop over state's out-arcs */
694  for (i = 0; i < cnt; i++)
695  {
696  TrgmPackedArc *arc = &state->arcs[i];
697 
698  /*
699  * If corresponding color trigram is present then activate the
700  * corresponding state. We're done if that's the final state,
701  * otherwise queue the state if it's not been queued already.
702  */
703  if (graph->colorTrigramsActive[arc->colorTrgm])
704  {
705  int nextstate = arc->targetState;
706 
707  if (nextstate == 1)
708  return true; /* success: final state is reachable */
709 
710  if (!graph->statesActive[nextstate])
711  {
712  graph->statesActive[nextstate] = true;
713  graph->statesQueue[queueOut++] = nextstate;
714  }
715  }
716  }
717  }
718 
719  /* Queue is empty, so match fails. */
720  return false;
721 }
bool * colorTrigramsActive
Definition: trgm_regexp.c:457
Definition: regguts.h:276
TrgmPackedState * states
Definition: trgm_regexp.c:454
TrgmPackedArc * arcs
Definition: trgm_regexp.c:431
bool * statesActive
Definition: trgm_regexp.c:458
Definition: regguts.h:298
int i
int * colorTrigramGroups
Definition: trgm_regexp.c:447
static bool validArcLabel ( TrgmStateKey key,
TrgmColor  co 
)
static

Definition at line 1322 of file trgm_regexp.c.

References Assert, COLOR_BLANK, COLOR_UNKNOWN, TrgmPrefix::colors, and TrgmStateKey::prefix.

Referenced by addArc(), and addKey().

1323 {
1324  /*
1325  * We have to know full trigram in order to add outgoing arc. So we can't
1326  * do it if prefix is ambiguous.
1327  */
1328  if (key->prefix.colors[0] == COLOR_UNKNOWN)
1329  return false;
1330 
1331  /* If key->prefix.colors[0] isn't unknown, its second color isn't either */
1332  Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
1333  /* And we should not be called with an unknown arc color anytime */
1334  Assert(co != COLOR_UNKNOWN);
1335 
1336  /*
1337  * We don't bother with making arcs representing three non-word
1338  * characters, since that's useless for trigram extraction.
1339  */
1340  if (key->prefix.colors[0] == COLOR_BLANK &&
1341  key->prefix.colors[1] == COLOR_BLANK &&
1342  co == COLOR_BLANK)
1343  return false;
1344 
1345  /*
1346  * We also reject nonblank-blank-anything. The nonblank-blank-nonblank
1347  * case doesn't correspond to any trigram the trigram extraction code
1348  * would make. The nonblank-blank-blank case is also not possible with
1349  * RPADDING = 1. (Note that in many cases we'd fail to generate such a
1350  * trigram even if it were valid, for example processing "foo bar" will
1351  * not result in considering the trigram "o ". So if you want to support
1352  * RPADDING = 2, there's more to do than just twiddle this test.)
1353  */
1354  if (key->prefix.colors[0] != COLOR_BLANK &&
1355  key->prefix.colors[1] == COLOR_BLANK)
1356  return false;
1357 
1358  /*
1359  * Other combinations involving blank are valid, in particular we assume
1360  * blank-blank-nonblank is valid, which presumes that LPADDING is 2.
1361  *
1362  * Note: Using again the example "foo bar", we will not consider the
1363  * trigram " b", though this trigram would be found by the trigram
1364  * extraction code. Since we will find " ba", it doesn't seem worth
1365  * trying to hack the algorithm to generate the additional trigram.
1366  */
1367 
1368  /* arc label is valid */
1369  return true;
1370 }
TrgmColor colors[2]
Definition: trgm_regexp.c:291
#define COLOR_BLANK
Definition: trgm_regexp.c:287
#define Assert(condition)
Definition: c.h:671
TrgmPrefix prefix
Definition: trgm_regexp.c:310
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:286

Variable Documentation

const float4 penalties[8]
Initial value:
= {
1.0f,
3.5f,
0.0f,
0.0f,
4.2f,
2.1f,
25.0f,
0.0f
}

Definition at line 229 of file trgm_regexp.c.

Referenced by selectColorTrigrams().