PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
trgm_regexp.c File Reference
#include "postgres.h"
#include "trgm.h"
#include "regex/regexport.h"
#include "tsearch/ts_locale.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for trgm_regexp.c:

Go to the source code of this file.

Data Structures

struct  trgm_mb_char
 
struct  TrgmColorInfo
 
struct  TrgmPrefix
 
struct  ColorTrgm
 
struct  TrgmStateKey
 
struct  TrgmState
 
struct  TrgmArc
 
struct  TrgmArcInfo
 
struct  ColorTrgmInfo
 
struct  TrgmNFA
 
struct  TrgmPackedArc
 
struct  TrgmPackedState
 
struct  TrgmPackedGraph
 
struct  TrgmPackArcInfo
 

Macros

#define MAX_EXPANDED_STATES   128
 
#define MAX_EXPANDED_ARCS   1024
 
#define MAX_TRGM_COUNT   256
 
#define WISH_TRGM_PENALTY   16
 
#define COLOR_COUNT_LIMIT   256
 
#define COLOR_UNKNOWN   (-1)
 
#define COLOR_BLANK   (-2)
 
#define TSTATE_INIT   0x01 /* flag indicating this state is initial */
 
#define TSTATE_FIN   0x02 /* flag indicating this state is final */
 

Typedefs

typedef int TrgmColor
 
typedef struct TrgmState TrgmState
 

Functions

static TRGMcreateTrgmNFAInternal (regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
 
static void RE_compile (regex_t *regex, text *text_re, int cflags, Oid collation)
 
static void getColorInfo (regex_t *regex, TrgmNFA *trgmNFA)
 
static bool convertPgWchar (pg_wchar c, trgm_mb_char *result)
 
static void transformGraph (TrgmNFA *trgmNFA)
 
static void processState (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addKey (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
 
static void addKeyToQueue (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static void addArcs (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addArc (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
 
static bool validArcLabel (TrgmStateKey *key, TrgmColor co)
 
static TrgmStategetState (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static bool prefixContains (TrgmPrefix *prefix1, TrgmPrefix *prefix2)
 
static bool selectColorTrigrams (TrgmNFA *trgmNFA)
 
static TRGMexpandColorTrigrams (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static void fillTrgm (trgm *ptrgm, trgm_mb_char s[3])
 
static void mergeStates (TrgmState *state1, TrgmState *state2)
 
static int colorTrgmInfoCmp (const void *p1, const void *p2)
 
static int colorTrgmInfoPenaltyCmp (const void *p1, const void *p2)
 
static TrgmPackedGraphpackGraph (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static int packArcInfoCmp (const void *a1, const void *a2)
 
TRGMcreateTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
 
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)
 

Variables

static const float4 penalties [8]
 

Macro Definition Documentation

#define COLOR_BLANK   (-2)
#define COLOR_COUNT_LIMIT   256

Definition at line 224 of file trgm_regexp.c.

Referenced by getColorInfo().

#define COLOR_UNKNOWN   (-1)

Definition at line 287 of file trgm_regexp.c.

Referenced by addKey(), prefixContains(), transformGraph(), and validArcLabel().

#define MAX_EXPANDED_ARCS   1024

Definition at line 221 of file trgm_regexp.c.

Referenced by transformGraph().

#define MAX_EXPANDED_STATES   128

Definition at line 220 of file trgm_regexp.c.

Referenced by transformGraph().

#define MAX_TRGM_COUNT   256

Definition at line 222 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

#define TSTATE_FIN   0x02 /* flag indicating this state is final */
#define TSTATE_INIT   0x01 /* flag indicating this state is initial */

Definition at line 330 of file trgm_regexp.c.

Referenced by packGraph(), selectColorTrigrams(), and transformGraph().

#define WISH_TRGM_PENALTY   16

Definition at line 223 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

Typedef Documentation

Definition at line 284 of file trgm_regexp.c.

Function Documentation

static void addArc ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key,
TrgmColor  co,
TrgmStateKey destKey 
)
static

Definition at line 1287 of file trgm_regexp.c.

References TrgmState::arcs, TrgmNFA::arcsCount, TrgmPrefix::colors, ColorTrgm::colors, TrgmArc::ctrgm, TrgmState::enterKeys, getState(), lappend(), lfirst, TrgmStateKey::nstate, palloc(), TrgmStateKey::prefix, prefixContains(), TrgmArc::target, and validArcLabel().

Referenced by addArcs().

1289 {
1290  TrgmArc *arc;
1291  ListCell *cell;
1292 
1293  /* Do nothing if this wouldn't be a valid arc label trigram */
1294  if (!validArcLabel(key, co))
1295  return;
1296 
1297  /*
1298  * Check if we are going to reach key which is covered by a key which is
1299  * already listed in this state. If so arc is useless: the NFA can bypass
1300  * it through a path that doesn't require any predictable trigram, so
1301  * whether the arc's trigram is present or not doesn't really matter.
1302  */
1303  foreach(cell, state->enterKeys)
1304  {
1305  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1306 
1307  if (existingKey->nstate == destKey->nstate &&
1308  prefixContains(&existingKey->prefix, &destKey->prefix))
1309  return;
1310  }
1311 
1312  /* Checks were successful, add new arc */
1313  arc = (TrgmArc *) palloc(sizeof(TrgmArc));
1314  arc->target = getState(trgmNFA, destKey);
1315  arc->ctrgm.colors[0] = key->prefix.colors[0];
1316  arc->ctrgm.colors[1] = key->prefix.colors[1];
1317  arc->ctrgm.colors[2] = co;
1318 
1319  state->arcs = lappend(state->arcs, arc);
1320  trgmNFA->arcsCount++;
1321 }
ColorTrgm ctrgm
Definition: trgm_regexp.c:350
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1415
Definition: regguts.h:276
TrgmColor colors[3]
Definition: trgm_regexp.c:301
TrgmColor colors[2]
Definition: trgm_regexp.c:292
List * enterKeys
Definition: trgm_regexp.c:337
TrgmState * target
Definition: trgm_regexp.c:351
List * arcs
Definition: trgm_regexp.c:336
List * lappend(List *list, void *datum)
Definition: list.c:128
int arcsCount
Definition: trgm_regexp.c:416
#define lfirst(lc)
Definition: pg_list.h:106
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1329
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1384
TrgmPrefix prefix
Definition: trgm_regexp.c:311
void * palloc(Size size)
Definition: mcxt.c:849
static void addArcs ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 1195 of file trgm_regexp.c.

References addArc(), regex_arc_t::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, i, lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, TrgmNFA::regex, regex_arc_t::to, and TrgmColorInfo::wordCharsCount.

Referenced by processState().

1196 {
1197  TrgmStateKey destKey;
1198  ListCell *cell;
1199  regex_arc_t *arcs;
1200  int arcsCount,
1201  i;
1202 
1203  /*
1204  * Ensure any pad bytes in destKey are zero, since it may get used as a
1205  * hashtable key by getState.
1206  */
1207  MemSet(&destKey, 0, sizeof(destKey));
1208 
1209  /*
1210  * Iterate over enter keys associated with this expanded-graph state. This
1211  * includes both the state's own stateKey, and any enter keys we added to
1212  * it during addKey (which represent expanded-graph states that are not
1213  * distinguishable from this one by means of trigrams). For each such
1214  * enter key, examine all the out-arcs of the key's underlying NFA state,
1215  * and try to make a trigram arc leading to where the out-arc leads.
1216  * (addArc will deal with whether the arc is valid or not.)
1217  */
1218  foreach(cell, state->enterKeys)
1219  {
1220  TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
1221 
1222  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1223  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1224  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1225 
1226  for (i = 0; i < arcsCount; i++)
1227  {
1228  regex_arc_t *arc = &arcs[i];
1229  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1230 
1231  /*
1232  * Ignore non-expandable colors; addKey already handled the case.
1233  *
1234  * We need no special check for begin/end pseudocolors here. We
1235  * don't need to do any processing for them, and they will be
1236  * marked non-expandable since the regex engine will have reported
1237  * them that way.
1238  */
1239  if (!colorInfo->expandable)
1240  continue;
1241 
1242  if (colorInfo->containsNonWord)
1243  {
1244  /*
1245  * Color includes non-word character(s).
1246  *
1247  * Generate an arc, treating this transition as occurring on
1248  * BLANK. This allows word-ending trigrams to be manufactured
1249  * if possible.
1250  */
1251  destKey.prefix.colors[0] = key->prefix.colors[1];
1252  destKey.prefix.colors[1] = COLOR_BLANK;
1253  destKey.nstate = arc->to;
1254 
1255  addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
1256  }
1257 
1258  if (colorInfo->wordCharsCount > 0)
1259  {
1260  /*
1261  * Color includes word character(s).
1262  *
1263  * Generate an arc. Color is pushed into prefix of target
1264  * state.
1265  */
1266  destKey.prefix.colors[0] = key->prefix.colors[1];
1267  destKey.prefix.colors[1] = arc->co;
1268  destKey.nstate = arc->to;
1269 
1270  addArc(trgmNFA, state, key, arc->co, &destKey);
1271  }
1272  }
1273 
1274  pfree(arcs);
1275  }
1276 }
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
Definition: trgm_regexp.c:1287
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:405
regex_t * regex
Definition: trgm_regexp.c:404
Definition: regguts.h:276
#define MemSet(start, val, len)
Definition: c.h:857
bool containsNonWord
Definition: trgm_regexp.c:263
void pfree(void *pointer)
Definition: mcxt.c:950
TrgmColor colors[2]
Definition: trgm_regexp.c:292
List * enterKeys
Definition: trgm_regexp.c:337
#define COLOR_BLANK
Definition: trgm_regexp.c:288
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
#define lfirst(lc)
Definition: pg_list.h:106
TrgmPrefix prefix
Definition: trgm_regexp.c:311
void * palloc(Size size)
Definition: mcxt.c:849
int i
static void addKey ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key 
)
static

Definition at line 1012 of file trgm_regexp.c.

References addKeyToQueue(), regex_arc_t::co, COLOR_BLANK, COLOR_UNKNOWN, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, TrgmState::flags, i, lappend(), lfirst, list_delete_cell(), list_head(), lnext, MemSet, next, TrgmStateKey::nstate, NULL, palloc(), pfree(), pg_reg_colorisbegin(), pg_reg_colorisend(), pg_reg_getfinalstate(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, prefixContains(), TrgmNFA::regex, regex_arc_t::to, TSTATE_FIN, validArcLabel(), and TrgmColorInfo::wordCharsCount.

Referenced by processState().

1013 {
1014  regex_arc_t *arcs;
1015  TrgmStateKey destKey;
1016  ListCell *cell,
1017  *prev,
1018  *next;
1019  int i,
1020  arcsCount;
1021 
1022  /*
1023  * Ensure any pad bytes in destKey are zero, since it may get used as a
1024  * hashtable key by getState.
1025  */
1026  MemSet(&destKey, 0, sizeof(destKey));
1027 
1028  /*
1029  * Compare key to each existing enter key of the state to check for
1030  * redundancy. We can drop either old key(s) or the new key if we find
1031  * redundancy.
1032  */
1033  prev = NULL;
1034  cell = list_head(state->enterKeys);
1035  while (cell)
1036  {
1037  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1038 
1039  next = lnext(cell);
1040  if (existingKey->nstate == key->nstate)
1041  {
1042  if (prefixContains(&existingKey->prefix, &key->prefix))
1043  {
1044  /* This old key already covers the new key. Nothing to do */
1045  return;
1046  }
1047  if (prefixContains(&key->prefix, &existingKey->prefix))
1048  {
1049  /*
1050  * The new key covers this old key. Remove the old key, it's
1051  * no longer needed once we add this key to the list.
1052  */
1053  state->enterKeys = list_delete_cell(state->enterKeys,
1054  cell, prev);
1055  }
1056  else
1057  prev = cell;
1058  }
1059  else
1060  prev = cell;
1061  cell = next;
1062  }
1063 
1064  /* No redundancy, so add this key to the state's list */
1065  state->enterKeys = lappend(state->enterKeys, key);
1066 
1067  /* If state is now known final, mark it and we're done */
1068  if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
1069  {
1070  state->flags |= TSTATE_FIN;
1071  return;
1072  }
1073 
1074  /*
1075  * Loop through all outgoing arcs of the corresponding state in the
1076  * original NFA.
1077  */
1078  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1079  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1080  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1081 
1082  for (i = 0; i < arcsCount; i++)
1083  {
1084  regex_arc_t *arc = &arcs[i];
1085 
1086  if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
1087  {
1088  /*
1089  * Start of line/string (^). Trigram extraction treats start of
1090  * line same as start of word: double space prefix is added.
1091  * Hence, make an enter key showing we can reach the arc
1092  * destination with all-blank prefix.
1093  */
1094  destKey.prefix.colors[0] = COLOR_BLANK;
1095  destKey.prefix.colors[1] = COLOR_BLANK;
1096  destKey.nstate = arc->to;
1097 
1098  /* Add enter key to this state */
1099  addKeyToQueue(trgmNFA, &destKey);
1100  }
1101  else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
1102  {
1103  /*
1104  * End of line/string ($). We must consider this arc as a
1105  * transition that doesn't read anything. The reason for adding
1106  * this enter key to the state is that if the arc leads to the
1107  * NFA's final state, we must mark this expanded state as final.
1108  */
1109  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1110  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1111  destKey.nstate = arc->to;
1112 
1113  /* Add enter key to this state */
1114  addKeyToQueue(trgmNFA, &destKey);
1115  }
1116  else
1117  {
1118  /* Regular color */
1119  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1120 
1121  if (colorInfo->expandable)
1122  {
1123  if (colorInfo->containsNonWord &&
1124  !validArcLabel(key, COLOR_BLANK))
1125  {
1126  /*
1127  * We can reach the arc destination after reading a
1128  * non-word character, but the prefix is not something
1129  * that addArc will accept with COLOR_BLANK, so no trigram
1130  * arc can get made for this transition. We must make an
1131  * enter key to show that the arc destination is
1132  * reachable. Set it up with an all-blank prefix, since
1133  * that corresponds to what the trigram extraction code
1134  * will do at a word starting boundary.
1135  */
1136  destKey.prefix.colors[0] = COLOR_BLANK;
1137  destKey.prefix.colors[1] = COLOR_BLANK;
1138  destKey.nstate = arc->to;
1139  addKeyToQueue(trgmNFA, &destKey);
1140  }
1141 
1142  if (colorInfo->wordCharsCount > 0 &&
1143  !validArcLabel(key, arc->co))
1144  {
1145  /*
1146  * We can reach the arc destination after reading a word
1147  * character, but the prefix is not something that addArc
1148  * will accept, so no trigram arc can get made for this
1149  * transition. We must make an enter key to show that the
1150  * arc destination is reachable. The prefix for the enter
1151  * key should reflect the info we have for this arc.
1152  */
1153  destKey.prefix.colors[0] = key->prefix.colors[1];
1154  destKey.prefix.colors[1] = arc->co;
1155  destKey.nstate = arc->to;
1156  addKeyToQueue(trgmNFA, &destKey);
1157  }
1158  }
1159  else
1160  {
1161  /*
1162  * Unexpandable color. Add enter key with ambiguous prefix,
1163  * showing we can reach the destination from this state, but
1164  * the preceding colors will be uncertain. (We do not set the
1165  * first prefix color to key->prefix.colors[1], because a
1166  * prefix of known followed by unknown is invalid.)
1167  */
1168  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1169  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1170  destKey.nstate = arc->to;
1171  addKeyToQueue(trgmNFA, &destKey);
1172  }
1173  }
1174  }
1175 
1176  pfree(arcs);
1177 }
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
static int32 next
Definition: blutils.c:210
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1415
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:405
regex_t * regex
Definition: trgm_regexp.c:404
Definition: regguts.h:276
#define MemSet(start, val, len)
Definition: c.h:857
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:191
#define TSTATE_FIN
Definition: trgm_regexp.c:331
bool containsNonWord
Definition: trgm_regexp.c:263
void pfree(void *pointer)
Definition: mcxt.c:950
TrgmColor colors[2]
Definition: trgm_regexp.c:292
List * enterKeys
Definition: trgm_regexp.c:337
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
#define lnext(lc)
Definition: pg_list.h:105
List * lappend(List *list, void *datum)
Definition: list.c:128
#define COLOR_BLANK
Definition: trgm_regexp.c:288
List * list_delete_cell(List *list, ListCell *cell, ListCell *prev)
Definition: list.c:528
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1183
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:208
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
#define NULL
Definition: c.h:229
#define lfirst(lc)
Definition: pg_list.h:106
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1329
TrgmPrefix prefix
Definition: trgm_regexp.c:311
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:287
void * palloc(Size size)
Definition: mcxt.c:849
int i
static void addKeyToQueue ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1183 of file trgm_regexp.c.

References TrgmNFA::keysQueue, lappend(), and palloc().

Referenced by addKey().

1184 {
1185  TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
1186 
1187  memcpy(keyCopy, key, sizeof(TrgmStateKey));
1188  trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
1189 }
List * lappend(List *list, void *datum)
Definition: list.c:128
void * palloc(Size size)
Definition: mcxt.c:849
List * keysQueue
Definition: trgm_regexp.c:415
static int colorTrgmInfoCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1893 of file trgm_regexp.c.

References ColorTrgmInfo::ctrgm.

Referenced by packGraph(), and selectColorTrigrams().

1894 {
1895  const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
1896  const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
1897 
1898  return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
1899 }
ColorTrgm ctrgm
Definition: trgm_regexp.c:376
static int colorTrgmInfoPenaltyCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1906 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

1907 {
1908  float4 penalty1 = ((const ColorTrgmInfo *) p1)->penalty;
1909  float4 penalty2 = ((const ColorTrgmInfo *) p2)->penalty;
1910 
1911  if (penalty1 < penalty2)
1912  return 1;
1913  else if (penalty1 == penalty2)
1914  return 0;
1915  else
1916  return -1;
1917 }
float float4
Definition: c.h:380
static bool convertPgWchar ( pg_wchar  c,
trgm_mb_char result 
)
static

Definition at line 841 of file trgm_regexp.c.

References trgm_mb_char::bytes, lowerstr(), MAX_MULTIBYTE_CHAR_LEN, pfree(), and pg_wchar2mb_with_len().

Referenced by getColorInfo().

842 {
843  /* "s" has enough space for a multibyte character and a trailing NUL */
844  char s[MAX_MULTIBYTE_CHAR_LEN + 1];
845 
846  /*
847  * We can ignore the NUL character, since it can never appear in a PG text
848  * string. This avoids the need for various special cases when
849  * reconstructing trigrams.
850  */
851  if (c == 0)
852  return false;
853 
854  /* Do the conversion, making sure the result is NUL-terminated */
855  memset(s, 0, sizeof(s));
856  pg_wchar2mb_with_len(&c, s, 1);
857 
858  /*
859  * In IGNORECASE mode, we can ignore uppercase characters. We assume that
860  * the regex engine generated both uppercase and lowercase equivalents
861  * within each color, since we used the REG_ICASE option; so there's no
862  * need to process the uppercase version.
863  *
864  * XXX this code is dependent on the assumption that lowerstr() works the
865  * same as the regex engine's internal case folding machinery. Might be
866  * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
867  * On the other hand, the trigrams in the index were created using
868  * lowerstr(), so we're probably screwed if there's any incompatibility
869  * anyway.
870  */
871 #ifdef IGNORECASE
872  {
873  char *lowerCased = lowerstr(s);
874 
875  if (strcmp(lowerCased, s) != 0)
876  {
877  pfree(lowerCased);
878  return false;
879  }
880  pfree(lowerCased);
881  }
882 #endif
883 
884  /* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
885  memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
886  return true;
887 }
char * lowerstr(const char *str)
Definition: ts_locale.c:231
void pfree(void *pointer)
Definition: mcxt.c:950
char * c
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:30
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:244
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:756
TRGM* createTrgmNFA ( text text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 523 of file trgm_regexp.c.

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate(), createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_CATCH, PG_END_TRY, PG_RE_THROW, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, and REG_ICASE.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

525 {
526  TRGM *trg;
527  regex_t regex;
528  MemoryContext tmpcontext;
529  MemoryContext oldcontext;
530 
531  /*
532  * This processing generates a great deal of cruft, which we'd like to
533  * clean up before returning (since this function may be called in a
534  * query-lifespan memory context). Make a temp context we can work in so
535  * that cleanup is easy.
536  */
538  "createTrgmNFA temporary context",
540  oldcontext = MemoryContextSwitchTo(tmpcontext);
541 
542  /*
543  * Stage 1: Compile the regexp into a NFA, using the regexp library.
544  */
545 #ifdef IGNORECASE
546  RE_compile(&regex, text_re, REG_ADVANCED | REG_ICASE, collation);
547 #else
548  RE_compile(&regex, text_re, REG_ADVANCED, collation);
549 #endif
550 
551  /*
552  * Since the regexp library allocates its internal data structures with
553  * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
554  * done even if there's an error.
555  */
556  PG_TRY();
557  {
558  trg = createTrgmNFAInternal(&regex, graph, rcontext);
559  }
560  PG_CATCH();
561  {
562  pg_regfree(&regex);
563  PG_RE_THROW();
564  }
565  PG_END_TRY();
566 
567  pg_regfree(&regex);
568 
569  /* Clean up all the cruft we created */
570  MemoryContextSwitchTo(oldcontext);
571  MemoryContextDelete(tmpcontext);
572 
573  return trg;
574 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:200
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:580
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define REG_ICASE
Definition: regex.h:106
Definition: trgm.h:63
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:165
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
#define REG_ADVANCED
Definition: regex.h:103
MemoryContext AllocSetContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: aset.c:322
#define PG_CATCH()
Definition: elog.h:293
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
Definition: trgm_regexp.c:734
#define PG_RE_THROW()
Definition: elog.h:314
#define PG_TRY()
Definition: elog.h:284
void pg_regfree(regex_t *re)
Definition: regfree.c:49
Definition: regex.h:55
#define PG_END_TRY()
Definition: elog.h:300
static TRGM * createTrgmNFAInternal ( regex_t regex,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)
static

Definition at line 580 of file trgm_regexp.c.

References TrgmNFA::colorInfo, expandColorTrigrams(), TrgmState::flags, getColorInfo(), TrgmNFA::initState, TrgmNFA::ncolors, NULL, packGraph(), TrgmNFA::regex, selectColorTrigrams(), transformGraph(), and TSTATE_FIN.

Referenced by createTrgmNFA().

582 {
583  TRGM *trg;
584  TrgmNFA trgmNFA;
585 
586  trgmNFA.regex = regex;
587 
588  /* Collect color information from the regex */
589  getColorInfo(regex, &trgmNFA);
590 
591 #ifdef TRGM_REGEXP_DEBUG
592  printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
593 #endif
594 
595  /*
596  * Stage 2: Create an expanded graph from the source NFA.
597  */
598  transformGraph(&trgmNFA);
599 
600 #ifdef TRGM_REGEXP_DEBUG
601  printTrgmNFA(&trgmNFA);
602 #endif
603 
604  /*
605  * Fail if we were unable to make a nontrivial graph, ie it is possible to
606  * get from the initial state to the final state without reading any
607  * predictable trigram.
608  */
609  if (trgmNFA.initState->flags & TSTATE_FIN)
610  return NULL;
611 
612  /*
613  * Stage 3: Select color trigrams to expand. Fail if too many trigrams.
614  */
615  if (!selectColorTrigrams(&trgmNFA))
616  return NULL;
617 
618  /*
619  * Stage 4: Expand color trigrams and pack graph into final
620  * representation.
621  */
622  trg = expandColorTrigrams(&trgmNFA, rcontext);
623 
624  *graph = packGraph(&trgmNFA, rcontext);
625 
626 #ifdef TRGM_REGEXP_DEBUG
627  printTrgmPackedGraph(*graph, trg);
628 #endif
629 
630  return trg;
631 }
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:778
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:405
regex_t * regex
Definition: trgm_regexp.c:404
TrgmState * initState
Definition: trgm_regexp.c:410
#define TSTATE_FIN
Definition: trgm_regexp.c:331
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:1457
Definition: trgm.h:63
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1931
#define NULL
Definition: c.h:229
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1776
static void transformGraph(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:907
int ncolors
Definition: trgm_regexp.c:406
static TRGM * expandColorTrigrams ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1776 of file trgm_regexp.c.

References ARRKEY, trgm_mb_char::bytes, CALCGTSIZE, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fillTrgm(), TRGM::flag, GETARR, i, MemoryContextAllocZero(), SET_VARSIZE, TrgmNFA::totalTrgmCount, TRGMHDRSIZE, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

1777 {
1778  TRGM *trg;
1779  trgm *p;
1780  int i;
1781  TrgmColorInfo blankColor;
1782  trgm_mb_char blankChar;
1783 
1784  /* Set up "blank" color structure containing a single zero character */
1785  memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
1786  blankColor.wordCharsCount = 1;
1787  blankColor.wordChars = &blankChar;
1788 
1789  /* Construct the trgm array */
1790  trg = (TRGM *)
1791  MemoryContextAllocZero(rcontext,
1792  TRGMHDRSIZE +
1793  trgmNFA->totalTrgmCount * sizeof(trgm));
1794  trg->flag = ARRKEY;
1795  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
1796  p = GETARR(trg);
1797  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1798  {
1799  ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
1800  TrgmColorInfo *c[3];
1801  trgm_mb_char s[3];
1802  int j,
1803  i1,
1804  i2,
1805  i3;
1806 
1807  /* Ignore any unexpanded trigrams ... */
1808  if (!colorTrgm->expanded)
1809  continue;
1810 
1811  /* Get colors, substituting the dummy struct for COLOR_BLANK */
1812  for (j = 0; j < 3; j++)
1813  {
1814  if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
1815  c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
1816  else
1817  c[j] = &blankColor;
1818  }
1819 
1820  /* Iterate over all possible combinations of colors' characters */
1821  for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
1822  {
1823  s[0] = c[0]->wordChars[i1];
1824  for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
1825  {
1826  s[1] = c[1]->wordChars[i2];
1827  for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
1828  {
1829  s[2] = c[2]->wordChars[i3];
1830  fillTrgm(p, s);
1831  p++;
1832  }
1833  }
1834  }
1835  }
1836 
1837  return trg;
1838 }
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:405
TrgmColor colors[3]
Definition: trgm_regexp.c:301
#define ARRKEY
Definition: trgm.h:94
int totalTrgmCount
Definition: trgm_regexp.c:422
#define GETARR(x)
Definition: trgm.h:104
Definition: trgm.h:63
char * c
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:420
#define TRGMHDRSIZE
Definition: trgm.h:70
#define COLOR_BLANK
Definition: trgm_regexp.c:288
uint8 flag
Definition: trgm.h:66
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:742
int colorTrgmsCount
Definition: trgm_regexp.c:421
char trgm[3]
Definition: trgm.h:38
#define CALCGTSIZE(flag)
Definition: hstore_gist.c:52
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:244
int i
ColorTrgm ctrgm
Definition: trgm_regexp.c:376
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
trgm_mb_char * wordChars
Definition: trgm_regexp.c:265
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
Definition: trgm_regexp.c:1844
static void fillTrgm ( trgm ptrgm,
trgm_mb_char  s[3] 
)
static

Definition at line 1844 of file trgm_regexp.c.

References trgm_mb_char::bytes, compact_trigram(), i, and MAX_MULTIBYTE_CHAR_LEN.

Referenced by expandColorTrigrams().

1845 {
1846  char str[3 * MAX_MULTIBYTE_CHAR_LEN],
1847  *p;
1848  int i,
1849  j;
1850 
1851  /* Write multibyte string into "str" (we don't need null termination) */
1852  p = str;
1853 
1854  for (i = 0; i < 3; i++)
1855  {
1856  if (s[i].bytes[0] != 0)
1857  {
1858  for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
1859  *p++ = s[i].bytes[j];
1860  }
1861  else
1862  {
1863  /* Emit a space in place of COLOR_BLANK */
1864  *p++ = ' ';
1865  }
1866  }
1867 
1868  /* Convert "str" to a standard trigram (possibly hashing it) */
1869  compact_trigram(ptrgm, str, p - str);
1870 }
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:30
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:166
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:244
int i
static void getColorInfo ( regex_t regex,
TrgmNFA trgmNFA 
)
static

Definition at line 778 of file trgm_regexp.c.

References trgm_mb_char::bytes, chars, COLOR_COUNT_LIMIT, TrgmNFA::colorInfo, TrgmColorInfo::containsNonWord, convertPgWchar(), TrgmColorInfo::expandable, i, ISWORDCHR, TrgmNFA::ncolors, palloc(), palloc0(), pfree(), pg_reg_getcharacters(), pg_reg_getnumcharacters(), pg_reg_getnumcolors(), TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

779 {
780  int colorsCount = pg_reg_getnumcolors(regex);
781  int i;
782 
783  trgmNFA->ncolors = colorsCount;
784  trgmNFA->colorInfo = (TrgmColorInfo *)
785  palloc0(colorsCount * sizeof(TrgmColorInfo));
786 
787  /*
788  * Loop over colors, filling TrgmColorInfo about each.
789  */
790  for (i = 0; i < colorsCount; i++)
791  {
792  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
793  int charsCount = pg_reg_getnumcharacters(regex, i);
794  pg_wchar *chars;
795  int j;
796 
797  if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
798  {
799  /* Non expandable, or too large to work with */
800  colorInfo->expandable = false;
801  continue;
802  }
803 
804  colorInfo->expandable = true;
805  colorInfo->containsNonWord = false;
806  colorInfo->wordChars = (trgm_mb_char *)
807  palloc(sizeof(trgm_mb_char) * charsCount);
808  colorInfo->wordCharsCount = 0;
809 
810  /* Extract all the chars in this color */
811  chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
812  pg_reg_getcharacters(regex, i, chars, charsCount);
813 
814  /*
815  * Convert characters back to multibyte form, and save only those that
816  * are word characters. Set "containsNonWord" if any non-word
817  * character. (Note: it'd probably be nicer to keep the chars in
818  * pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
819  */
820  for (j = 0; j < charsCount; j++)
821  {
822  trgm_mb_char c;
823 
824  if (!convertPgWchar(chars[j], &c))
825  continue; /* ok to ignore it altogether */
826  if (ISWORDCHR(c.bytes))
827  colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
828  else
829  colorInfo->containsNonWord = true;
830  }
831 
832  pfree(chars);
833  }
834 }
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:405
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:229
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:265
#define ISWORDCHR(c)
Definition: trgm.h:51
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:174
bool containsNonWord
Definition: trgm_regexp.c:263
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
Definition: trgm_regexp.c:841
void pfree(void *pointer)
Definition: mcxt.c:950
char * c
#define COLOR_COUNT_LIMIT
Definition: trgm_regexp.c:224
unsigned int pg_wchar
Definition: mbprint.c:31
void * palloc0(Size size)
Definition: mcxt.c:878
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:244
void * palloc(Size size)
Definition: mcxt.c:849
int i
trgm_mb_char * wordChars
Definition: trgm_regexp.c:265
static char chars[TZ_MAX_CHARS]
Definition: zic.c:381
int ncolors
Definition: trgm_regexp.c:406
static TrgmState * getState ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1384 of file trgm_regexp.c.

References TrgmState::arcs, TrgmState::enterKeys, TrgmState::flags, HASH_ENTER, hash_search(), lappend(), NIL, TrgmNFA::nstates, NULL, TrgmState::parent, TrgmNFA::queue, TrgmState::snumber, TrgmNFA::states, TrgmState::tentFlags, and TrgmState::tentParent.

Referenced by addArc(), and transformGraph().

1385 {
1386  TrgmState *state;
1387  bool found;
1388 
1389  state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
1390  &found);
1391  if (!found)
1392  {
1393  /* New state: initialize and queue it */
1394  state->arcs = NIL;
1395  state->enterKeys = NIL;
1396  state->flags = 0;
1397  /* states are initially given negative numbers */
1398  state->snumber = -(++trgmNFA->nstates);
1399  state->parent = NULL;
1400  state->tentFlags = 0;
1401  state->tentParent = NULL;
1402 
1403  trgmNFA->queue = lappend(trgmNFA->queue, state);
1404  }
1405  return state;
1406 }
#define NIL
Definition: pg_list.h:69
struct TrgmState * tentParent
Definition: trgm_regexp.c:342
HTAB * states
Definition: trgm_regexp.c:409
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:885
int nstates
Definition: trgm_regexp.c:411
List * enterKeys
Definition: trgm_regexp.c:337
List * arcs
Definition: trgm_regexp.c:336
List * lappend(List *list, void *datum)
Definition: list.c:128
List * queue
Definition: trgm_regexp.c:414
#define NULL
Definition: c.h:229
int tentFlags
Definition: trgm_regexp.c:341
Definition: regguts.h:298
struct TrgmState * parent
Definition: trgm_regexp.c:340
static void mergeStates ( TrgmState state1,
TrgmState state2 
)
static

Definition at line 1876 of file trgm_regexp.c.

References Assert, TrgmState::flags, and TrgmState::parent.

Referenced by selectColorTrigrams().

1877 {
1878  Assert(state1 != state2);
1879  Assert(!state1->parent);
1880  Assert(!state2->parent);
1881 
1882  /* state1 absorbs state2's flags */
1883  state1->flags |= state2->flags;
1884 
1885  /* state2, and indirectly all its children, become children of state1 */
1886  state2->parent = state1;
1887 }
#define Assert(condition)
Definition: c.h:675
struct TrgmState * parent
Definition: trgm_regexp.c:340
static int packArcInfoCmp ( const void *  a1,
const void *  a2 
)
static

Definition at line 2086 of file trgm_regexp.c.

References TrgmPackArcInfo::colorTrgm, TrgmPackArcInfo::sourceState, and TrgmPackArcInfo::targetState.

Referenced by packGraph().

2087 {
2088  const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
2089  const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
2090 
2091  if (p1->sourceState < p2->sourceState)
2092  return -1;
2093  if (p1->sourceState > p2->sourceState)
2094  return 1;
2095  if (p1->colorTrgm < p2->colorTrgm)
2096  return -1;
2097  if (p1->colorTrgm > p2->colorTrgm)
2098  return 1;
2099  if (p1->targetState < p2->targetState)
2100  return -1;
2101  if (p1->targetState > p2->targetState)
2102  return 1;
2103  return 0;
2104 }
static FormData_pg_attribute a1
Definition: heap.c:144
static FormData_pg_attribute a2
Definition: heap.c:150
static TrgmPackedGraph * packGraph ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1931 of file trgm_regexp.c.

References TrgmState::arcs, TrgmPackedState::arcs, TrgmNFA::arcsCount, TrgmPackedState::arcsCount, Assert, ColorTrgmInfo::cnumber, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, colorTrgmInfoCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::expanded, TrgmState::flags, hash_seq_init(), hash_seq_search(), i, lfirst, MemoryContextAlloc(), NULL, packArcInfoCmp(), palloc(), TrgmState::parent, qsort, result, TrgmState::snumber, TrgmPackArcInfo::sourceState, TrgmNFA::states, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, TrgmArc::target, TrgmPackedArc::targetState, TrgmPackArcInfo::targetState, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

1932 {
1933  int snumber = 2,
1934  arcIndex,
1935  arcsCount;
1936  HASH_SEQ_STATUS scan_status;
1937  TrgmState *state;
1938  TrgmPackArcInfo *arcs,
1939  *p1,
1940  *p2;
1941  TrgmPackedArc *packedArcs;
1943  int i,
1944  j;
1945 
1946  /* Enumerate surviving states, giving init and fin reserved numbers */
1947  hash_seq_init(&scan_status, trgmNFA->states);
1948  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1949  {
1950  while (state->parent)
1951  state = state->parent;
1952 
1953  if (state->snumber < 0)
1954  {
1955  if (state->flags & TSTATE_INIT)
1956  state->snumber = 0;
1957  else if (state->flags & TSTATE_FIN)
1958  state->snumber = 1;
1959  else
1960  {
1961  state->snumber = snumber;
1962  snumber++;
1963  }
1964  }
1965  }
1966 
1967  /* Collect array of all arcs */
1968  arcs = (TrgmPackArcInfo *)
1969  palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
1970  arcIndex = 0;
1971  hash_seq_init(&scan_status, trgmNFA->states);
1972  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1973  {
1974  TrgmState *source = state;
1975  ListCell *cell;
1976 
1977  while (source->parent)
1978  source = source->parent;
1979 
1980  foreach(cell, state->arcs)
1981  {
1982  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1983  TrgmState *target = arc->target;
1984 
1985  while (target->parent)
1986  target = target->parent;
1987 
1988  if (source->snumber != target->snumber)
1989  {
1990  ColorTrgmInfo *ctrgm;
1991 
1992  ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
1993  trgmNFA->colorTrgms,
1994  trgmNFA->colorTrgmsCount,
1995  sizeof(ColorTrgmInfo),
1997  Assert(ctrgm != NULL);
1998  Assert(ctrgm->expanded);
1999 
2000  arcs[arcIndex].sourceState = source->snumber;
2001  arcs[arcIndex].targetState = target->snumber;
2002  arcs[arcIndex].colorTrgm = ctrgm->cnumber;
2003  arcIndex++;
2004  }
2005  }
2006  }
2007 
2008  /* Sort arcs to ease duplicate detection */
2009  qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
2010 
2011  /* We could have duplicates because states were merged. Remove them. */
2012  /* p1 is probe point, p2 is last known non-duplicate. */
2013  p2 = arcs;
2014  for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2015  {
2016  if (packArcInfoCmp(p1, p2) > 0)
2017  {
2018  p2++;
2019  *p2 = *p1;
2020  }
2021  }
2022  arcsCount = (p2 - arcs) + 1;
2023 
2024  /* Create packed representation */
2025  result = (TrgmPackedGraph *)
2026  MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
2027 
2028  /* Pack color trigrams information */
2029  result->colorTrigramsCount = 0;
2030  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2031  {
2032  if (trgmNFA->colorTrgms[i].expanded)
2033  result->colorTrigramsCount++;
2034  }
2035  result->colorTrigramGroups = (int *)
2036  MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
2037  j = 0;
2038  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2039  {
2040  if (trgmNFA->colorTrgms[i].expanded)
2041  {
2042  result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
2043  j++;
2044  }
2045  }
2046 
2047  /* Pack states and arcs information */
2048  result->statesCount = snumber;
2049  result->states = (TrgmPackedState *)
2050  MemoryContextAlloc(rcontext, snumber * sizeof(TrgmPackedState));
2051  packedArcs = (TrgmPackedArc *)
2052  MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
2053  j = 0;
2054  for (i = 0; i < snumber; i++)
2055  {
2056  int cnt = 0;
2057 
2058  result->states[i].arcs = &packedArcs[j];
2059  while (j < arcsCount && arcs[j].sourceState == i)
2060  {
2061  packedArcs[j].targetState = arcs[j].targetState;
2062  packedArcs[j].colorTrgm = arcs[j].colorTrgm;
2063  cnt++;
2064  j++;
2065  }
2066  result->states[i].arcsCount = cnt;
2067  }
2068 
2069  /* Allocate working memory for trigramsMatchGraph() */
2070  result->colorTrigramsActive = (bool *)
2071  MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
2072  result->statesActive = (bool *)
2073  MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
2074  result->statesQueue = (int *)
2075  MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
2076 
2077  return result;
2078 }
bool * colorTrigramsActive
Definition: trgm_regexp.c:463
ColorTrgm ctrgm
Definition: trgm_regexp.c:350
Definition: regguts.h:276
HTAB * states
Definition: trgm_regexp.c:409
return result
Definition: formatting.c:1632
TrgmPackedState * states
Definition: trgm_regexp.c:460
#define TSTATE_FIN
Definition: trgm_regexp.c:331
#define TSTATE_INIT
Definition: trgm_regexp.c:330
TrgmPackedArc * arcs
Definition: trgm_regexp.c:437
static int packArcInfoCmp(const void *a1, const void *a2)
Definition: trgm_regexp.c:2086
bool * statesActive
Definition: trgm_regexp.c:464
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:420
TrgmState * target
Definition: trgm_regexp.c:351
List * arcs
Definition: trgm_regexp.c:336
int arcsCount
Definition: trgm_regexp.c:416
int colorTrgmsCount
Definition: trgm_regexp.c:421
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1351
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1341
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1893
struct TrgmState * parent
Definition: trgm_regexp.c:340
void * palloc(Size size)
Definition: mcxt.c:849
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:707
int i
#define qsort(a, b, c, d)
Definition: port.h:440
int * colorTrigramGroups
Definition: trgm_regexp.c:453
static bool prefixContains ( TrgmPrefix prefix1,
TrgmPrefix prefix2 
)
static

Definition at line 1415 of file trgm_regexp.c.

References COLOR_UNKNOWN, and TrgmPrefix::colors.

Referenced by addArc(), and addKey().

1416 {
1417  if (prefix1->colors[1] == COLOR_UNKNOWN)
1418  {
1419  /* Fully ambiguous prefix contains everything */
1420  return true;
1421  }
1422  else if (prefix1->colors[0] == COLOR_UNKNOWN)
1423  {
1424  /*
1425  * Prefix with only first unknown color contains every prefix with
1426  * same second color.
1427  */
1428  if (prefix1->colors[1] == prefix2->colors[1])
1429  return true;
1430  else
1431  return false;
1432  }
1433  else
1434  {
1435  /* Exact prefix contains only the exact same prefix */
1436  if (prefix1->colors[0] == prefix2->colors[0] &&
1437  prefix1->colors[1] == prefix2->colors[1])
1438  return true;
1439  else
1440  return false;
1441  }
1442 }
TrgmColor colors[2]
Definition: trgm_regexp.c:292
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:287
static void processState ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 969 of file trgm_regexp.c.

References addArcs(), addKey(), TrgmState::flags, TrgmNFA::keysQueue, linitial, list_delete_first(), NIL, TrgmState::stateKey, and TSTATE_FIN.

Referenced by transformGraph().

970 {
971  /* keysQueue should be NIL already, but make sure */
972  trgmNFA->keysQueue = NIL;
973 
974  /*
975  * Add state's own key, and then process all keys added to keysQueue until
976  * queue is empty. But we can quit if the state gets marked final.
977  */
978  addKey(trgmNFA, state, &state->stateKey);
979  while (trgmNFA->keysQueue != NIL && !(state->flags & TSTATE_FIN))
980  {
981  TrgmStateKey *key = (TrgmStateKey *) linitial(trgmNFA->keysQueue);
982 
983  trgmNFA->keysQueue = list_delete_first(trgmNFA->keysQueue);
984  addKey(trgmNFA, state, key);
985  }
986 
987  /*
988  * Add outgoing arcs only if state isn't final (we have no interest in
989  * outgoing arcs if we already match)
990  */
991  if (!(state->flags & TSTATE_FIN))
992  addArcs(trgmNFA, state);
993 }
#define NIL
Definition: pg_list.h:69
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
Definition: trgm_regexp.c:1012
#define TSTATE_FIN
Definition: trgm_regexp.c:331
#define linitial(l)
Definition: pg_list.h:111
TrgmStateKey stateKey
Definition: trgm_regexp.c:335
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:1195
List * keysQueue
Definition: trgm_regexp.c:415
List * list_delete_first(List *list)
Definition: list.c:666
static void RE_compile ( regex_t regex,
text text_re,
int  cflags,
Oid  collation 
)
static

Definition at line 734 of file trgm_regexp.c.

References ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by createTrgmNFA().

735 {
736  int text_re_len = VARSIZE_ANY_EXHDR(text_re);
737  char *text_re_val = VARDATA_ANY(text_re);
738  pg_wchar *pattern;
739  int pattern_len;
740  int regcomp_result;
741  char errMsg[100];
742 
743  /* Convert pattern string to wide characters */
744  pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
745  pattern_len = pg_mb2wchar_with_len(text_re_val,
746  pattern,
747  text_re_len);
748 
749  /* Compile regex */
750  regcomp_result = pg_regcomp(regex,
751  pattern,
752  pattern_len,
753  cflags,
754  collation);
755 
756  pfree(pattern);
757 
758  if (regcomp_result != REG_OKAY)
759  {
760  /* re didn't compile (no need for pg_regfree, if so) */
761  pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
762  ereport(ERROR,
763  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
764  errmsg("invalid regular expression: %s", errMsg)));
765  }
766 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
int errcode(int sqlerrcode)
Definition: elog.c:575
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
Definition: regcomp.c:314
void pfree(void *pointer)
Definition: mcxt.c:950
#define REG_OKAY
Definition: regex.h:137
#define ERROR
Definition: elog.h:43
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define ereport(elevel, rest)
Definition: elog.h:122
unsigned int pg_wchar
Definition: mbprint.c:31
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:734
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
void * palloc(Size size)
Definition: mcxt.c:849
int errmsg(const char *fmt,...)
Definition: elog.c:797
static bool selectColorTrigrams ( TrgmNFA trgmNFA)
static

Definition at line 1457 of file trgm_regexp.c.

References TrgmState::arcs, ColorTrgmInfo::arcs, TrgmNFA::arcsCount, Assert, ColorTrgmInfo::cnumber, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, colorTrgmInfoCmp(), colorTrgmInfoPenaltyCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, TrgmState::flags, hash_seq_init(), hash_seq_search(), i, lfirst, list_concat(), list_length(), list_make1, MAX_TRGM_COUNT, mergeStates(), NULL, palloc(), palloc0(), TrgmState::parent, penalties, ColorTrgmInfo::penalty, qsort, TrgmState::snumber, TrgmArcInfo::source, TrgmNFA::states, TrgmArc::target, TrgmArcInfo::target, TrgmState::tentFlags, TrgmState::tentParent, TrgmNFA::totalTrgmCount, TSTATE_FIN, TSTATE_INIT, WISH_TRGM_PENALTY, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

1458 {
1459  HASH_SEQ_STATUS scan_status;
1460  int arcsCount = trgmNFA->arcsCount,
1461  i;
1462  TrgmState *state;
1463  ColorTrgmInfo *colorTrgms;
1464  int64 totalTrgmCount;
1465  float4 totalTrgmPenalty;
1466  int cnumber;
1467 
1468  /* Collect color trigrams from all arcs */
1469  colorTrgms = (ColorTrgmInfo *) palloc0(sizeof(ColorTrgmInfo) * arcsCount);
1470  trgmNFA->colorTrgms = colorTrgms;
1471 
1472  i = 0;
1473  hash_seq_init(&scan_status, trgmNFA->states);
1474  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1475  {
1476  ListCell *cell;
1477 
1478  foreach(cell, state->arcs)
1479  {
1480  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1481  TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
1482  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1483 
1484  arcInfo->source = state;
1485  arcInfo->target = arc->target;
1486  trgmInfo->ctrgm = arc->ctrgm;
1487  trgmInfo->cnumber = -1;
1488  /* count and penalty will be set below */
1489  trgmInfo->expanded = true;
1490  trgmInfo->arcs = list_make1(arcInfo);
1491  i++;
1492  }
1493  }
1494  Assert(i == arcsCount);
1495 
1496  /* Remove duplicates, merging their arcs lists */
1497  if (arcsCount >= 2)
1498  {
1499  ColorTrgmInfo *p1,
1500  *p2;
1501 
1502  /* Sort trigrams to ease duplicate detection */
1503  qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
1504 
1505  /* p1 is probe point, p2 is last known non-duplicate. */
1506  p2 = colorTrgms;
1507  for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1508  {
1509  if (colorTrgmInfoCmp(p1, p2) > 0)
1510  {
1511  p2++;
1512  *p2 = *p1;
1513  }
1514  else
1515  {
1516  p2->arcs = list_concat(p2->arcs, p1->arcs);
1517  }
1518  }
1519  trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
1520  }
1521  else
1522  {
1523  trgmNFA->colorTrgmsCount = arcsCount;
1524  }
1525 
1526  /*
1527  * Count number of simple trigrams generated by each color trigram, and
1528  * also compute a penalty value, which is the number of simple trigrams
1529  * times a multiplier that depends on its whitespace content.
1530  *
1531  * Note: per-color-trigram counts cannot overflow an int so long as
1532  * COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
1533  * 1290. However, the grand total totalTrgmCount might conceivably
1534  * overflow an int, so we use int64 for that within this routine. Also,
1535  * penalties are calculated in float4 arithmetic to avoid any overflow
1536  * worries.
1537  */
1538  totalTrgmCount = 0;
1539  totalTrgmPenalty = 0.0f;
1540  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1541  {
1542  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1543  int j,
1544  count = 1,
1545  typeIndex = 0;
1546 
1547  for (j = 0; j < 3; j++)
1548  {
1549  TrgmColor c = trgmInfo->ctrgm.colors[j];
1550 
1551  typeIndex *= 2;
1552  if (c == COLOR_BLANK)
1553  typeIndex++;
1554  else
1555  count *= trgmNFA->colorInfo[c].wordCharsCount;
1556  }
1557  trgmInfo->count = count;
1558  totalTrgmCount += count;
1559  trgmInfo->penalty = penalties[typeIndex] * (float4) count;
1560  totalTrgmPenalty += trgmInfo->penalty;
1561  }
1562 
1563  /* Sort color trigrams in descending order of their penalties */
1564  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1566 
1567  /*
1568  * Remove color trigrams from the graph so long as total penalty of color
1569  * trigrams exceeds WISH_TRGM_PENALTY. (If we fail to get down to
1570  * WISH_TRGM_PENALTY, it's OK so long as total count is no more than
1571  * MAX_TRGM_COUNT.) We prefer to remove color trigrams with higher
1572  * penalty, since those are the most promising for reducing the total
1573  * penalty. When removing a color trigram we have to merge states
1574  * connected by arcs labeled with that trigram. It's necessary to not
1575  * merge initial and final states, because our graph becomes useless if
1576  * that happens; so we cannot always remove the trigram we'd prefer to.
1577  */
1578  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1579  {
1580  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1581  bool canRemove = true;
1582  ListCell *cell;
1583 
1584  /* Done if we've reached the target */
1585  if (totalTrgmPenalty <= WISH_TRGM_PENALTY)
1586  break;
1587 
1588 #ifdef TRGM_REGEXP_DEBUG
1589  fprintf(stderr, "considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1590  trgmInfo->ctrgm.colors[0],
1591  trgmInfo->ctrgm.colors[1],
1592  trgmInfo->ctrgm.colors[2],
1593  trgmInfo->penalty,
1594  list_length(trgmInfo->arcs));
1595 #endif
1596 
1597  /*
1598  * Does any arc of this color trigram connect initial and final
1599  * states? If so we can't remove it.
1600  */
1601  foreach(cell, trgmInfo->arcs)
1602  {
1603  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1604  TrgmState *source = arcInfo->source,
1605  *target = arcInfo->target;
1606  int source_flags,
1607  target_flags;
1608 
1609 #ifdef TRGM_REGEXP_DEBUG
1610  fprintf(stderr, "examining arc to s%d (%x) from s%d (%x)\n",
1611  -target->snumber, target->flags,
1612  -source->snumber, source->flags);
1613 #endif
1614 
1615  /* examine parent states, if any merging has already happened */
1616  while (source->parent)
1617  source = source->parent;
1618  while (target->parent)
1619  target = target->parent;
1620 
1621 #ifdef TRGM_REGEXP_DEBUG
1622  fprintf(stderr, " ... after completed merges: to s%d (%x) from s%d (%x)\n",
1623  -target->snumber, target->flags,
1624  -source->snumber, source->flags);
1625 #endif
1626 
1627  /* we must also consider merges we are planning right now */
1628  source_flags = source->flags | source->tentFlags;
1629  while (source->tentParent)
1630  {
1631  source = source->tentParent;
1632  source_flags |= source->flags | source->tentFlags;
1633  }
1634  target_flags = target->flags | target->tentFlags;
1635  while (target->tentParent)
1636  {
1637  target = target->tentParent;
1638  target_flags |= target->flags | target->tentFlags;
1639  }
1640 
1641 #ifdef TRGM_REGEXP_DEBUG
1642  fprintf(stderr, " ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1643  -target->snumber, target_flags,
1644  -source->snumber, source_flags);
1645 #endif
1646 
1647  /* would fully-merged state have both INIT and FIN set? */
1648  if (((source_flags | target_flags) & (TSTATE_INIT | TSTATE_FIN)) ==
1649  (TSTATE_INIT | TSTATE_FIN))
1650  {
1651  canRemove = false;
1652  break;
1653  }
1654 
1655  /* ok so far, so remember planned merge */
1656  if (source != target)
1657  {
1658 #ifdef TRGM_REGEXP_DEBUG
1659  fprintf(stderr, " ... tentatively merging s%d into s%d\n",
1660  -target->snumber, -source->snumber);
1661 #endif
1662  target->tentParent = source;
1663  source->tentFlags |= target_flags;
1664  }
1665  }
1666 
1667  /*
1668  * We must reset all the tentFlags/tentParent fields before
1669  * continuing. tentFlags could only have become set in states that
1670  * are the source or parent or tentative parent of one of the current
1671  * arcs; likewise tentParent could only have become set in states that
1672  * are the target or parent or tentative parent of one of the current
1673  * arcs. There might be some overlap between those sets, but if we
1674  * clear tentFlags in target states as well as source states, we
1675  * should be okay even if we visit a state as target before visiting
1676  * it as a source.
1677  */
1678  foreach(cell, trgmInfo->arcs)
1679  {
1680  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1681  TrgmState *source = arcInfo->source,
1682  *target = arcInfo->target;
1683  TrgmState *ttarget;
1684 
1685  /* no need to touch previously-merged states */
1686  while (source->parent)
1687  source = source->parent;
1688  while (target->parent)
1689  target = target->parent;
1690 
1691  while (source)
1692  {
1693  source->tentFlags = 0;
1694  source = source->tentParent;
1695  }
1696 
1697  while ((ttarget = target->tentParent) != NULL)
1698  {
1699  target->tentParent = NULL;
1700  target->tentFlags = 0; /* in case it was also a source */
1701  target = ttarget;
1702  }
1703  }
1704 
1705  /* Now, move on if we can't drop this trigram */
1706  if (!canRemove)
1707  {
1708 #ifdef TRGM_REGEXP_DEBUG
1709  fprintf(stderr, " ... not ok to merge\n");
1710 #endif
1711  continue;
1712  }
1713 
1714  /* OK, merge states linked by each arc labeled by the trigram */
1715  foreach(cell, trgmInfo->arcs)
1716  {
1717  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1718  TrgmState *source = arcInfo->source,
1719  *target = arcInfo->target;
1720 
1721  while (source->parent)
1722  source = source->parent;
1723  while (target->parent)
1724  target = target->parent;
1725  if (source != target)
1726  {
1727 #ifdef TRGM_REGEXP_DEBUG
1728  fprintf(stderr, "merging s%d into s%d\n",
1729  -target->snumber, -source->snumber);
1730 #endif
1731  mergeStates(source, target);
1732  /* Assert we didn't merge initial and final states */
1733  Assert((source->flags & (TSTATE_INIT | TSTATE_FIN)) !=
1734  (TSTATE_INIT | TSTATE_FIN));
1735  }
1736  }
1737 
1738  /* Mark trigram unexpanded, and update totals */
1739  trgmInfo->expanded = false;
1740  totalTrgmCount -= trgmInfo->count;
1741  totalTrgmPenalty -= trgmInfo->penalty;
1742  }
1743 
1744  /* Did we succeed in fitting into MAX_TRGM_COUNT? */
1745  if (totalTrgmCount > MAX_TRGM_COUNT)
1746  return false;
1747 
1748  trgmNFA->totalTrgmCount = (int) totalTrgmCount;
1749 
1750  /*
1751  * Sort color trigrams by colors (will be useful for bsearch in packGraph)
1752  * and enumerate the color trigrams that are expanded.
1753  */
1754  cnumber = 0;
1755  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1757  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1758  {
1759  if (colorTrgms[i].expanded)
1760  {
1761  colorTrgms[i].cnumber = cnumber;
1762  cnumber++;
1763  }
1764  }
1765 
1766  return true;
1767 }
static void mergeStates(TrgmState *state1, TrgmState *state2)
Definition: trgm_regexp.c:1876
ColorTrgm ctrgm
Definition: trgm_regexp.c:350
struct TrgmState * tentParent
Definition: trgm_regexp.c:342
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:405
float4 penalty
Definition: trgm_regexp.c:379
Definition: regguts.h:276
TrgmColor colors[3]
Definition: trgm_regexp.c:301
HTAB * states
Definition: trgm_regexp.c:409
#define WISH_TRGM_PENALTY
Definition: trgm_regexp.c:223
List * list_concat(List *list1, List *list2)
Definition: list.c:321
#define TSTATE_FIN
Definition: trgm_regexp.c:331
#define TSTATE_INIT
Definition: trgm_regexp.c:330
int totalTrgmCount
Definition: trgm_regexp.c:422
#define list_make1(x1)
Definition: pg_list.h:139
char * c
TrgmState * target
Definition: trgm_regexp.c:362
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:420
TrgmState * target
Definition: trgm_regexp.c:351
List * arcs
Definition: trgm_regexp.c:336
TrgmState * source
Definition: trgm_regexp.c:361
int arcsCount
Definition: trgm_regexp.c:416
#define COLOR_BLANK
Definition: trgm_regexp.c:288
float float4
Definition: c.h:380
void * palloc0(Size size)
Definition: mcxt.c:878
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1906
int colorTrgmsCount
Definition: trgm_regexp.c:421
#define MAX_TRGM_COUNT
Definition: trgm_regexp.c:222
#define NULL
Definition: c.h:229
int tentFlags
Definition: trgm_regexp.c:341
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
static int list_length(const List *l)
Definition: pg_list.h:89
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1351
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1341
static const float4 penalties[8]
Definition: trgm_regexp.c:230
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1893
struct TrgmState * parent
Definition: trgm_regexp.c:340
void * palloc(Size size)
Definition: mcxt.c:849
int i
ColorTrgm ctrgm
Definition: trgm_regexp.c:376
#define qsort(a, b, c, d)
Definition: port.h:440
int TrgmColor
Definition: trgm_regexp.c:284
static void transformGraph ( TrgmNFA trgmNFA)
static

Definition at line 907 of file trgm_regexp.c.

References TrgmNFA::arcsCount, COLOR_UNKNOWN, TrgmPrefix::colors, CurrentMemoryContext, HASHCTL::entrysize, TrgmState::flags, getState(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, hash_get_num_entries(), HASHCTL::hcxt, TrgmNFA::initState, HASHCTL::keysize, TrgmNFA::keysQueue, linitial, list_delete_first(), MAX_EXPANDED_ARCS, MAX_EXPANDED_STATES, MemSet, NIL, TrgmStateKey::nstate, TrgmNFA::nstates, TrgmNFA::overflowed, pg_reg_getinitialstate(), TrgmStateKey::prefix, processState(), TrgmNFA::queue, TrgmNFA::regex, TrgmNFA::states, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

908 {
909  HASHCTL hashCtl;
910  TrgmStateKey initkey;
911  TrgmState *initstate;
912 
913  /* Initialize this stage's workspace in trgmNFA struct */
914  trgmNFA->queue = NIL;
915  trgmNFA->keysQueue = NIL;
916  trgmNFA->arcsCount = 0;
917  trgmNFA->overflowed = false;
918 
919  /* Create hashtable for states */
920  hashCtl.keysize = sizeof(TrgmStateKey);
921  hashCtl.entrysize = sizeof(TrgmState);
922  hashCtl.hcxt = CurrentMemoryContext;
923  trgmNFA->states = hash_create("Trigram NFA",
924  1024,
925  &hashCtl,
927  trgmNFA->nstates = 0;
928 
929  /* Create initial state: ambiguous prefix, NFA's initial state */
930  MemSet(&initkey, 0, sizeof(initkey));
931  initkey.prefix.colors[0] = COLOR_UNKNOWN;
932  initkey.prefix.colors[1] = COLOR_UNKNOWN;
933  initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
934 
935  initstate = getState(trgmNFA, &initkey);
936  initstate->flags |= TSTATE_INIT;
937  trgmNFA->initState = initstate;
938 
939  /*
940  * Recursively build the expanded graph by processing queue of states
941  * (breadth-first search). getState already put initstate in the queue.
942  */
943  while (trgmNFA->queue != NIL)
944  {
945  TrgmState *state = (TrgmState *) linitial(trgmNFA->queue);
946 
947  trgmNFA->queue = list_delete_first(trgmNFA->queue);
948 
949  /*
950  * If we overflowed then just mark state as final. Otherwise do
951  * actual processing.
952  */
953  if (trgmNFA->overflowed)
954  state->flags |= TSTATE_FIN;
955  else
956  processState(trgmNFA, state);
957 
958  /* Did we overflow? */
959  if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
961  trgmNFA->overflowed = true;
962  }
963 }
#define MAX_EXPANDED_ARCS
Definition: trgm_regexp.c:221
#define NIL
Definition: pg_list.h:69
struct TrgmState TrgmState
#define HASH_CONTEXT
Definition: hsearch.h:93
#define HASH_ELEM
Definition: hsearch.h:87
MemoryContext hcxt
Definition: hsearch.h:78
regex_t * regex
Definition: trgm_regexp.c:404
HTAB * states
Definition: trgm_regexp.c:409
Size entrysize
Definition: hsearch.h:73
#define MemSet(start, val, len)
Definition: c.h:857
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1297
TrgmState * initState
Definition: trgm_regexp.c:410
#define TSTATE_FIN
Definition: trgm_regexp.c:331
#define TSTATE_INIT
Definition: trgm_regexp.c:330
bool overflowed
Definition: trgm_regexp.c:417
int nstates
Definition: trgm_regexp.c:411
#define linitial(l)
Definition: pg_list.h:111
TrgmColor colors[2]
Definition: trgm_regexp.c:292
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
int arcsCount
Definition: trgm_regexp.c:416
List * queue
Definition: trgm_regexp.c:414
#define HASH_BLOBS
Definition: hsearch.h:88
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:301
Size keysize
Definition: hsearch.h:72
Definition: regguts.h:298
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1384
TrgmPrefix prefix
Definition: trgm_regexp.c:311
#define MAX_EXPANDED_STATES
Definition: trgm_regexp.c:220
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:287
List * keysQueue
Definition: trgm_regexp.c:415
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:969
List * list_delete_first(List *list)
Definition: list.c:666
bool trigramsMatchGraph ( TrgmPackedGraph graph,
bool check 
)

Definition at line 641 of file trgm_regexp.c.

References TrgmPackedState::arcs, TrgmPackedState::arcsCount, TrgmPackedArc::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, and TrgmPackedArc::targetState.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

642 {
643  int i,
644  j,
645  k,
646  queueIn,
647  queueOut;
648 
649  /*
650  * Reset temporary working areas.
651  */
652  memset(graph->colorTrigramsActive, 0,
653  sizeof(bool) * graph->colorTrigramsCount);
654  memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
655 
656  /*
657  * Check which color trigrams were matched. A match for any simple
658  * trigram associated with a color trigram counts as a match of the color
659  * trigram.
660  */
661  j = 0;
662  for (i = 0; i < graph->colorTrigramsCount; i++)
663  {
664  int cnt = graph->colorTrigramGroups[i];
665 
666  for (k = j; k < j + cnt; k++)
667  {
668  if (check[k])
669  {
670  /*
671  * Found one matched trigram in the group. Can skip the rest
672  * of them and go to the next group.
673  */
674  graph->colorTrigramsActive[i] = true;
675  break;
676  }
677  }
678  j = j + cnt;
679  }
680 
681  /*
682  * Initialize the statesQueue to hold just the initial state. Note:
683  * statesQueue has room for statesCount entries, which is certainly enough
684  * since no state will be put in the queue more than once. The
685  * statesActive array marks which states have been queued.
686  */
687  graph->statesActive[0] = true;
688  graph->statesQueue[0] = 0;
689  queueIn = 0;
690  queueOut = 1;
691 
692  /* Process queued states as long as there are any. */
693  while (queueIn < queueOut)
694  {
695  int stateno = graph->statesQueue[queueIn++];
696  TrgmPackedState *state = &graph->states[stateno];
697  int cnt = state->arcsCount;
698 
699  /* Loop over state's out-arcs */
700  for (i = 0; i < cnt; i++)
701  {
702  TrgmPackedArc *arc = &state->arcs[i];
703 
704  /*
705  * If corresponding color trigram is present then activate the
706  * corresponding state. We're done if that's the final state,
707  * otherwise queue the state if it's not been queued already.
708  */
709  if (graph->colorTrigramsActive[arc->colorTrgm])
710  {
711  int nextstate = arc->targetState;
712 
713  if (nextstate == 1)
714  return true; /* success: final state is reachable */
715 
716  if (!graph->statesActive[nextstate])
717  {
718  graph->statesActive[nextstate] = true;
719  graph->statesQueue[queueOut++] = nextstate;
720  }
721  }
722  }
723  }
724 
725  /* Queue is empty, so match fails. */
726  return false;
727 }
bool * colorTrigramsActive
Definition: trgm_regexp.c:463
Definition: regguts.h:276
TrgmPackedState * states
Definition: trgm_regexp.c:460
TrgmPackedArc * arcs
Definition: trgm_regexp.c:437
bool * statesActive
Definition: trgm_regexp.c:464
Definition: regguts.h:298
int i
int * colorTrigramGroups
Definition: trgm_regexp.c:453
static bool validArcLabel ( TrgmStateKey key,
TrgmColor  co 
)
static

Definition at line 1329 of file trgm_regexp.c.

References Assert, COLOR_BLANK, COLOR_UNKNOWN, TrgmPrefix::colors, and TrgmStateKey::prefix.

Referenced by addArc(), and addKey().

1330 {
1331  /*
1332  * We have to know full trigram in order to add outgoing arc. So we can't
1333  * do it if prefix is ambiguous.
1334  */
1335  if (key->prefix.colors[0] == COLOR_UNKNOWN)
1336  return false;
1337 
1338  /* If key->prefix.colors[0] isn't unknown, its second color isn't either */
1339  Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
1340  /* And we should not be called with an unknown arc color anytime */
1341  Assert(co != COLOR_UNKNOWN);
1342 
1343  /*
1344  * We don't bother with making arcs representing three non-word
1345  * characters, since that's useless for trigram extraction.
1346  */
1347  if (key->prefix.colors[0] == COLOR_BLANK &&
1348  key->prefix.colors[1] == COLOR_BLANK &&
1349  co == COLOR_BLANK)
1350  return false;
1351 
1352  /*
1353  * We also reject nonblank-blank-anything. The nonblank-blank-nonblank
1354  * case doesn't correspond to any trigram the trigram extraction code
1355  * would make. The nonblank-blank-blank case is also not possible with
1356  * RPADDING = 1. (Note that in many cases we'd fail to generate such a
1357  * trigram even if it were valid, for example processing "foo bar" will
1358  * not result in considering the trigram "o ". So if you want to support
1359  * RPADDING = 2, there's more to do than just twiddle this test.)
1360  */
1361  if (key->prefix.colors[0] != COLOR_BLANK &&
1362  key->prefix.colors[1] == COLOR_BLANK)
1363  return false;
1364 
1365  /*
1366  * Other combinations involving blank are valid, in particular we assume
1367  * blank-blank-nonblank is valid, which presumes that LPADDING is 2.
1368  *
1369  * Note: Using again the example "foo bar", we will not consider the
1370  * trigram " b", though this trigram would be found by the trigram
1371  * extraction code. Since we will find " ba", it doesn't seem worth
1372  * trying to hack the algorithm to generate the additional trigram.
1373  */
1374 
1375  /* arc label is valid */
1376  return true;
1377 }
TrgmColor colors[2]
Definition: trgm_regexp.c:292
#define COLOR_BLANK
Definition: trgm_regexp.c:288
#define Assert(condition)
Definition: c.h:675
TrgmPrefix prefix
Definition: trgm_regexp.c:311
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:287

Variable Documentation

const float4 penalties[8]
static
Initial value:
= {
1.0f,
3.5f,
0.0f,
0.0f,
4.2f,
2.1f,
25.0f,
0.0f
}

Definition at line 230 of file trgm_regexp.c.

Referenced by selectColorTrigrams().