PostgreSQL Source Code  git master
trgm_regexp.c File Reference
#include "postgres.h"
#include "regex/regexport.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
Include dependency graph for trgm_regexp.c:

Go to the source code of this file.

Data Structures

struct  trgm_mb_char
 
struct  TrgmColorInfo
 
struct  TrgmPrefix
 
struct  ColorTrgm
 
struct  TrgmStateKey
 
struct  TrgmState
 
struct  TrgmArc
 
struct  TrgmArcInfo
 
struct  ColorTrgmInfo
 
struct  TrgmNFA
 
struct  TrgmPackedArc
 
struct  TrgmPackedState
 
struct  TrgmPackedGraph
 
struct  TrgmPackArcInfo
 

Macros

#define MAX_EXPANDED_STATES   128
 
#define MAX_EXPANDED_ARCS   1024
 
#define MAX_TRGM_COUNT   256
 
#define WISH_TRGM_PENALTY   16
 
#define COLOR_COUNT_LIMIT   256
 
#define COLOR_UNKNOWN   (-1)
 
#define COLOR_BLANK   (-2)
 
#define TSTATE_INIT   0x01 /* flag indicating this state is initial */
 
#define TSTATE_FIN   0x02 /* flag indicating this state is final */
 

Typedefs

typedef int TrgmColor
 
typedef struct TrgmState TrgmState
 

Functions

static TRGMcreateTrgmNFAInternal (regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
 
static void RE_compile (regex_t *regex, text *text_re, int cflags, Oid collation)
 
static void getColorInfo (regex_t *regex, TrgmNFA *trgmNFA)
 
static bool convertPgWchar (pg_wchar c, trgm_mb_char *result)
 
static void transformGraph (TrgmNFA *trgmNFA)
 
static void processState (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addKey (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
 
static void addKeyToQueue (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static void addArcs (TrgmNFA *trgmNFA, TrgmState *state)
 
static void addArc (TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
 
static bool validArcLabel (TrgmStateKey *key, TrgmColor co)
 
static TrgmStategetState (TrgmNFA *trgmNFA, TrgmStateKey *key)
 
static bool prefixContains (TrgmPrefix *prefix1, TrgmPrefix *prefix2)
 
static bool selectColorTrigrams (TrgmNFA *trgmNFA)
 
static TRGMexpandColorTrigrams (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static void fillTrgm (trgm *ptrgm, trgm_mb_char s[3])
 
static void mergeStates (TrgmState *state1, TrgmState *state2)
 
static int colorTrgmInfoCmp (const void *p1, const void *p2)
 
static int colorTrgmInfoPenaltyCmp (const void *p1, const void *p2)
 
static TrgmPackedGraphpackGraph (TrgmNFA *trgmNFA, MemoryContext rcontext)
 
static int packArcInfoCmp (const void *a1, const void *a2)
 
TRGMcreateTrgmNFA (text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
 
bool trigramsMatchGraph (TrgmPackedGraph *graph, bool *check)
 

Variables

static const float4 penalties [8]
 

Macro Definition Documentation

◆ COLOR_BLANK

#define COLOR_BLANK   (-2)

◆ COLOR_COUNT_LIMIT

#define COLOR_COUNT_LIMIT   256

Definition at line 222 of file trgm_regexp.c.

Referenced by getColorInfo().

◆ COLOR_UNKNOWN

#define COLOR_UNKNOWN   (-1)

Definition at line 285 of file trgm_regexp.c.

Referenced by addKey(), packArcInfoCmp(), prefixContains(), transformGraph(), and validArcLabel().

◆ MAX_EXPANDED_ARCS

#define MAX_EXPANDED_ARCS   1024

Definition at line 219 of file trgm_regexp.c.

Referenced by transformGraph().

◆ MAX_EXPANDED_STATES

#define MAX_EXPANDED_STATES   128

Definition at line 218 of file trgm_regexp.c.

Referenced by transformGraph().

◆ MAX_TRGM_COUNT

#define MAX_TRGM_COUNT   256

Definition at line 220 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

◆ TSTATE_FIN

#define TSTATE_FIN   0x02 /* flag indicating this state is final */

◆ TSTATE_INIT

#define TSTATE_INIT   0x01 /* flag indicating this state is initial */

Definition at line 328 of file trgm_regexp.c.

Referenced by packArcInfoCmp(), packGraph(), selectColorTrigrams(), and transformGraph().

◆ WISH_TRGM_PENALTY

#define WISH_TRGM_PENALTY   16

Definition at line 221 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

Typedef Documentation

◆ TrgmColor

typedef int TrgmColor

Definition at line 282 of file trgm_regexp.c.

◆ TrgmState

typedef struct TrgmState TrgmState

Function Documentation

◆ addArc()

static void addArc ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key,
TrgmColor  co,
TrgmStateKey destKey 
)
static

Definition at line 1272 of file trgm_regexp.c.

References TrgmState::arcs, TrgmNFA::arcsCount, TrgmPrefix::colors, ColorTrgm::colors, TrgmArc::ctrgm, TrgmState::enterKeys, getState(), lappend(), lfirst, TrgmStateKey::nstate, palloc(), TrgmStateKey::prefix, prefixContains(), TrgmArc::target, and validArcLabel().

Referenced by addArcs().

1274 {
1275  TrgmArc *arc;
1276  ListCell *cell;
1277 
1278  /* Do nothing if this wouldn't be a valid arc label trigram */
1279  if (!validArcLabel(key, co))
1280  return;
1281 
1282  /*
1283  * Check if we are going to reach key which is covered by a key which is
1284  * already listed in this state. If so arc is useless: the NFA can bypass
1285  * it through a path that doesn't require any predictable trigram, so
1286  * whether the arc's trigram is present or not doesn't really matter.
1287  */
1288  foreach(cell, state->enterKeys)
1289  {
1290  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1291 
1292  if (existingKey->nstate == destKey->nstate &&
1293  prefixContains(&existingKey->prefix, &destKey->prefix))
1294  return;
1295  }
1296 
1297  /* Checks were successful, add new arc */
1298  arc = (TrgmArc *) palloc(sizeof(TrgmArc));
1299  arc->target = getState(trgmNFA, destKey);
1300  arc->ctrgm.colors[0] = key->prefix.colors[0];
1301  arc->ctrgm.colors[1] = key->prefix.colors[1];
1302  arc->ctrgm.colors[2] = co;
1303 
1304  state->arcs = lappend(state->arcs, arc);
1305  trgmNFA->arcsCount++;
1306 }
ColorTrgm ctrgm
Definition: trgm_regexp.c:348
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1400
Definition: regguts.h:276
TrgmColor colors[3]
Definition: trgm_regexp.c:299
TrgmColor colors[2]
Definition: trgm_regexp.c:290
List * enterKeys
Definition: trgm_regexp.c:335
TrgmState * target
Definition: trgm_regexp.c:349
List * arcs
Definition: trgm_regexp.c:334
List * lappend(List *list, void *datum)
Definition: list.c:322
int arcsCount
Definition: trgm_regexp.c:414
#define lfirst(lc)
Definition: pg_list.h:190
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1314
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1369
TrgmPrefix prefix
Definition: trgm_regexp.c:309
void * palloc(Size size)
Definition: mcxt.c:949

◆ addArcs()

static void addArcs ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 1180 of file trgm_regexp.c.

References addArc(), TrgmState::arcs, regex_arc_t::co, COLOR_BLANK, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, i, sort-test::key, lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, TrgmNFA::regex, regex_arc_t::to, and TrgmColorInfo::wordCharsCount.

Referenced by processState().

1181 {
1182  TrgmStateKey destKey;
1183  ListCell *cell;
1184  regex_arc_t *arcs;
1185  int arcsCount,
1186  i;
1187 
1188  /*
1189  * Ensure any pad bytes in destKey are zero, since it may get used as a
1190  * hashtable key by getState.
1191  */
1192  MemSet(&destKey, 0, sizeof(destKey));
1193 
1194  /*
1195  * Iterate over enter keys associated with this expanded-graph state. This
1196  * includes both the state's own stateKey, and any enter keys we added to
1197  * it during addKey (which represent expanded-graph states that are not
1198  * distinguishable from this one by means of trigrams). For each such
1199  * enter key, examine all the out-arcs of the key's underlying NFA state,
1200  * and try to make a trigram arc leading to where the out-arc leads.
1201  * (addArc will deal with whether the arc is valid or not.)
1202  */
1203  foreach(cell, state->enterKeys)
1204  {
1205  TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
1206 
1207  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1208  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1209  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1210 
1211  for (i = 0; i < arcsCount; i++)
1212  {
1213  regex_arc_t *arc = &arcs[i];
1214  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1215 
1216  /*
1217  * Ignore non-expandable colors; addKey already handled the case.
1218  *
1219  * We need no special check for begin/end pseudocolors here. We
1220  * don't need to do any processing for them, and they will be
1221  * marked non-expandable since the regex engine will have reported
1222  * them that way.
1223  */
1224  if (!colorInfo->expandable)
1225  continue;
1226 
1227  if (colorInfo->containsNonWord)
1228  {
1229  /*
1230  * Color includes non-word character(s).
1231  *
1232  * Generate an arc, treating this transition as occurring on
1233  * BLANK. This allows word-ending trigrams to be manufactured
1234  * if possible.
1235  */
1236  destKey.prefix.colors[0] = key->prefix.colors[1];
1237  destKey.prefix.colors[1] = COLOR_BLANK;
1238  destKey.nstate = arc->to;
1239 
1240  addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
1241  }
1242 
1243  if (colorInfo->wordCharsCount > 0)
1244  {
1245  /*
1246  * Color includes word character(s).
1247  *
1248  * Generate an arc. Color is pushed into prefix of target
1249  * state.
1250  */
1251  destKey.prefix.colors[0] = key->prefix.colors[1];
1252  destKey.prefix.colors[1] = arc->co;
1253  destKey.nstate = arc->to;
1254 
1255  addArc(trgmNFA, state, key, arc->co, &destKey);
1256  }
1257  }
1258 
1259  pfree(arcs);
1260  }
1261 }
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key, TrgmColor co, TrgmStateKey *destKey)
Definition: trgm_regexp.c:1272
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:403
regex_t * regex
Definition: trgm_regexp.c:402
Definition: regguts.h:276
#define MemSet(start, val, len)
Definition: c.h:962
bool containsNonWord
Definition: trgm_regexp.c:261
void pfree(void *pointer)
Definition: mcxt.c:1056
TrgmColor colors[2]
Definition: trgm_regexp.c:290
List * enterKeys
Definition: trgm_regexp.c:335
#define COLOR_BLANK
Definition: trgm_regexp.c:286
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
#define lfirst(lc)
Definition: pg_list.h:190
TrgmPrefix prefix
Definition: trgm_regexp.c:309
void * palloc(Size size)
Definition: mcxt.c:949
int i

◆ addKey()

static void addKey ( TrgmNFA trgmNFA,
TrgmState state,
TrgmStateKey key 
)
static

Definition at line 1007 of file trgm_regexp.c.

References addKeyToQueue(), TrgmState::arcs, regex_arc_t::co, COLOR_BLANK, COLOR_UNKNOWN, TrgmNFA::colorInfo, TrgmPrefix::colors, TrgmColorInfo::containsNonWord, TrgmState::enterKeys, TrgmColorInfo::expandable, TrgmState::flags, foreach_delete_current, i, lappend(), lfirst, MemSet, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_colorisbegin(), pg_reg_colorisend(), pg_reg_getfinalstate(), pg_reg_getnumoutarcs(), pg_reg_getoutarcs(), TrgmStateKey::prefix, prefixContains(), TrgmNFA::regex, regex_arc_t::to, TSTATE_FIN, validArcLabel(), and TrgmColorInfo::wordCharsCount.

Referenced by processState().

1008 {
1009  regex_arc_t *arcs;
1010  TrgmStateKey destKey;
1011  ListCell *cell;
1012  int i,
1013  arcsCount;
1014 
1015  /*
1016  * Ensure any pad bytes in destKey are zero, since it may get used as a
1017  * hashtable key by getState.
1018  */
1019  MemSet(&destKey, 0, sizeof(destKey));
1020 
1021  /*
1022  * Compare key to each existing enter key of the state to check for
1023  * redundancy. We can drop either old key(s) or the new key if we find
1024  * redundancy.
1025  */
1026  foreach(cell, state->enterKeys)
1027  {
1028  TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
1029 
1030  if (existingKey->nstate == key->nstate)
1031  {
1032  if (prefixContains(&existingKey->prefix, &key->prefix))
1033  {
1034  /* This old key already covers the new key. Nothing to do */
1035  return;
1036  }
1037  if (prefixContains(&key->prefix, &existingKey->prefix))
1038  {
1039  /*
1040  * The new key covers this old key. Remove the old key, it's
1041  * no longer needed once we add this key to the list.
1042  */
1043  state->enterKeys = foreach_delete_current(state->enterKeys,
1044  cell);
1045  }
1046  }
1047  }
1048 
1049  /* No redundancy, so add this key to the state's list */
1050  state->enterKeys = lappend(state->enterKeys, key);
1051 
1052  /* If state is now known final, mark it and we're done */
1053  if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
1054  {
1055  state->flags |= TSTATE_FIN;
1056  return;
1057  }
1058 
1059  /*
1060  * Loop through all outgoing arcs of the corresponding state in the
1061  * original NFA.
1062  */
1063  arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
1064  arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
1065  pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
1066 
1067  for (i = 0; i < arcsCount; i++)
1068  {
1069  regex_arc_t *arc = &arcs[i];
1070 
1071  if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
1072  {
1073  /*
1074  * Start of line/string (^). Trigram extraction treats start of
1075  * line same as start of word: double space prefix is added.
1076  * Hence, make an enter key showing we can reach the arc
1077  * destination with all-blank prefix.
1078  */
1079  destKey.prefix.colors[0] = COLOR_BLANK;
1080  destKey.prefix.colors[1] = COLOR_BLANK;
1081  destKey.nstate = arc->to;
1082 
1083  /* Add enter key to this state */
1084  addKeyToQueue(trgmNFA, &destKey);
1085  }
1086  else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
1087  {
1088  /*
1089  * End of line/string ($). We must consider this arc as a
1090  * transition that doesn't read anything. The reason for adding
1091  * this enter key to the state is that if the arc leads to the
1092  * NFA's final state, we must mark this expanded state as final.
1093  */
1094  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1095  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1096  destKey.nstate = arc->to;
1097 
1098  /* Add enter key to this state */
1099  addKeyToQueue(trgmNFA, &destKey);
1100  }
1101  else
1102  {
1103  /* Regular color */
1104  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
1105 
1106  if (colorInfo->expandable)
1107  {
1108  if (colorInfo->containsNonWord &&
1109  !validArcLabel(key, COLOR_BLANK))
1110  {
1111  /*
1112  * We can reach the arc destination after reading a
1113  * non-word character, but the prefix is not something
1114  * that addArc will accept with COLOR_BLANK, so no trigram
1115  * arc can get made for this transition. We must make an
1116  * enter key to show that the arc destination is
1117  * reachable. Set it up with an all-blank prefix, since
1118  * that corresponds to what the trigram extraction code
1119  * will do at a word starting boundary.
1120  */
1121  destKey.prefix.colors[0] = COLOR_BLANK;
1122  destKey.prefix.colors[1] = COLOR_BLANK;
1123  destKey.nstate = arc->to;
1124  addKeyToQueue(trgmNFA, &destKey);
1125  }
1126 
1127  if (colorInfo->wordCharsCount > 0 &&
1128  !validArcLabel(key, arc->co))
1129  {
1130  /*
1131  * We can reach the arc destination after reading a word
1132  * character, but the prefix is not something that addArc
1133  * will accept, so no trigram arc can get made for this
1134  * transition. We must make an enter key to show that the
1135  * arc destination is reachable. The prefix for the enter
1136  * key should reflect the info we have for this arc.
1137  */
1138  destKey.prefix.colors[0] = key->prefix.colors[1];
1139  destKey.prefix.colors[1] = arc->co;
1140  destKey.nstate = arc->to;
1141  addKeyToQueue(trgmNFA, &destKey);
1142  }
1143  }
1144  else
1145  {
1146  /*
1147  * Unexpandable color. Add enter key with ambiguous prefix,
1148  * showing we can reach the destination from this state, but
1149  * the preceding colors will be uncertain. (We do not set the
1150  * first prefix color to key->prefix.colors[1], because a
1151  * prefix of known followed by unknown is invalid.)
1152  */
1153  destKey.prefix.colors[0] = COLOR_UNKNOWN;
1154  destKey.prefix.colors[1] = COLOR_UNKNOWN;
1155  destKey.nstate = arc->to;
1156  addKeyToQueue(trgmNFA, &destKey);
1157  }
1158  }
1159  }
1160 
1161  pfree(arcs);
1162 }
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
Definition: trgm_regexp.c:1400
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:403
regex_t * regex
Definition: trgm_regexp.c:402
Definition: regguts.h:276
#define MemSet(start, val, len)
Definition: c.h:962
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:191
#define TSTATE_FIN
Definition: trgm_regexp.c:329
#define foreach_delete_current(lst, cell)
Definition: pg_list.h:368
bool containsNonWord
Definition: trgm_regexp.c:261
void pfree(void *pointer)
Definition: mcxt.c:1056
TrgmColor colors[2]
Definition: trgm_regexp.c:290
List * enterKeys
Definition: trgm_regexp.c:335
List * lappend(List *list, void *datum)
Definition: list.c:322
#define COLOR_BLANK
Definition: trgm_regexp.c:286
static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1168
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:208
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
#define lfirst(lc)
Definition: pg_list.h:190
static bool validArcLabel(TrgmStateKey *key, TrgmColor co)
Definition: trgm_regexp.c:1314
TrgmPrefix prefix
Definition: trgm_regexp.c:309
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:285
void * palloc(Size size)
Definition: mcxt.c:949
int i

◆ addKeyToQueue()

static void addKeyToQueue ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1168 of file trgm_regexp.c.

References TrgmNFA::keysQueue, lappend(), and palloc().

Referenced by addKey().

1169 {
1170  TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
1171 
1172  memcpy(keyCopy, key, sizeof(TrgmStateKey));
1173  trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
1174 }
List * lappend(List *list, void *datum)
Definition: list.c:322
void * palloc(Size size)
Definition: mcxt.c:949
List * keysQueue
Definition: trgm_regexp.c:413

◆ colorTrgmInfoCmp()

static int colorTrgmInfoCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1878 of file trgm_regexp.c.

References ColorTrgmInfo::ctrgm.

Referenced by packGraph(), and selectColorTrigrams().

1879 {
1880  const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
1881  const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
1882 
1883  return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
1884 }
ColorTrgm ctrgm
Definition: trgm_regexp.c:374

◆ colorTrgmInfoPenaltyCmp()

static int colorTrgmInfoPenaltyCmp ( const void *  p1,
const void *  p2 
)
static

Definition at line 1891 of file trgm_regexp.c.

Referenced by selectColorTrigrams().

1892 {
1893  float4 penalty1 = ((const ColorTrgmInfo *) p1)->penalty;
1894  float4 penalty2 = ((const ColorTrgmInfo *) p2)->penalty;
1895 
1896  if (penalty1 < penalty2)
1897  return 1;
1898  else if (penalty1 == penalty2)
1899  return 0;
1900  else
1901  return -1;
1902 }
float float4
Definition: c.h:491

◆ convertPgWchar()

static bool convertPgWchar ( pg_wchar  c,
trgm_mb_char result 
)
static

Definition at line 836 of file trgm_regexp.c.

References trgm_mb_char::bytes, lowerstr(), MAX_MULTIBYTE_CHAR_LEN, pfree(), and pg_wchar2mb_with_len().

Referenced by getColorInfo().

837 {
838  /* "s" has enough space for a multibyte character and a trailing NUL */
839  char s[MAX_MULTIBYTE_CHAR_LEN + 1];
840 
841  /*
842  * We can ignore the NUL character, since it can never appear in a PG text
843  * string. This avoids the need for various special cases when
844  * reconstructing trigrams.
845  */
846  if (c == 0)
847  return false;
848 
849  /* Do the conversion, making sure the result is NUL-terminated */
850  memset(s, 0, sizeof(s));
851  pg_wchar2mb_with_len(&c, s, 1);
852 
853  /*
854  * In IGNORECASE mode, we can ignore uppercase characters. We assume that
855  * the regex engine generated both uppercase and lowercase equivalents
856  * within each color, since we used the REG_ICASE option; so there's no
857  * need to process the uppercase version.
858  *
859  * XXX this code is dependent on the assumption that lowerstr() works the
860  * same as the regex engine's internal case folding machinery. Might be
861  * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
862  * On the other hand, the trigrams in the index were created using
863  * lowerstr(), so we're probably screwed if there's any incompatibility
864  * anyway.
865  */
866 #ifdef IGNORECASE
867  {
868  char *lowerCased = lowerstr(s);
869 
870  if (strcmp(lowerCased, s) != 0)
871  {
872  pfree(lowerCased);
873  return false;
874  }
875  pfree(lowerCased);
876  }
877 #endif
878 
879  /* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
880  memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
881  return true;
882 }
char * lowerstr(const char *str)
Definition: ts_locale.c:239
void pfree(void *pointer)
Definition: mcxt.c:1056
char * c
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:30
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:242
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:787

◆ createTrgmNFA()

TRGM* createTrgmNFA ( text text_re,
Oid  collation,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)

Definition at line 521 of file trgm_regexp.c.

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, createTrgmNFAInternal(), CurrentMemoryContext, MemoryContextDelete(), MemoryContextSwitchTo(), PG_END_TRY, PG_FINALLY, pg_regfree(), PG_TRY, RE_compile(), REG_ADVANCED, and REG_ICASE.

Referenced by gin_extract_query_trgm(), and gtrgm_consistent().

523 {
524  TRGM *trg;
525  regex_t regex;
526  MemoryContext tmpcontext;
527  MemoryContext oldcontext;
528 
529  /*
530  * This processing generates a great deal of cruft, which we'd like to
531  * clean up before returning (since this function may be called in a
532  * query-lifespan memory context). Make a temp context we can work in so
533  * that cleanup is easy.
534  */
536  "createTrgmNFA temporary context",
538  oldcontext = MemoryContextSwitchTo(tmpcontext);
539 
540  /*
541  * Stage 1: Compile the regexp into a NFA, using the regexp library.
542  */
543 #ifdef IGNORECASE
544  RE_compile(&regex, text_re, REG_ADVANCED | REG_ICASE, collation);
545 #else
546  RE_compile(&regex, text_re, REG_ADVANCED, collation);
547 #endif
548 
549  /*
550  * Since the regexp library allocates its internal data structures with
551  * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
552  * done even if there's an error.
553  */
554  PG_TRY();
555  {
556  trg = createTrgmNFAInternal(&regex, graph, rcontext);
557  }
558  PG_FINALLY();
559  {
560  pg_regfree(&regex);
561  }
562  PG_END_TRY();
563 
564  /* Clean up all the cruft we created */
565  MemoryContextSwitchTo(oldcontext);
566  MemoryContextDelete(tmpcontext);
567 
568  return trg;
569 }
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
#define AllocSetContextCreate
Definition: memutils.h:170
static TRGM * createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:575
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define REG_ICASE
Definition: regex.h:106
Definition: trgm.h:66
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
#define REG_ADVANCED
Definition: regex.h:103
#define PG_FINALLY()
Definition: elog.h:339
static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
Definition: trgm_regexp.c:729
#define PG_TRY()
Definition: elog.h:322
void pg_regfree(regex_t *re)
Definition: regfree.c:49
Definition: regex.h:55
#define PG_END_TRY()
Definition: elog.h:347

◆ createTrgmNFAInternal()

static TRGM * createTrgmNFAInternal ( regex_t regex,
TrgmPackedGraph **  graph,
MemoryContext  rcontext 
)
static

Definition at line 575 of file trgm_regexp.c.

References TrgmNFA::colorInfo, expandColorTrigrams(), TrgmState::flags, getColorInfo(), TrgmNFA::initState, TrgmNFA::ncolors, packGraph(), TrgmNFA::regex, selectColorTrigrams(), transformGraph(), and TSTATE_FIN.

Referenced by createTrgmNFA().

577 {
578  TRGM *trg;
579  TrgmNFA trgmNFA;
580 
581  trgmNFA.regex = regex;
582 
583  /* Collect color information from the regex */
584  getColorInfo(regex, &trgmNFA);
585 
586 #ifdef TRGM_REGEXP_DEBUG
587  printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
588 #endif
589 
590  /*
591  * Stage 2: Create an expanded graph from the source NFA.
592  */
593  transformGraph(&trgmNFA);
594 
595 #ifdef TRGM_REGEXP_DEBUG
596  printTrgmNFA(&trgmNFA);
597 #endif
598 
599  /*
600  * Fail if we were unable to make a nontrivial graph, ie it is possible to
601  * get from the initial state to the final state without reading any
602  * predictable trigram.
603  */
604  if (trgmNFA.initState->flags & TSTATE_FIN)
605  return NULL;
606 
607  /*
608  * Stage 3: Select color trigrams to expand. Fail if too many trigrams.
609  */
610  if (!selectColorTrigrams(&trgmNFA))
611  return NULL;
612 
613  /*
614  * Stage 4: Expand color trigrams and pack graph into final
615  * representation.
616  */
617  trg = expandColorTrigrams(&trgmNFA, rcontext);
618 
619  *graph = packGraph(&trgmNFA, rcontext);
620 
621 #ifdef TRGM_REGEXP_DEBUG
622  printTrgmPackedGraph(*graph, trg);
623 #endif
624 
625  return trg;
626 }
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:773
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:403
regex_t * regex
Definition: trgm_regexp.c:402
TrgmState * initState
Definition: trgm_regexp.c:408
#define TSTATE_FIN
Definition: trgm_regexp.c:329
static bool selectColorTrigrams(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:1442
Definition: trgm.h:66
static TrgmPackedGraph * packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1916
static TRGM * expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
Definition: trgm_regexp.c:1761
static void transformGraph(TrgmNFA *trgmNFA)
Definition: trgm_regexp.c:902
int ncolors
Definition: trgm_regexp.c:404

◆ expandColorTrigrams()

static TRGM * expandColorTrigrams ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1761 of file trgm_regexp.c.

References ARRKEY, trgm_mb_char::bytes, CALCGTSIZE, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, fillTrgm(), TRGM::flag, GETARR, i, MemoryContextAllocZero(), SET_VARSIZE, TrgmNFA::totalTrgmCount, TRGMHDRSIZE, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

1762 {
1763  TRGM *trg;
1764  trgm *p;
1765  int i;
1766  TrgmColorInfo blankColor;
1767  trgm_mb_char blankChar;
1768 
1769  /* Set up "blank" color structure containing a single zero character */
1770  memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
1771  blankColor.wordCharsCount = 1;
1772  blankColor.wordChars = &blankChar;
1773 
1774  /* Construct the trgm array */
1775  trg = (TRGM *)
1776  MemoryContextAllocZero(rcontext,
1777  TRGMHDRSIZE +
1778  trgmNFA->totalTrgmCount * sizeof(trgm));
1779  trg->flag = ARRKEY;
1780  SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
1781  p = GETARR(trg);
1782  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1783  {
1784  ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
1785  TrgmColorInfo *c[3];
1786  trgm_mb_char s[3];
1787  int j,
1788  i1,
1789  i2,
1790  i3;
1791 
1792  /* Ignore any unexpanded trigrams ... */
1793  if (!colorTrgm->expanded)
1794  continue;
1795 
1796  /* Get colors, substituting the dummy struct for COLOR_BLANK */
1797  for (j = 0; j < 3; j++)
1798  {
1799  if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
1800  c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
1801  else
1802  c[j] = &blankColor;
1803  }
1804 
1805  /* Iterate over all possible combinations of colors' characters */
1806  for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
1807  {
1808  s[0] = c[0]->wordChars[i1];
1809  for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
1810  {
1811  s[1] = c[1]->wordChars[i2];
1812  for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
1813  {
1814  s[2] = c[2]->wordChars[i3];
1815  fillTrgm(p, s);
1816  p++;
1817  }
1818  }
1819  }
1820  }
1821 
1822  return trg;
1823 }
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:403
TrgmColor colors[3]
Definition: trgm_regexp.c:299
#define ARRKEY
Definition: trgm.h:97
int totalTrgmCount
Definition: trgm_regexp.c:420
#define GETARR(x)
Definition: trgm.h:107
Definition: trgm.h:66
char * c
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:418
#define TRGMHDRSIZE
Definition: trgm.h:73
#define COLOR_BLANK
Definition: trgm_regexp.c:286
uint8 flag
Definition: trgm.h:69
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:839
int colorTrgmsCount
Definition: trgm_regexp.c:419
char trgm[3]
Definition: trgm.h:41
#define CALCGTSIZE(flag)
Definition: hstore_gist.c:48
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:242
int i
ColorTrgm ctrgm
Definition: trgm_regexp.c:374
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
trgm_mb_char * wordChars
Definition: trgm_regexp.c:263
static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
Definition: trgm_regexp.c:1829

◆ fillTrgm()

static void fillTrgm ( trgm ptrgm,
trgm_mb_char  s[3] 
)
static

Definition at line 1829 of file trgm_regexp.c.

References generate_unaccent_rules::bytes(), trgm_mb_char::bytes, compact_trigram(), i, MAX_MULTIBYTE_CHAR_LEN, and generate_unaccent_rules::str.

Referenced by expandColorTrigrams().

1830 {
1831  char str[3 * MAX_MULTIBYTE_CHAR_LEN],
1832  *p;
1833  int i,
1834  j;
1835 
1836  /* Write multibyte string into "str" (we don't need null termination) */
1837  p = str;
1838 
1839  for (i = 0; i < 3; i++)
1840  {
1841  if (s[i].bytes[0] != 0)
1842  {
1843  for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
1844  *p++ = s[i].bytes[j];
1845  }
1846  else
1847  {
1848  /* Emit a space in place of COLOR_BLANK */
1849  *p++ = ' ';
1850  }
1851  }
1852 
1853  /* Convert "str" to a standard trigram (possibly hashing it) */
1854  compact_trigram(ptrgm, str, p - str);
1855 }
def bytes(source, encoding='ascii', errors='strict')
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:30
void compact_trigram(trgm *tptr, char *str, int bytelen)
Definition: trgm_op.c:198
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:242
int i

◆ getColorInfo()

static void getColorInfo ( regex_t regex,
TrgmNFA trgmNFA 
)
static

Definition at line 773 of file trgm_regexp.c.

References trgm_mb_char::bytes, chars, COLOR_COUNT_LIMIT, TrgmNFA::colorInfo, TrgmColorInfo::containsNonWord, convertPgWchar(), TrgmColorInfo::expandable, i, ISWORDCHR, TrgmNFA::ncolors, palloc(), palloc0(), pfree(), pg_reg_getcharacters(), pg_reg_getnumcharacters(), pg_reg_getnumcolors(), TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

774 {
775  int colorsCount = pg_reg_getnumcolors(regex);
776  int i;
777 
778  trgmNFA->ncolors = colorsCount;
779  trgmNFA->colorInfo = (TrgmColorInfo *)
780  palloc0(colorsCount * sizeof(TrgmColorInfo));
781 
782  /*
783  * Loop over colors, filling TrgmColorInfo about each.
784  */
785  for (i = 0; i < colorsCount; i++)
786  {
787  TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
788  int charsCount = pg_reg_getnumcharacters(regex, i);
789  pg_wchar *chars;
790  int j;
791 
792  if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
793  {
794  /* Non expandable, or too large to work with */
795  colorInfo->expandable = false;
796  continue;
797  }
798 
799  colorInfo->expandable = true;
800  colorInfo->containsNonWord = false;
801  colorInfo->wordChars = (trgm_mb_char *)
802  palloc(sizeof(trgm_mb_char) * charsCount);
803  colorInfo->wordCharsCount = 0;
804 
805  /* Extract all the chars in this color */
806  chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
807  pg_reg_getcharacters(regex, i, chars, charsCount);
808 
809  /*
810  * Convert characters back to multibyte form, and save only those that
811  * are word characters. Set "containsNonWord" if any non-word
812  * character. (Note: it'd probably be nicer to keep the chars in
813  * pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
814  */
815  for (j = 0; j < charsCount; j++)
816  {
817  trgm_mb_char c;
818 
819  if (!convertPgWchar(chars[j], &c))
820  continue; /* ok to ignore it altogether */
821  if (ISWORDCHR(c.bytes))
822  colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
823  else
824  colorInfo->containsNonWord = true;
825  }
826 
827  pfree(chars);
828  }
829 }
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:403
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:229
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:265
#define ISWORDCHR(c)
Definition: trgm.h:54
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:174
bool containsNonWord
Definition: trgm_regexp.c:261
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result)
Definition: trgm_regexp.c:836
void pfree(void *pointer)
Definition: mcxt.c:1056
char * c
#define COLOR_COUNT_LIMIT
Definition: trgm_regexp.c:222
unsigned int pg_wchar
Definition: mbprint.c:31
void * palloc0(Size size)
Definition: mcxt.c:980
char bytes[MAX_MULTIBYTE_CHAR_LEN]
Definition: trgm_regexp.c:242
void * palloc(Size size)
Definition: mcxt.c:949
int i
trgm_mb_char * wordChars
Definition: trgm_regexp.c:263
static char chars[TZ_MAX_CHARS]
Definition: zic.c:395
int ncolors
Definition: trgm_regexp.c:404

◆ getState()

static TrgmState * getState ( TrgmNFA trgmNFA,
TrgmStateKey key 
)
static

Definition at line 1369 of file trgm_regexp.c.

References TrgmState::arcs, TrgmState::enterKeys, TrgmState::flags, HASH_ENTER, hash_search(), lappend(), NIL, TrgmNFA::nstates, TrgmState::parent, TrgmNFA::queue, TrgmState::snumber, TrgmNFA::states, TrgmState::tentFlags, and TrgmState::tentParent.

Referenced by addArc(), and transformGraph().

1370 {
1371  TrgmState *state;
1372  bool found;
1373 
1374  state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
1375  &found);
1376  if (!found)
1377  {
1378  /* New state: initialize and queue it */
1379  state->arcs = NIL;
1380  state->enterKeys = NIL;
1381  state->flags = 0;
1382  /* states are initially given negative numbers */
1383  state->snumber = -(++trgmNFA->nstates);
1384  state->parent = NULL;
1385  state->tentFlags = 0;
1386  state->tentParent = NULL;
1387 
1388  trgmNFA->queue = lappend(trgmNFA->queue, state);
1389  }
1390  return state;
1391 }
#define NIL
Definition: pg_list.h:65
struct TrgmState * tentParent
Definition: trgm_regexp.c:340
HTAB * states
Definition: trgm_regexp.c:407
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:906
int nstates
Definition: trgm_regexp.c:409
List * enterKeys
Definition: trgm_regexp.c:335
List * arcs
Definition: trgm_regexp.c:334
List * lappend(List *list, void *datum)
Definition: list.c:322
List * queue
Definition: trgm_regexp.c:412
int tentFlags
Definition: trgm_regexp.c:339
Definition: regguts.h:298
struct TrgmState * parent
Definition: trgm_regexp.c:338

◆ mergeStates()

static void mergeStates ( TrgmState state1,
TrgmState state2 
)
static

Definition at line 1861 of file trgm_regexp.c.

References Assert, TrgmState::flags, and TrgmState::parent.

Referenced by selectColorTrigrams().

1862 {
1863  Assert(state1 != state2);
1864  Assert(!state1->parent);
1865  Assert(!state2->parent);
1866 
1867  /* state1 absorbs state2's flags */
1868  state1->flags |= state2->flags;
1869 
1870  /* state2, and indirectly all its children, become children of state1 */
1871  state2->parent = state1;
1872 }
#define Assert(condition)
Definition: c.h:739
struct TrgmState * parent
Definition: trgm_regexp.c:338

◆ packArcInfoCmp()

static int packArcInfoCmp ( const void *  a1,
const void *  a2 
)
static

Definition at line 2071 of file trgm_regexp.c.

References appendStringInfo(), appendStringInfoChar(), appendStringInfoString(), TrgmState::arcs, TrgmPackedState::arcs, TrgmPackedState::arcsCount, buf, trgm_mb_char::bytes, COLOR_BLANK, COLOR_UNKNOWN, ColorTrgm::colors, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsCount, TrgmArc::ctrgm, StringInfoData::data, TrgmColorInfo::expandable, TrgmState::flags, fprintf, GETARR, hash_seq_init(), hash_seq_search(), i, initStringInfo(), lfirst, MAX_MULTIBYTE_CHAR_LEN, TrgmStateKey::nstate, palloc(), pfree(), pg_reg_getfinalstate(), pg_reg_getinitialstate(), pg_reg_getnumoutarcs(), pg_reg_getnumstates(), pg_reg_getoutarcs(), TrgmState::snumber, TrgmPackArcInfo::sourceState, TrgmState::stateKey, TrgmNFA::states, TrgmPackedGraph::states, TrgmPackedGraph::statesCount, TrgmArc::target, TrgmPackedArc::targetState, TrgmPackArcInfo::targetState, TSTATE_FIN, TSTATE_INIT, TrgmColorInfo::wordChars, and TrgmColorInfo::wordCharsCount.

Referenced by packGraph().

2072 {
2073  const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
2074  const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
2075 
2076  if (p1->sourceState < p2->sourceState)
2077  return -1;
2078  if (p1->sourceState > p2->sourceState)
2079  return 1;
2080  if (p1->colorTrgm < p2->colorTrgm)
2081  return -1;
2082  if (p1->colorTrgm > p2->colorTrgm)
2083  return 1;
2084  if (p1->targetState < p2->targetState)
2085  return -1;
2086  if (p1->targetState > p2->targetState)
2087  return 1;
2088  return 0;
2089 }
static const FormData_pg_attribute a2
Definition: heap.c:165
static const FormData_pg_attribute a1
Definition: heap.c:151

◆ packGraph()

static TrgmPackedGraph * packGraph ( TrgmNFA trgmNFA,
MemoryContext  rcontext 
)
static

Definition at line 1916 of file trgm_regexp.c.

References TrgmState::arcs, TrgmPackedState::arcs, TrgmNFA::arcsCount, TrgmPackedState::arcsCount, Assert, ColorTrgmInfo::cnumber, TrgmPackedArc::colorTrgm, TrgmPackArcInfo::colorTrgm, colorTrgmInfoCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::expanded, TrgmState::flags, hash_seq_init(), hash_seq_search(), i, lfirst, MemoryContextAlloc(), packArcInfoCmp(), palloc(), TrgmState::parent, qsort, TrgmState::snumber, TrgmPackArcInfo::sourceState, TrgmNFA::states, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, TrgmArc::target, TrgmPackedArc::targetState, TrgmPackArcInfo::targetState, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

1917 {
1918  int snumber = 2,
1919  arcIndex,
1920  arcsCount;
1921  HASH_SEQ_STATUS scan_status;
1922  TrgmState *state;
1923  TrgmPackArcInfo *arcs,
1924  *p1,
1925  *p2;
1926  TrgmPackedArc *packedArcs;
1927  TrgmPackedGraph *result;
1928  int i,
1929  j;
1930 
1931  /* Enumerate surviving states, giving init and fin reserved numbers */
1932  hash_seq_init(&scan_status, trgmNFA->states);
1933  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1934  {
1935  while (state->parent)
1936  state = state->parent;
1937 
1938  if (state->snumber < 0)
1939  {
1940  if (state->flags & TSTATE_INIT)
1941  state->snumber = 0;
1942  else if (state->flags & TSTATE_FIN)
1943  state->snumber = 1;
1944  else
1945  {
1946  state->snumber = snumber;
1947  snumber++;
1948  }
1949  }
1950  }
1951 
1952  /* Collect array of all arcs */
1953  arcs = (TrgmPackArcInfo *)
1954  palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
1955  arcIndex = 0;
1956  hash_seq_init(&scan_status, trgmNFA->states);
1957  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1958  {
1959  TrgmState *source = state;
1960  ListCell *cell;
1961 
1962  while (source->parent)
1963  source = source->parent;
1964 
1965  foreach(cell, state->arcs)
1966  {
1967  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1968  TrgmState *target = arc->target;
1969 
1970  while (target->parent)
1971  target = target->parent;
1972 
1973  if (source->snumber != target->snumber)
1974  {
1975  ColorTrgmInfo *ctrgm;
1976 
1977  ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
1978  trgmNFA->colorTrgms,
1979  trgmNFA->colorTrgmsCount,
1980  sizeof(ColorTrgmInfo),
1982  Assert(ctrgm != NULL);
1983  Assert(ctrgm->expanded);
1984 
1985  arcs[arcIndex].sourceState = source->snumber;
1986  arcs[arcIndex].targetState = target->snumber;
1987  arcs[arcIndex].colorTrgm = ctrgm->cnumber;
1988  arcIndex++;
1989  }
1990  }
1991  }
1992 
1993  /* Sort arcs to ease duplicate detection */
1994  qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
1995 
1996  /* We could have duplicates because states were merged. Remove them. */
1997  /* p1 is probe point, p2 is last known non-duplicate. */
1998  p2 = arcs;
1999  for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
2000  {
2001  if (packArcInfoCmp(p1, p2) > 0)
2002  {
2003  p2++;
2004  *p2 = *p1;
2005  }
2006  }
2007  arcsCount = (p2 - arcs) + 1;
2008 
2009  /* Create packed representation */
2010  result = (TrgmPackedGraph *)
2011  MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
2012 
2013  /* Pack color trigrams information */
2014  result->colorTrigramsCount = 0;
2015  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2016  {
2017  if (trgmNFA->colorTrgms[i].expanded)
2018  result->colorTrigramsCount++;
2019  }
2020  result->colorTrigramGroups = (int *)
2021  MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
2022  j = 0;
2023  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
2024  {
2025  if (trgmNFA->colorTrgms[i].expanded)
2026  {
2027  result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
2028  j++;
2029  }
2030  }
2031 
2032  /* Pack states and arcs information */
2033  result->statesCount = snumber;
2034  result->states = (TrgmPackedState *)
2035  MemoryContextAlloc(rcontext, snumber * sizeof(TrgmPackedState));
2036  packedArcs = (TrgmPackedArc *)
2037  MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
2038  j = 0;
2039  for (i = 0; i < snumber; i++)
2040  {
2041  int cnt = 0;
2042 
2043  result->states[i].arcs = &packedArcs[j];
2044  while (j < arcsCount && arcs[j].sourceState == i)
2045  {
2046  packedArcs[j].targetState = arcs[j].targetState;
2047  packedArcs[j].colorTrgm = arcs[j].colorTrgm;
2048  cnt++;
2049  j++;
2050  }
2051  result->states[i].arcsCount = cnt;
2052  }
2053 
2054  /* Allocate working memory for trigramsMatchGraph() */
2055  result->colorTrigramsActive = (bool *)
2056  MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
2057  result->statesActive = (bool *)
2058  MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
2059  result->statesQueue = (int *)
2060  MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
2061 
2062  return result;
2063 }
bool * colorTrigramsActive
Definition: trgm_regexp.c:461
ColorTrgm ctrgm
Definition: trgm_regexp.c:348
Definition: regguts.h:276
HTAB * states
Definition: trgm_regexp.c:407
TrgmPackedState * states
Definition: trgm_regexp.c:458
#define TSTATE_FIN
Definition: trgm_regexp.c:329
#define TSTATE_INIT
Definition: trgm_regexp.c:328
TrgmPackedArc * arcs
Definition: trgm_regexp.c:435
static int packArcInfoCmp(const void *a1, const void *a2)
Definition: trgm_regexp.c:2071
bool * statesActive
Definition: trgm_regexp.c:462
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:418
TrgmState * target
Definition: trgm_regexp.c:349
List * arcs
Definition: trgm_regexp.c:334
int arcsCount
Definition: trgm_regexp.c:414
int colorTrgmsCount
Definition: trgm_regexp.c:419
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
Definition: regguts.h:298
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1389
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1379
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1878
struct TrgmState * parent
Definition: trgm_regexp.c:338
void * palloc(Size size)
Definition: mcxt.c:949
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
int i
#define qsort(a, b, c, d)
Definition: port.h:491
int * colorTrigramGroups
Definition: trgm_regexp.c:451

◆ prefixContains()

static bool prefixContains ( TrgmPrefix prefix1,
TrgmPrefix prefix2 
)
static

Definition at line 1400 of file trgm_regexp.c.

References COLOR_UNKNOWN, and TrgmPrefix::colors.

Referenced by addArc(), and addKey().

1401 {
1402  if (prefix1->colors[1] == COLOR_UNKNOWN)
1403  {
1404  /* Fully ambiguous prefix contains everything */
1405  return true;
1406  }
1407  else if (prefix1->colors[0] == COLOR_UNKNOWN)
1408  {
1409  /*
1410  * Prefix with only first unknown color contains every prefix with
1411  * same second color.
1412  */
1413  if (prefix1->colors[1] == prefix2->colors[1])
1414  return true;
1415  else
1416  return false;
1417  }
1418  else
1419  {
1420  /* Exact prefix contains only the exact same prefix */
1421  if (prefix1->colors[0] == prefix2->colors[0] &&
1422  prefix1->colors[1] == prefix2->colors[1])
1423  return true;
1424  else
1425  return false;
1426  }
1427 }
TrgmColor colors[2]
Definition: trgm_regexp.c:290
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:285

◆ processState()

static void processState ( TrgmNFA trgmNFA,
TrgmState state 
)
static

Definition at line 964 of file trgm_regexp.c.

References addArcs(), addKey(), TrgmState::flags, sort-test::key, TrgmNFA::keysQueue, linitial, list_delete_first(), NIL, TrgmState::stateKey, and TSTATE_FIN.

Referenced by transformGraph().

965 {
966  /* keysQueue should be NIL already, but make sure */
967  trgmNFA->keysQueue = NIL;
968 
969  /*
970  * Add state's own key, and then process all keys added to keysQueue until
971  * queue is empty. But we can quit if the state gets marked final.
972  */
973  addKey(trgmNFA, state, &state->stateKey);
974  while (trgmNFA->keysQueue != NIL && !(state->flags & TSTATE_FIN))
975  {
977 
978  trgmNFA->keysQueue = list_delete_first(trgmNFA->keysQueue);
979  addKey(trgmNFA, state, key);
980  }
981 
982  /*
983  * Add outgoing arcs only if state isn't final (we have no interest in
984  * outgoing arcs if we already match)
985  */
986  if (!(state->flags & TSTATE_FIN))
987  addArcs(trgmNFA, state);
988 }
#define NIL
Definition: pg_list.h:65
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
Definition: trgm_regexp.c:1007
#define TSTATE_FIN
Definition: trgm_regexp.c:329
#define linitial(l)
Definition: pg_list.h:195
TrgmStateKey stateKey
Definition: trgm_regexp.c:333
static void addArcs(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:1180
List * keysQueue
Definition: trgm_regexp.c:413
List * list_delete_first(List *list)
Definition: list.c:861

◆ RE_compile()

static void RE_compile ( regex_t regex,
text text_re,
int  cflags,
Oid  collation 
)
static

Definition at line 729 of file trgm_regexp.c.

References ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by createTrgmNFA().

730 {
731  int text_re_len = VARSIZE_ANY_EXHDR(text_re);
732  char *text_re_val = VARDATA_ANY(text_re);
733  pg_wchar *pattern;
734  int pattern_len;
735  int regcomp_result;
736  char errMsg[100];
737 
738  /* Convert pattern string to wide characters */
739  pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
740  pattern_len = pg_mb2wchar_with_len(text_re_val,
741  pattern,
742  text_re_len);
743 
744  /* Compile regex */
745  regcomp_result = pg_regcomp(regex,
746  pattern,
747  pattern_len,
748  cflags,
749  collation);
750 
751  pfree(pattern);
752 
753  if (regcomp_result != REG_OKAY)
754  {
755  /* re didn't compile (no need for pg_regfree, if so) */
756  pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
757  ereport(ERROR,
758  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
759  errmsg("invalid regular expression: %s", errMsg)));
760  }
761 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
int errcode(int sqlerrcode)
Definition: elog.c:608
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
Definition: regcomp.c:313
void pfree(void *pointer)
Definition: mcxt.c:1056
#define REG_OKAY
Definition: regex.h:137
#define ERROR
Definition: elog.h:43
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define ereport(elevel, rest)
Definition: elog.h:141
unsigned int pg_wchar
Definition: mbprint.c:31
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:765
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822

◆ selectColorTrigrams()

static bool selectColorTrigrams ( TrgmNFA trgmNFA)
static

Definition at line 1442 of file trgm_regexp.c.

References TrgmState::arcs, ColorTrgmInfo::arcs, TrgmNFA::arcsCount, Assert, ColorTrgmInfo::cnumber, COLOR_BLANK, TrgmNFA::colorInfo, ColorTrgm::colors, colorTrgmInfoCmp(), colorTrgmInfoPenaltyCmp(), TrgmNFA::colorTrgms, TrgmNFA::colorTrgmsCount, ColorTrgmInfo::count, TrgmArc::ctrgm, ColorTrgmInfo::ctrgm, ColorTrgmInfo::expanded, TrgmState::flags, fprintf, hash_seq_init(), hash_seq_search(), i, lfirst, list_concat(), list_length(), list_make1, MAX_TRGM_COUNT, mergeStates(), palloc(), palloc0(), TrgmState::parent, penalties, ColorTrgmInfo::penalty, qsort, TrgmState::snumber, TrgmArcInfo::source, TrgmNFA::states, TrgmArc::target, TrgmArcInfo::target, TrgmState::tentFlags, TrgmState::tentParent, TrgmNFA::totalTrgmCount, TSTATE_FIN, TSTATE_INIT, WISH_TRGM_PENALTY, and TrgmColorInfo::wordCharsCount.

Referenced by createTrgmNFAInternal().

1443 {
1444  HASH_SEQ_STATUS scan_status;
1445  int arcsCount = trgmNFA->arcsCount,
1446  i;
1447  TrgmState *state;
1448  ColorTrgmInfo *colorTrgms;
1449  int64 totalTrgmCount;
1450  float4 totalTrgmPenalty;
1451  int cnumber;
1452 
1453  /* Collect color trigrams from all arcs */
1454  colorTrgms = (ColorTrgmInfo *) palloc0(sizeof(ColorTrgmInfo) * arcsCount);
1455  trgmNFA->colorTrgms = colorTrgms;
1456 
1457  i = 0;
1458  hash_seq_init(&scan_status, trgmNFA->states);
1459  while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
1460  {
1461  ListCell *cell;
1462 
1463  foreach(cell, state->arcs)
1464  {
1465  TrgmArc *arc = (TrgmArc *) lfirst(cell);
1466  TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
1467  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1468 
1469  arcInfo->source = state;
1470  arcInfo->target = arc->target;
1471  trgmInfo->ctrgm = arc->ctrgm;
1472  trgmInfo->cnumber = -1;
1473  /* count and penalty will be set below */
1474  trgmInfo->expanded = true;
1475  trgmInfo->arcs = list_make1(arcInfo);
1476  i++;
1477  }
1478  }
1479  Assert(i == arcsCount);
1480 
1481  /* Remove duplicates, merging their arcs lists */
1482  if (arcsCount >= 2)
1483  {
1484  ColorTrgmInfo *p1,
1485  *p2;
1486 
1487  /* Sort trigrams to ease duplicate detection */
1488  qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
1489 
1490  /* p1 is probe point, p2 is last known non-duplicate. */
1491  p2 = colorTrgms;
1492  for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
1493  {
1494  if (colorTrgmInfoCmp(p1, p2) > 0)
1495  {
1496  p2++;
1497  *p2 = *p1;
1498  }
1499  else
1500  {
1501  p2->arcs = list_concat(p2->arcs, p1->arcs);
1502  }
1503  }
1504  trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
1505  }
1506  else
1507  {
1508  trgmNFA->colorTrgmsCount = arcsCount;
1509  }
1510 
1511  /*
1512  * Count number of simple trigrams generated by each color trigram, and
1513  * also compute a penalty value, which is the number of simple trigrams
1514  * times a multiplier that depends on its whitespace content.
1515  *
1516  * Note: per-color-trigram counts cannot overflow an int so long as
1517  * COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
1518  * 1290. However, the grand total totalTrgmCount might conceivably
1519  * overflow an int, so we use int64 for that within this routine. Also,
1520  * penalties are calculated in float4 arithmetic to avoid any overflow
1521  * worries.
1522  */
1523  totalTrgmCount = 0;
1524  totalTrgmPenalty = 0.0f;
1525  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1526  {
1527  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1528  int j,
1529  count = 1,
1530  typeIndex = 0;
1531 
1532  for (j = 0; j < 3; j++)
1533  {
1534  TrgmColor c = trgmInfo->ctrgm.colors[j];
1535 
1536  typeIndex *= 2;
1537  if (c == COLOR_BLANK)
1538  typeIndex++;
1539  else
1540  count *= trgmNFA->colorInfo[c].wordCharsCount;
1541  }
1542  trgmInfo->count = count;
1543  totalTrgmCount += count;
1544  trgmInfo->penalty = penalties[typeIndex] * (float4) count;
1545  totalTrgmPenalty += trgmInfo->penalty;
1546  }
1547 
1548  /* Sort color trigrams in descending order of their penalties */
1549  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1551 
1552  /*
1553  * Remove color trigrams from the graph so long as total penalty of color
1554  * trigrams exceeds WISH_TRGM_PENALTY. (If we fail to get down to
1555  * WISH_TRGM_PENALTY, it's OK so long as total count is no more than
1556  * MAX_TRGM_COUNT.) We prefer to remove color trigrams with higher
1557  * penalty, since those are the most promising for reducing the total
1558  * penalty. When removing a color trigram we have to merge states
1559  * connected by arcs labeled with that trigram. It's necessary to not
1560  * merge initial and final states, because our graph becomes useless if
1561  * that happens; so we cannot always remove the trigram we'd prefer to.
1562  */
1563  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1564  {
1565  ColorTrgmInfo *trgmInfo = &colorTrgms[i];
1566  bool canRemove = true;
1567  ListCell *cell;
1568 
1569  /* Done if we've reached the target */
1570  if (totalTrgmPenalty <= WISH_TRGM_PENALTY)
1571  break;
1572 
1573 #ifdef TRGM_REGEXP_DEBUG
1574  fprintf(stderr, "considering ctrgm %d %d %d, penalty %f, %d arcs\n",
1575  trgmInfo->ctrgm.colors[0],
1576  trgmInfo->ctrgm.colors[1],
1577  trgmInfo->ctrgm.colors[2],
1578  trgmInfo->penalty,
1579  list_length(trgmInfo->arcs));
1580 #endif
1581 
1582  /*
1583  * Does any arc of this color trigram connect initial and final
1584  * states? If so we can't remove it.
1585  */
1586  foreach(cell, trgmInfo->arcs)
1587  {
1588  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1589  TrgmState *source = arcInfo->source,
1590  *target = arcInfo->target;
1591  int source_flags,
1592  target_flags;
1593 
1594 #ifdef TRGM_REGEXP_DEBUG
1595  fprintf(stderr, "examining arc to s%d (%x) from s%d (%x)\n",
1596  -target->snumber, target->flags,
1597  -source->snumber, source->flags);
1598 #endif
1599 
1600  /* examine parent states, if any merging has already happened */
1601  while (source->parent)
1602  source = source->parent;
1603  while (target->parent)
1604  target = target->parent;
1605 
1606 #ifdef TRGM_REGEXP_DEBUG
1607  fprintf(stderr, " ... after completed merges: to s%d (%x) from s%d (%x)\n",
1608  -target->snumber, target->flags,
1609  -source->snumber, source->flags);
1610 #endif
1611 
1612  /* we must also consider merges we are planning right now */
1613  source_flags = source->flags | source->tentFlags;
1614  while (source->tentParent)
1615  {
1616  source = source->tentParent;
1617  source_flags |= source->flags | source->tentFlags;
1618  }
1619  target_flags = target->flags | target->tentFlags;
1620  while (target->tentParent)
1621  {
1622  target = target->tentParent;
1623  target_flags |= target->flags | target->tentFlags;
1624  }
1625 
1626 #ifdef TRGM_REGEXP_DEBUG
1627  fprintf(stderr, " ... after tentative merges: to s%d (%x) from s%d (%x)\n",
1628  -target->snumber, target_flags,
1629  -source->snumber, source_flags);
1630 #endif
1631 
1632  /* would fully-merged state have both INIT and FIN set? */
1633  if (((source_flags | target_flags) & (TSTATE_INIT | TSTATE_FIN)) ==
1634  (TSTATE_INIT | TSTATE_FIN))
1635  {
1636  canRemove = false;
1637  break;
1638  }
1639 
1640  /* ok so far, so remember planned merge */
1641  if (source != target)
1642  {
1643 #ifdef TRGM_REGEXP_DEBUG
1644  fprintf(stderr, " ... tentatively merging s%d into s%d\n",
1645  -target->snumber, -source->snumber);
1646 #endif
1647  target->tentParent = source;
1648  source->tentFlags |= target_flags;
1649  }
1650  }
1651 
1652  /*
1653  * We must reset all the tentFlags/tentParent fields before
1654  * continuing. tentFlags could only have become set in states that
1655  * are the source or parent or tentative parent of one of the current
1656  * arcs; likewise tentParent could only have become set in states that
1657  * are the target or parent or tentative parent of one of the current
1658  * arcs. There might be some overlap between those sets, but if we
1659  * clear tentFlags in target states as well as source states, we
1660  * should be okay even if we visit a state as target before visiting
1661  * it as a source.
1662  */
1663  foreach(cell, trgmInfo->arcs)
1664  {
1665  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1666  TrgmState *source = arcInfo->source,
1667  *target = arcInfo->target;
1668  TrgmState *ttarget;
1669 
1670  /* no need to touch previously-merged states */
1671  while (source->parent)
1672  source = source->parent;
1673  while (target->parent)
1674  target = target->parent;
1675 
1676  while (source)
1677  {
1678  source->tentFlags = 0;
1679  source = source->tentParent;
1680  }
1681 
1682  while ((ttarget = target->tentParent) != NULL)
1683  {
1684  target->tentParent = NULL;
1685  target->tentFlags = 0; /* in case it was also a source */
1686  target = ttarget;
1687  }
1688  }
1689 
1690  /* Now, move on if we can't drop this trigram */
1691  if (!canRemove)
1692  {
1693 #ifdef TRGM_REGEXP_DEBUG
1694  fprintf(stderr, " ... not ok to merge\n");
1695 #endif
1696  continue;
1697  }
1698 
1699  /* OK, merge states linked by each arc labeled by the trigram */
1700  foreach(cell, trgmInfo->arcs)
1701  {
1702  TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
1703  TrgmState *source = arcInfo->source,
1704  *target = arcInfo->target;
1705 
1706  while (source->parent)
1707  source = source->parent;
1708  while (target->parent)
1709  target = target->parent;
1710  if (source != target)
1711  {
1712 #ifdef TRGM_REGEXP_DEBUG
1713  fprintf(stderr, "merging s%d into s%d\n",
1714  -target->snumber, -source->snumber);
1715 #endif
1716  mergeStates(source, target);
1717  /* Assert we didn't merge initial and final states */
1718  Assert((source->flags & (TSTATE_INIT | TSTATE_FIN)) !=
1719  (TSTATE_INIT | TSTATE_FIN));
1720  }
1721  }
1722 
1723  /* Mark trigram unexpanded, and update totals */
1724  trgmInfo->expanded = false;
1725  totalTrgmCount -= trgmInfo->count;
1726  totalTrgmPenalty -= trgmInfo->penalty;
1727  }
1728 
1729  /* Did we succeed in fitting into MAX_TRGM_COUNT? */
1730  if (totalTrgmCount > MAX_TRGM_COUNT)
1731  return false;
1732 
1733  trgmNFA->totalTrgmCount = (int) totalTrgmCount;
1734 
1735  /*
1736  * Sort color trigrams by colors (will be useful for bsearch in packGraph)
1737  * and enumerate the color trigrams that are expanded.
1738  */
1739  cnumber = 0;
1740  qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
1742  for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
1743  {
1744  if (colorTrgms[i].expanded)
1745  {
1746  colorTrgms[i].cnumber = cnumber;
1747  cnumber++;
1748  }
1749  }
1750 
1751  return true;
1752 }
static void mergeStates(TrgmState *state1, TrgmState *state2)
Definition: trgm_regexp.c:1861
ColorTrgm ctrgm
Definition: trgm_regexp.c:348
struct TrgmState * tentParent
Definition: trgm_regexp.c:340
TrgmColorInfo * colorInfo
Definition: trgm_regexp.c:403
float4 penalty
Definition: trgm_regexp.c:377
Definition: regguts.h:276
TrgmColor colors[3]
Definition: trgm_regexp.c:299
HTAB * states
Definition: trgm_regexp.c:407
#define WISH_TRGM_PENALTY
Definition: trgm_regexp.c:221
List * list_concat(List *list1, const List *list2)
Definition: list.c:516
#define TSTATE_FIN
Definition: trgm_regexp.c:329
#define TSTATE_INIT
Definition: trgm_regexp.c:328
int totalTrgmCount
Definition: trgm_regexp.c:420
#define fprintf
Definition: port.h:196
#define list_make1(x1)
Definition: pg_list.h:227
char * c
TrgmState * target
Definition: trgm_regexp.c:360
ColorTrgmInfo * colorTrgms
Definition: trgm_regexp.c:418
TrgmState * target
Definition: trgm_regexp.c:349
List * arcs
Definition: trgm_regexp.c:334
TrgmState * source
Definition: trgm_regexp.c:359
int arcsCount
Definition: trgm_regexp.c:414
#define COLOR_BLANK
Definition: trgm_regexp.c:286
float float4
Definition: c.h:491
void * palloc0(Size size)
Definition: mcxt.c:980
static int colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1891
int colorTrgmsCount
Definition: trgm_regexp.c:419
#define MAX_TRGM_COUNT
Definition: trgm_regexp.c:220
int tentFlags
Definition: trgm_regexp.c:339
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
Definition: regguts.h:298
static int list_length(const List *l)
Definition: pg_list.h:169
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1389
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1379
static const float4 penalties[8]
Definition: trgm_regexp.c:228
static int colorTrgmInfoCmp(const void *p1, const void *p2)
Definition: trgm_regexp.c:1878
struct TrgmState * parent
Definition: trgm_regexp.c:338
void * palloc(Size size)
Definition: mcxt.c:949
int i
ColorTrgm ctrgm
Definition: trgm_regexp.c:374
#define qsort(a, b, c, d)
Definition: port.h:491
int TrgmColor
Definition: trgm_regexp.c:282

◆ transformGraph()

static void transformGraph ( TrgmNFA trgmNFA)
static

Definition at line 902 of file trgm_regexp.c.

References TrgmNFA::arcsCount, COLOR_UNKNOWN, TrgmPrefix::colors, CurrentMemoryContext, HASHCTL::entrysize, TrgmState::flags, getState(), HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, hash_get_num_entries(), HASHCTL::hcxt, TrgmNFA::initState, HASHCTL::keysize, TrgmNFA::keysQueue, linitial, list_delete_first(), MAX_EXPANDED_ARCS, MAX_EXPANDED_STATES, MemSet, NIL, TrgmStateKey::nstate, TrgmNFA::nstates, TrgmNFA::overflowed, pg_reg_getinitialstate(), TrgmStateKey::prefix, processState(), TrgmNFA::queue, TrgmNFA::regex, TrgmNFA::states, TSTATE_FIN, and TSTATE_INIT.

Referenced by createTrgmNFAInternal().

903 {
904  HASHCTL hashCtl;
905  TrgmStateKey initkey;
906  TrgmState *initstate;
907 
908  /* Initialize this stage's workspace in trgmNFA struct */
909  trgmNFA->queue = NIL;
910  trgmNFA->keysQueue = NIL;
911  trgmNFA->arcsCount = 0;
912  trgmNFA->overflowed = false;
913 
914  /* Create hashtable for states */
915  hashCtl.keysize = sizeof(TrgmStateKey);
916  hashCtl.entrysize = sizeof(TrgmState);
917  hashCtl.hcxt = CurrentMemoryContext;
918  trgmNFA->states = hash_create("Trigram NFA",
919  1024,
920  &hashCtl,
922  trgmNFA->nstates = 0;
923 
924  /* Create initial state: ambiguous prefix, NFA's initial state */
925  MemSet(&initkey, 0, sizeof(initkey));
926  initkey.prefix.colors[0] = COLOR_UNKNOWN;
927  initkey.prefix.colors[1] = COLOR_UNKNOWN;
928  initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
929 
930  initstate = getState(trgmNFA, &initkey);
931  initstate->flags |= TSTATE_INIT;
932  trgmNFA->initState = initstate;
933 
934  /*
935  * Recursively build the expanded graph by processing queue of states
936  * (breadth-first search). getState already put initstate in the queue.
937  */
938  while (trgmNFA->queue != NIL)
939  {
940  TrgmState *state = (TrgmState *) linitial(trgmNFA->queue);
941 
942  trgmNFA->queue = list_delete_first(trgmNFA->queue);
943 
944  /*
945  * If we overflowed then just mark state as final. Otherwise do
946  * actual processing.
947  */
948  if (trgmNFA->overflowed)
949  state->flags |= TSTATE_FIN;
950  else
951  processState(trgmNFA, state);
952 
953  /* Did we overflow? */
954  if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
956  trgmNFA->overflowed = true;
957  }
958 }
#define MAX_EXPANDED_ARCS
Definition: trgm_regexp.c:219
#define NIL
Definition: pg_list.h:65
struct TrgmState TrgmState
#define HASH_CONTEXT
Definition: hsearch.h:93
#define HASH_ELEM
Definition: hsearch.h:87
MemoryContext hcxt
Definition: hsearch.h:78
regex_t * regex
Definition: trgm_regexp.c:402
HTAB * states
Definition: trgm_regexp.c:407
Size entrysize
Definition: hsearch.h:73
#define MemSet(start, val, len)
Definition: c.h:962
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1335
TrgmState * initState
Definition: trgm_regexp.c:408
#define TSTATE_FIN
Definition: trgm_regexp.c:329
#define TSTATE_INIT
Definition: trgm_regexp.c:328
bool overflowed
Definition: trgm_regexp.c:415
int nstates
Definition: trgm_regexp.c:409
#define linitial(l)
Definition: pg_list.h:195
TrgmColor colors[2]
Definition: trgm_regexp.c:290
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
int arcsCount
Definition: trgm_regexp.c:414
List * queue
Definition: trgm_regexp.c:412
#define HASH_BLOBS
Definition: hsearch.h:88
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:316
Size keysize
Definition: hsearch.h:72
Definition: regguts.h:298
static TrgmState * getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
Definition: trgm_regexp.c:1369
TrgmPrefix prefix
Definition: trgm_regexp.c:309
#define MAX_EXPANDED_STATES
Definition: trgm_regexp.c:218
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:285
List * keysQueue
Definition: trgm_regexp.c:413
static void processState(TrgmNFA *trgmNFA, TrgmState *state)
Definition: trgm_regexp.c:964
List * list_delete_first(List *list)
Definition: list.c:861

◆ trigramsMatchGraph()

bool trigramsMatchGraph ( TrgmPackedGraph graph,
bool check 
)

Definition at line 636 of file trgm_regexp.c.

References TrgmPackedState::arcs, TrgmPackedState::arcsCount, TrgmPackedArc::colorTrgm, TrgmPackedGraph::colorTrigramGroups, TrgmPackedGraph::colorTrigramsActive, TrgmPackedGraph::colorTrigramsCount, i, TrgmPackedGraph::states, TrgmPackedGraph::statesActive, TrgmPackedGraph::statesCount, TrgmPackedGraph::statesQueue, and TrgmPackedArc::targetState.

Referenced by gin_trgm_consistent(), gin_trgm_triconsistent(), and gtrgm_consistent().

637 {
638  int i,
639  j,
640  k,
641  queueIn,
642  queueOut;
643 
644  /*
645  * Reset temporary working areas.
646  */
647  memset(graph->colorTrigramsActive, 0,
648  sizeof(bool) * graph->colorTrigramsCount);
649  memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
650 
651  /*
652  * Check which color trigrams were matched. A match for any simple
653  * trigram associated with a color trigram counts as a match of the color
654  * trigram.
655  */
656  j = 0;
657  for (i = 0; i < graph->colorTrigramsCount; i++)
658  {
659  int cnt = graph->colorTrigramGroups[i];
660 
661  for (k = j; k < j + cnt; k++)
662  {
663  if (check[k])
664  {
665  /*
666  * Found one matched trigram in the group. Can skip the rest
667  * of them and go to the next group.
668  */
669  graph->colorTrigramsActive[i] = true;
670  break;
671  }
672  }
673  j = j + cnt;
674  }
675 
676  /*
677  * Initialize the statesQueue to hold just the initial state. Note:
678  * statesQueue has room for statesCount entries, which is certainly enough
679  * since no state will be put in the queue more than once. The
680  * statesActive array marks which states have been queued.
681  */
682  graph->statesActive[0] = true;
683  graph->statesQueue[0] = 0;
684  queueIn = 0;
685  queueOut = 1;
686 
687  /* Process queued states as long as there are any. */
688  while (queueIn < queueOut)
689  {
690  int stateno = graph->statesQueue[queueIn++];
691  TrgmPackedState *state = &graph->states[stateno];
692  int cnt = state->arcsCount;
693 
694  /* Loop over state's out-arcs */
695  for (i = 0; i < cnt; i++)
696  {
697  TrgmPackedArc *arc = &state->arcs[i];
698 
699  /*
700  * If corresponding color trigram is present then activate the
701  * corresponding state. We're done if that's the final state,
702  * otherwise queue the state if it's not been queued already.
703  */
704  if (graph->colorTrigramsActive[arc->colorTrgm])
705  {
706  int nextstate = arc->targetState;
707 
708  if (nextstate == 1)
709  return true; /* success: final state is reachable */
710 
711  if (!graph->statesActive[nextstate])
712  {
713  graph->statesActive[nextstate] = true;
714  graph->statesQueue[queueOut++] = nextstate;
715  }
716  }
717  }
718  }
719 
720  /* Queue is empty, so match fails. */
721  return false;
722 }
bool * colorTrigramsActive
Definition: trgm_regexp.c:461
Definition: regguts.h:276
TrgmPackedState * states
Definition: trgm_regexp.c:458
TrgmPackedArc * arcs
Definition: trgm_regexp.c:435
bool * statesActive
Definition: trgm_regexp.c:462
Definition: regguts.h:298
int i
int * colorTrigramGroups
Definition: trgm_regexp.c:451

◆ validArcLabel()

static bool validArcLabel ( TrgmStateKey key,
TrgmColor  co 
)
static

Definition at line 1314 of file trgm_regexp.c.

References Assert, COLOR_BLANK, COLOR_UNKNOWN, TrgmPrefix::colors, and TrgmStateKey::prefix.

Referenced by addArc(), and addKey().

1315 {
1316  /*
1317  * We have to know full trigram in order to add outgoing arc. So we can't
1318  * do it if prefix is ambiguous.
1319  */
1320  if (key->prefix.colors[0] == COLOR_UNKNOWN)
1321  return false;
1322 
1323  /* If key->prefix.colors[0] isn't unknown, its second color isn't either */
1324  Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
1325  /* And we should not be called with an unknown arc color anytime */
1326  Assert(co != COLOR_UNKNOWN);
1327 
1328  /*
1329  * We don't bother with making arcs representing three non-word
1330  * characters, since that's useless for trigram extraction.
1331  */
1332  if (key->prefix.colors[0] == COLOR_BLANK &&
1333  key->prefix.colors[1] == COLOR_BLANK &&
1334  co == COLOR_BLANK)
1335  return false;
1336 
1337  /*
1338  * We also reject nonblank-blank-anything. The nonblank-blank-nonblank
1339  * case doesn't correspond to any trigram the trigram extraction code
1340  * would make. The nonblank-blank-blank case is also not possible with
1341  * RPADDING = 1. (Note that in many cases we'd fail to generate such a
1342  * trigram even if it were valid, for example processing "foo bar" will
1343  * not result in considering the trigram "o ". So if you want to support
1344  * RPADDING = 2, there's more to do than just twiddle this test.)
1345  */
1346  if (key->prefix.colors[0] != COLOR_BLANK &&
1347  key->prefix.colors[1] == COLOR_BLANK)
1348  return false;
1349 
1350  /*
1351  * Other combinations involving blank are valid, in particular we assume
1352  * blank-blank-nonblank is valid, which presumes that LPADDING is 2.
1353  *
1354  * Note: Using again the example "foo bar", we will not consider the
1355  * trigram " b", though this trigram would be found by the trigram
1356  * extraction code. Since we will find " ba", it doesn't seem worth
1357  * trying to hack the algorithm to generate the additional trigram.
1358  */
1359 
1360  /* arc label is valid */
1361  return true;
1362 }
TrgmColor colors[2]
Definition: trgm_regexp.c:290
#define COLOR_BLANK
Definition: trgm_regexp.c:286
#define Assert(condition)
Definition: c.h:739
TrgmPrefix prefix
Definition: trgm_regexp.c:309
#define COLOR_UNKNOWN
Definition: trgm_regexp.c:285

Variable Documentation

◆ penalties

const float4 penalties[8]
static
Initial value:
= {
1.0f,
3.5f,
0.0f,
0.0f,
4.2f,
2.1f,
25.0f,
0.0f
}

Definition at line 228 of file trgm_regexp.c.

Referenced by selectColorTrigrams().