PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
regprefix.c File Reference
#include "regex/regguts.h"
Include dependency graph for regprefix.c:

Go to the source code of this file.

Functions

static int findprefix (struct cnfa *cnfa, struct colormap *cm, chr *string, size_t *slength)
 
int pg_regprefix (regex_t *re, chr **string, size_t *slength)
 

Function Documentation

◆ findprefix()

static int findprefix ( struct cnfa cnfa,
struct colormap cm,
chr string,
size_t *  slength 
)
static

Definition at line 116 of file regprefix.c.

120{
121 int st;
122 int nextst;
123 color thiscolor;
124 chr c;
125 struct carc *ca;
126
127 /*
128 * The "pre" state must have only BOS/BOL outarcs, else pattern isn't
129 * anchored left. If we have both BOS and BOL, they must go to the same
130 * next state.
131 */
132 st = cnfa->pre;
133 nextst = -1;
134 for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
135 {
136 if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
137 {
138 if (nextst == -1)
139 nextst = ca->to;
140 else if (nextst != ca->to)
141 return REG_NOMATCH;
142 }
143 else
144 return REG_NOMATCH;
145 }
146 if (nextst == -1)
147 return REG_NOMATCH;
148
149 /*
150 * Scan through successive states, stopping as soon as we find one with
151 * more than one acceptable transition character (either multiple colors
152 * on out-arcs, or a color with more than one member chr).
153 *
154 * We could find a state with multiple out-arcs that are all labeled with
155 * the same singleton color; this comes from patterns like "^ab(cde|cxy)".
156 * In that case we add the chr "c" to the output string but then exit the
157 * loop with nextst == -1. This leaves a little bit on the table: if the
158 * pattern is like "^ab(cde|cdy)", we won't notice that "d" could be added
159 * to the prefix. But chasing multiple parallel state chains doesn't seem
160 * worth the trouble.
161 */
162 do
163 {
164 st = nextst;
165 nextst = -1;
166 thiscolor = COLORLESS;
167 for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
168 {
169 /* We can ignore BOS/BOL arcs */
170 if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
171 continue;
172
173 /*
174 * ... but EOS/EOL arcs terminate the search, as do RAINBOW arcs
175 * and LACONs
176 */
177 if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1] ||
178 ca->co == RAINBOW || ca->co >= cnfa->ncolors)
179 {
180 thiscolor = COLORLESS;
181 break;
182 }
183 if (thiscolor == COLORLESS)
184 {
185 /* First plain outarc */
186 thiscolor = ca->co;
187 nextst = ca->to;
188 }
189 else if (thiscolor == ca->co)
190 {
191 /* Another plain outarc for same color */
192 nextst = -1;
193 }
194 else
195 {
196 /* More than one plain outarc color terminates the search */
197 thiscolor = COLORLESS;
198 break;
199 }
200 }
201 /* Done if we didn't find exactly one color on plain outarcs */
202 if (thiscolor == COLORLESS)
203 break;
204 /* The color must be a singleton */
205 if (cm->cd[thiscolor].nschrs != 1)
206 break;
207 /* Must not have any high-color-map entries */
208 if (cm->cd[thiscolor].nuchrs != 0)
209 break;
210
211 /*
212 * Identify the color's sole member chr and add it to the prefix
213 * string. In general the colormap data structure doesn't provide a
214 * way to find color member chrs, except by trying GETCOLOR() on each
215 * possible chr value, which won't do at all. However, for the cases
216 * we care about it should be sufficient to test the "firstchr" value,
217 * that is the first chr ever added to the color. There are cases
218 * where this might no longer be a member of the color (so we do need
219 * to test), but none of them are likely to arise for a character that
220 * is a member of a common prefix. If we do hit such a corner case,
221 * we just fall out without adding anything to the prefix string.
222 */
223 c = cm->cd[thiscolor].firstchr;
224 if (GETCOLOR(cm, c) != thiscolor)
225 break;
226
227 string[(*slength)++] = c;
228
229 /* Advance to next state, but only if we have a unique next state */
230 } while (nextst != -1);
231
232 /*
233 * If we ended at a state that only has EOS/EOL outarcs leading to the
234 * "post" state, then we have an exact-match string. Note this is true
235 * even if the string is of zero length.
236 */
237 nextst = -1;
238 for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
239 {
240 if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1])
241 {
242 if (nextst == -1)
243 nextst = ca->to;
244 else if (nextst != ca->to)
245 {
246 nextst = -1;
247 break;
248 }
249 }
250 else
251 {
252 nextst = -1;
253 break;
254 }
255 }
256 if (nextst == cnfa->post)
257 return REG_EXACT;
258
259 /*
260 * Otherwise, if we were unable to identify any prefix characters, say
261 * NOMATCH --- the pattern is anchored left, but doesn't specify any
262 * particular first character.
263 */
264 if (*slength > 0)
265 return REG_PREFIX;
266
267 return REG_NOMATCH;
268}
char * c
pg_wchar chr
Definition: regcustom.h:59
#define REG_NOMATCH
Definition: regex.h:216
#define REG_EXACT
Definition: regex.h:240
#define REG_PREFIX
Definition: regex.h:239
#define RAINBOW
Definition: regguts.h:159
#define GETCOLOR(cm, c)
Definition: regguts.h:257
short color
Definition: regguts.h:155
#define COLORLESS
Definition: regguts.h:158
Definition: regguts.h:401
int to
Definition: regguts.h:403
color co
Definition: regguts.h:402
Definition: regguts.h:407
int pre
Definition: regguts.h:415
color eos[2]
Definition: regguts.h:418
struct carc ** states
Definition: regguts.h:421
int ncolors
Definition: regguts.h:409
int post
Definition: regguts.h:416
color bos[2]
Definition: regguts.h:417
chr firstchr
Definition: regguts.h:183
int nuchrs
Definition: regguts.h:179
int nschrs
Definition: regguts.h:178
struct colordesc * cd
Definition: regguts.h:236

References cnfa::bos, colormap::cd, carc::co, COLORLESS, cnfa::eos, colordesc::firstchr, GETCOLOR, cnfa::ncolors, colordesc::nschrs, colordesc::nuchrs, cnfa::post, cnfa::pre, RAINBOW, REG_EXACT, REG_NOMATCH, REG_PREFIX, cnfa::states, and carc::to.

Referenced by pg_regprefix().

◆ pg_regprefix()

int pg_regprefix ( regex_t re,
chr **  string,
size_t *  slength 
)

Definition at line 46 of file regprefix.c.

49{
50 struct guts *g;
51 struct cnfa *cnfa;
52 int st;
53
54 /* sanity checks */
55 if (string == NULL || slength == NULL)
56 return REG_INVARG;
57 *string = NULL; /* initialize for failure cases */
58 *slength = 0;
59 if (re == NULL || re->re_magic != REMAGIC)
60 return REG_INVARG;
61 if (re->re_csize != sizeof(chr))
62 return REG_MIXED;
63
64 /* Initialize locale-dependent support */
65 pg_set_regex_collation(re->re_collation);
66
67 /* setup */
68 g = (struct guts *) re->re_guts;
70 return REG_NOMATCH;
71
72 /*
73 * This implementation considers only the search NFA for the topmost regex
74 * tree node. Therefore, constraints such as backrefs are not fully
75 * applied, which is allowed per the function's API spec.
76 */
77 assert(g->tree != NULL);
78 cnfa = &g->tree->cnfa;
79
80 /* matchall NFAs never have a fixed prefix */
81 if (cnfa->flags & MATCHALL)
82 return REG_NOMATCH;
83
84 /*
85 * Since a correct NFA should never contain any exit-free loops, it should
86 * not be possible for our traversal to return to a previously visited NFA
87 * state. Hence we need at most nstates chrs in the output string.
88 */
89 *string = (chr *) MALLOC(cnfa->nstates * sizeof(chr));
90 if (*string == NULL)
91 return REG_ESPACE;
92
93 /* do it */
94 st = findprefix(cnfa, &g->cmap, *string, slength);
95
96 assert(*slength <= cnfa->nstates);
97
98 /* clean up */
99 if (st != REG_PREFIX && st != REG_EXACT)
100 {
101 FREE(*string);
102 *string = NULL;
103 *slength = 0;
104 }
105
106 return st;
107}
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void pg_set_regex_collation(Oid collation)
#define MALLOC(n)
Definition: regcustom.h:52
#define assert(x)
Definition: regcustom.h:56
#define REG_UIMPOSSIBLE
Definition: regex.h:150
#define REG_INVARG
Definition: regex.h:230
#define REG_MIXED
Definition: regex.h:231
#define REG_ESPACE
Definition: regex.h:227
#define REMAGIC
Definition: regguts.h:101
#define MATCHALL
Definition: regguts.h:412
static int findprefix(struct cnfa *cnfa, struct colormap *cm, chr *string, size_t *slength)
Definition: regprefix.c:116
int flags
Definition: regguts.h:410
int nstates
Definition: regguts.h:408
Definition: regguts.h:531
struct subre * tree
Definition: regguts.h:537
long info
Definition: regguts.h:535
struct colormap cmap
Definition: regguts.h:540
struct cnfa cnfa
Definition: regguts.h:507
@ FREE
Definition: task.c:94

References assert, guts::cmap, subre::cnfa, findprefix(), cnfa::flags, FREE, if(), guts::info, MALLOC, MATCHALL, cnfa::nstates, pg_set_regex_collation(), REG_ESPACE, REG_EXACT, REG_INVARG, REG_MIXED, REG_NOMATCH, REG_PREFIX, REG_UIMPOSSIBLE, REMAGIC, and guts::tree.

Referenced by regexp_fixed_prefix().