PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
regexport.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * regexport.c
4  * Functions for exporting info about a regex's NFA
5  *
6  * In this implementation, the NFA defines a necessary but not sufficient
7  * condition for a string to match the regex: that is, there can be strings
8  * that match the NFA but don't match the full regex, but not vice versa.
9  * Thus, for example, it is okay for the functions below to treat lookaround
10  * constraints as no-ops, since they merely constrain the string some more.
11  *
12  * Notice that these functions return info into caller-provided arrays
13  * rather than doing their own malloc's. This simplifies the APIs by
14  * eliminating a class of error conditions, and in the case of colors
15  * allows the caller to decide how big is too big to bother with.
16  *
17  *
18  * Portions Copyright (c) 2013-2017, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1998, 1999 Henry Spencer
20  *
21  * IDENTIFICATION
22  * src/backend/regex/regexport.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "regex/regguts.h"
28 
29 #include "regex/regexport.h"
30 
31 
32 /*
33  * Get total number of NFA states.
34  */
35 int
37 {
38  struct cnfa *cnfa;
39 
40  assert(regex != NULL && regex->re_magic == REMAGIC);
41  cnfa = &((struct guts *) regex->re_guts)->search;
42 
43  return cnfa->nstates;
44 }
45 
46 /*
47  * Get initial state of NFA.
48  */
49 int
51 {
52  struct cnfa *cnfa;
53 
54  assert(regex != NULL && regex->re_magic == REMAGIC);
55  cnfa = &((struct guts *) regex->re_guts)->search;
56 
57  return cnfa->pre;
58 }
59 
60 /*
61  * Get final state of NFA.
62  */
63 int
65 {
66  struct cnfa *cnfa;
67 
68  assert(regex != NULL && regex->re_magic == REMAGIC);
69  cnfa = &((struct guts *) regex->re_guts)->search;
70 
71  return cnfa->post;
72 }
73 
74 /*
75  * pg_reg_getnumoutarcs() and pg_reg_getoutarcs() mask the existence of LACON
76  * arcs from the caller, treating any LACON as being automatically satisfied.
77  * Since the output representation does not support arcs that consume no
78  * character when traversed, we have to recursively traverse LACON arcs here,
79  * and report whatever normal arcs are reachable by traversing LACON arcs.
80  * Note that this wouldn't work if it were possible to reach the final state
81  * via LACON traversal, but the regex library never builds NFAs that have
82  * LACON arcs leading directly to the final state. (This is because the
83  * regex executor is designed to consume one character beyond the nominal
84  * match end --- possibly an EOS indicator --- so there is always a set of
85  * ordinary arcs leading to the final state.)
86  *
87  * traverse_lacons is a recursive subroutine used by both exported functions
88  * to count and then emit the reachable regular arcs. *arcs_count is
89  * incremented by the number of reachable arcs, and as many as will fit in
90  * arcs_len (possibly 0) are emitted into arcs[].
91  */
92 static void
93 traverse_lacons(struct cnfa * cnfa, int st,
94  int *arcs_count,
95  regex_arc_t *arcs, int arcs_len)
96 {
97  struct carc *ca;
98 
99  /*
100  * Since this function recurses, it could theoretically be driven to stack
101  * overflow. In practice, this is mostly useful to backstop against a
102  * failure of the regex compiler to remove a loop of LACON arcs.
103  */
105 
106  for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
107  {
108  if (ca->co < cnfa->ncolors)
109  {
110  /* Ordinary arc, so count and possibly emit it */
111  int ndx = (*arcs_count)++;
112 
113  if (ndx < arcs_len)
114  {
115  arcs[ndx].co = ca->co;
116  arcs[ndx].to = ca->to;
117  }
118  }
119  else
120  {
121  /* LACON arc --- assume it's satisfied and recurse... */
122  /* ... but first, assert it doesn't lead directly to post state */
123  Assert(ca->to != cnfa->post);
124 
125  traverse_lacons(cnfa, ca->to, arcs_count, arcs, arcs_len);
126  }
127  }
128 }
129 
130 /*
131  * Get number of outgoing NFA arcs of state number "st".
132  */
133 int
134 pg_reg_getnumoutarcs(const regex_t *regex, int st)
135 {
136  struct cnfa *cnfa;
137  int arcs_count;
138 
139  assert(regex != NULL && regex->re_magic == REMAGIC);
140  cnfa = &((struct guts *) regex->re_guts)->search;
141 
142  if (st < 0 || st >= cnfa->nstates)
143  return 0;
144  arcs_count = 0;
145  traverse_lacons(cnfa, st, &arcs_count, NULL, 0);
146  return arcs_count;
147 }
148 
149 /*
150  * Write array of outgoing NFA arcs of state number "st" into arcs[],
151  * whose length arcs_len must be at least as long as indicated by
152  * pg_reg_getnumoutarcs(), else not all arcs will be returned.
153  */
154 void
155 pg_reg_getoutarcs(const regex_t *regex, int st,
156  regex_arc_t *arcs, int arcs_len)
157 {
158  struct cnfa *cnfa;
159  int arcs_count;
160 
161  assert(regex != NULL && regex->re_magic == REMAGIC);
162  cnfa = &((struct guts *) regex->re_guts)->search;
163 
164  if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
165  return;
166  arcs_count = 0;
167  traverse_lacons(cnfa, st, &arcs_count, arcs, arcs_len);
168 }
169 
170 /*
171  * Get total number of colors.
172  */
173 int
175 {
176  struct colormap *cm;
177 
178  assert(regex != NULL && regex->re_magic == REMAGIC);
179  cm = &((struct guts *) regex->re_guts)->cmap;
180 
181  return cm->max + 1;
182 }
183 
184 /*
185  * Check if color is beginning of line/string.
186  *
187  * (We might at some point need to offer more refined handling of pseudocolors,
188  * but this will do for now.)
189  */
190 int
191 pg_reg_colorisbegin(const regex_t *regex, int co)
192 {
193  struct cnfa *cnfa;
194 
195  assert(regex != NULL && regex->re_magic == REMAGIC);
196  cnfa = &((struct guts *) regex->re_guts)->search;
197 
198  if (co == cnfa->bos[0] || co == cnfa->bos[1])
199  return true;
200  else
201  return false;
202 }
203 
204 /*
205  * Check if color is end of line/string.
206  */
207 int
208 pg_reg_colorisend(const regex_t *regex, int co)
209 {
210  struct cnfa *cnfa;
211 
212  assert(regex != NULL && regex->re_magic == REMAGIC);
213  cnfa = &((struct guts *) regex->re_guts)->search;
214 
215  if (co == cnfa->eos[0] || co == cnfa->eos[1])
216  return true;
217  else
218  return false;
219 }
220 
221 /*
222  * Get number of member chrs of color number "co".
223  *
224  * Note: we return -1 if the color number is invalid, or if it is a special
225  * color (WHITE or a pseudocolor), or if the number of members is uncertain.
226  * Callers should not try to extract the members if -1 is returned.
227  */
228 int
229 pg_reg_getnumcharacters(const regex_t *regex, int co)
230 {
231  struct colormap *cm;
232 
233  assert(regex != NULL && regex->re_magic == REMAGIC);
234  cm = &((struct guts *) regex->re_guts)->cmap;
235 
236  if (co <= 0 || co > cm->max) /* we reject 0 which is WHITE */
237  return -1;
238  if (cm->cd[co].flags & PSEUDO) /* also pseudocolors (BOS etc) */
239  return -1;
240 
241  /*
242  * If the color appears anywhere in the high colormap, treat its number of
243  * members as uncertain. In principle we could determine all the specific
244  * chrs corresponding to each such entry, but it would be expensive
245  * (particularly if character class tests are required) and it doesn't
246  * seem worth it.
247  */
248  if (cm->cd[co].nuchrs != 0)
249  return -1;
250 
251  /* OK, return the known number of member chrs */
252  return cm->cd[co].nschrs;
253 }
254 
255 /*
256  * Write array of member chrs of color number "co" into chars[],
257  * whose length chars_len must be at least as long as indicated by
258  * pg_reg_getnumcharacters(), else not all chars will be returned.
259  *
260  * Fetching the members of WHITE or a pseudocolor is not supported.
261  *
262  * Caution: this is a relatively expensive operation.
263  */
264 void
265 pg_reg_getcharacters(const regex_t *regex, int co,
266  pg_wchar *chars, int chars_len)
267 {
268  struct colormap *cm;
269  chr c;
270 
271  assert(regex != NULL && regex->re_magic == REMAGIC);
272  cm = &((struct guts *) regex->re_guts)->cmap;
273 
274  if (co <= 0 || co > cm->max || chars_len <= 0)
275  return;
276  if (cm->cd[co].flags & PSEUDO)
277  return;
278 
279  /*
280  * We need only examine the low character map; there should not be any
281  * matching entries in the high map.
282  */
283  for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
284  {
285  if (cm->locolormap[c - CHR_MIN] == co)
286  {
287  *chars++ = c;
288  if (--chars_len == 0)
289  break;
290  }
291  }
292 }
int pg_reg_getnumstates(const regex_t *regex)
Definition: regexport.c:36
static void traverse_lacons(struct cnfa *cnfa, int st, int *arcs_count, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:93
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:229
Definition: regguts.h:354
int pre
Definition: regguts.h:360
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:265
color * locolormap
Definition: regguts.h:218
#define REMAGIC
Definition: regguts.h:96
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:191
struct carc ** states
Definition: regguts.h:366
struct colordesc * cd
Definition: regguts.h:214
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:174
int nstates
Definition: regguts.h:356
int re_magic
Definition: regex.h:57
pg_wchar chr
Definition: regcustom.h:68
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
char * c
void check_stack_depth(void)
Definition: postgres.c:3102
#define assert(TEST)
Definition: imath.c:37
#define CHR_MIN
Definition: regcustom.h:74
unsigned int pg_wchar
Definition: mbprint.c:31
color bos[2]
Definition: regguts.h:362
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:208
Definition: regguts.h:462
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
int to
Definition: regguts.h:351
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
int nschrs
Definition: regguts.h:156
int flags
Definition: regguts.h:162
int post
Definition: regguts.h:361
char * re_guts
Definition: regex.h:78
#define PSEUDO
Definition: regguts.h:164
color co
Definition: regguts.h:350
#define COLORLESS
Definition: regguts.h:137
int nuchrs
Definition: regguts.h:157
static char chars[TZ_MAX_CHARS]
Definition: zic.c:381
Definition: regex.h:55
size_t max
Definition: regguts.h:212
color eos[2]
Definition: regguts.h:363
int ncolors
Definition: regguts.h:357
#define MAX_SIMPLE_CHR
Definition: regcustom.h:97
Definition: regguts.h:348