PostgreSQL Source Code git master
regexport.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * regexport.c
4 * Functions for exporting info about a regex's NFA
5 *
6 * In this implementation, the NFA defines a necessary but not sufficient
7 * condition for a string to match the regex: that is, there can be strings
8 * that match the NFA but don't match the full regex, but not vice versa.
9 * Thus, for example, it is okay for the functions below to treat lookaround
10 * constraints as no-ops, since they merely constrain the string some more.
11 *
12 * Notice that these functions return info into caller-provided arrays
13 * rather than doing their own malloc's. This simplifies the APIs by
14 * eliminating a class of error conditions, and in the case of colors
15 * allows the caller to decide how big is too big to bother with.
16 *
17 *
18 * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
19 * Portions Copyright (c) 1998, 1999 Henry Spencer
20 *
21 * IDENTIFICATION
22 * src/backend/regex/regexport.c
23 *
24 *-------------------------------------------------------------------------
25 */
26
27#include "regex/regguts.h"
28
29#include "regex/regexport.h"
30
31
32/*
33 * Get total number of NFA states.
34 */
35int
37{
38 struct cnfa *cnfa;
39
40 assert(regex != NULL && regex->re_magic == REMAGIC);
41 cnfa = &((struct guts *) regex->re_guts)->search;
42
43 return cnfa->nstates;
44}
45
46/*
47 * Get initial state of NFA.
48 */
49int
51{
52 struct cnfa *cnfa;
53
54 assert(regex != NULL && regex->re_magic == REMAGIC);
55 cnfa = &((struct guts *) regex->re_guts)->search;
56
57 return cnfa->pre;
58}
59
60/*
61 * Get final state of NFA.
62 */
63int
65{
66 struct cnfa *cnfa;
67
68 assert(regex != NULL && regex->re_magic == REMAGIC);
69 cnfa = &((struct guts *) regex->re_guts)->search;
70
71 return cnfa->post;
72}
73
74/*
75 * pg_reg_getnumoutarcs() and pg_reg_getoutarcs() mask the existence of LACON
76 * arcs from the caller, treating any LACON as being automatically satisfied.
77 * Since the output representation does not support arcs that consume no
78 * character when traversed, we have to recursively traverse LACON arcs here,
79 * and report whatever normal arcs are reachable by traversing LACON arcs.
80 * Note that this wouldn't work if it were possible to reach the final state
81 * via LACON traversal, but the regex library never builds NFAs that have
82 * LACON arcs leading directly to the final state. (This is because the
83 * regex executor is designed to consume one character beyond the nominal
84 * match end --- possibly an EOS indicator --- so there is always a set of
85 * ordinary arcs leading to the final state.)
86 *
87 * traverse_lacons is a recursive subroutine used by both exported functions
88 * to count and then emit the reachable regular arcs. *arcs_count is
89 * incremented by the number of reachable arcs, and as many as will fit in
90 * arcs_len (possibly 0) are emitted into arcs[].
91 */
92static void
93traverse_lacons(struct cnfa *cnfa, int st,
94 int *arcs_count,
95 regex_arc_t *arcs, int arcs_len)
96{
97 struct carc *ca;
98
99 /*
100 * Since this function recurses, it could theoretically be driven to stack
101 * overflow. In practice, this is mostly useful to backstop against a
102 * failure of the regex compiler to remove a loop of LACON arcs.
103 */
105
106 for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
107 {
108 if (ca->co < cnfa->ncolors)
109 {
110 /* Ordinary arc, so count and possibly emit it */
111 int ndx = (*arcs_count)++;
112
113 if (ndx < arcs_len)
114 {
115 arcs[ndx].co = ca->co;
116 arcs[ndx].to = ca->to;
117 }
118 }
119 else
120 {
121 /* LACON arc --- assume it's satisfied and recurse... */
122 /* ... but first, assert it doesn't lead directly to post state */
123 Assert(ca->to != cnfa->post);
124
125 traverse_lacons(cnfa, ca->to, arcs_count, arcs, arcs_len);
126 }
127 }
128}
129
130/*
131 * Get number of outgoing NFA arcs of state number "st".
132 */
133int
134pg_reg_getnumoutarcs(const regex_t *regex, int st)
135{
136 struct cnfa *cnfa;
137 int arcs_count;
138
139 assert(regex != NULL && regex->re_magic == REMAGIC);
140 cnfa = &((struct guts *) regex->re_guts)->search;
141
142 if (st < 0 || st >= cnfa->nstates)
143 return 0;
144 arcs_count = 0;
145 traverse_lacons(cnfa, st, &arcs_count, NULL, 0);
146 return arcs_count;
147}
148
149/*
150 * Write array of outgoing NFA arcs of state number "st" into arcs[],
151 * whose length arcs_len must be at least as long as indicated by
152 * pg_reg_getnumoutarcs(), else not all arcs will be returned.
153 */
154void
155pg_reg_getoutarcs(const regex_t *regex, int st,
156 regex_arc_t *arcs, int arcs_len)
157{
158 struct cnfa *cnfa;
159 int arcs_count;
160
161 assert(regex != NULL && regex->re_magic == REMAGIC);
162 cnfa = &((struct guts *) regex->re_guts)->search;
163
164 if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
165 return;
166 arcs_count = 0;
167 traverse_lacons(cnfa, st, &arcs_count, arcs, arcs_len);
168}
169
170/*
171 * Get total number of colors.
172 */
173int
175{
176 struct colormap *cm;
177
178 assert(regex != NULL && regex->re_magic == REMAGIC);
179 cm = &((struct guts *) regex->re_guts)->cmap;
180
181 return cm->max + 1;
182}
183
184/*
185 * Check if color is beginning of line/string.
186 *
187 * (We might at some point need to offer more refined handling of pseudocolors,
188 * but this will do for now.)
189 */
190int
191pg_reg_colorisbegin(const regex_t *regex, int co)
192{
193 struct cnfa *cnfa;
194
195 assert(regex != NULL && regex->re_magic == REMAGIC);
196 cnfa = &((struct guts *) regex->re_guts)->search;
197
198 if (co == cnfa->bos[0] || co == cnfa->bos[1])
199 return true;
200 else
201 return false;
202}
203
204/*
205 * Check if color is end of line/string.
206 */
207int
208pg_reg_colorisend(const regex_t *regex, int co)
209{
210 struct cnfa *cnfa;
211
212 assert(regex != NULL && regex->re_magic == REMAGIC);
213 cnfa = &((struct guts *) regex->re_guts)->search;
214
215 if (co == cnfa->eos[0] || co == cnfa->eos[1])
216 return true;
217 else
218 return false;
219}
220
221/*
222 * Get number of member chrs of color number "co".
223 *
224 * Note: we return -1 if the color number is invalid, or if it is a special
225 * color (WHITE, RAINBOW, or a pseudocolor), or if the number of members is
226 * uncertain.
227 * Callers should not try to extract the members if -1 is returned.
228 */
229int
230pg_reg_getnumcharacters(const regex_t *regex, int co)
231{
232 struct colormap *cm;
233
234 assert(regex != NULL && regex->re_magic == REMAGIC);
235 cm = &((struct guts *) regex->re_guts)->cmap;
236
237 if (co <= 0 || co > cm->max) /* <= 0 rejects WHITE and RAINBOW */
238 return -1;
239 if (cm->cd[co].flags & PSEUDO) /* also pseudocolors (BOS etc) */
240 return -1;
241
242 /*
243 * If the color appears anywhere in the high colormap, treat its number of
244 * members as uncertain. In principle we could determine all the specific
245 * chrs corresponding to each such entry, but it would be expensive
246 * (particularly if character class tests are required) and it doesn't
247 * seem worth it.
248 */
249 if (cm->cd[co].nuchrs != 0)
250 return -1;
251
252 /* OK, return the known number of member chrs */
253 return cm->cd[co].nschrs;
254}
255
256/*
257 * Write array of member chrs of color number "co" into chars[],
258 * whose length chars_len must be at least as long as indicated by
259 * pg_reg_getnumcharacters(), else not all chars will be returned.
260 *
261 * Fetching the members of WHITE, RAINBOW, or a pseudocolor is not supported.
262 *
263 * Caution: this is a relatively expensive operation.
264 */
265void
266pg_reg_getcharacters(const regex_t *regex, int co,
267 pg_wchar *chars, int chars_len)
268{
269 struct colormap *cm;
270 chr c;
271
272 assert(regex != NULL && regex->re_magic == REMAGIC);
273 cm = &((struct guts *) regex->re_guts)->cmap;
274
275 if (co <= 0 || co > cm->max || chars_len <= 0)
276 return;
277 if (cm->cd[co].flags & PSEUDO)
278 return;
279
280 /*
281 * We need only examine the low character map; there should not be any
282 * matching entries in the high map.
283 */
284 for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
285 {
286 if (cm->locolormap[c - CHR_MIN] == co)
287 {
288 *chars++ = c;
289 if (--chars_len == 0)
290 break;
291 }
292 }
293}
#define Assert(condition)
Definition: c.h:815
unsigned int pg_wchar
Definition: mbprint.c:31
char * c
#define MAX_SIMPLE_CHR
Definition: regcustom.h:87
pg_wchar chr
Definition: regcustom.h:59
#define CHR_MIN
Definition: regcustom.h:65
#define assert(x)
Definition: regcustom.h:56
#define regex_t
Definition: regex.h:245
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:134
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:174
int pg_reg_getnumstates(const regex_t *regex)
Definition: regexport.c:36
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:155
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:208
static void traverse_lacons(struct cnfa *cnfa, int st, int *arcs_count, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:93
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:266
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:230
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:191
#define PSEUDO
Definition: regguts.h:186
#define REMAGIC
Definition: regguts.h:101
#define COLORLESS
Definition: regguts.h:158
void check_stack_depth(void)
Definition: stack_depth.c:95
Definition: regguts.h:401
int to
Definition: regguts.h:403
color co
Definition: regguts.h:402
Definition: regguts.h:407
int pre
Definition: regguts.h:415
color eos[2]
Definition: regguts.h:418
struct carc ** states
Definition: regguts.h:421
int ncolors
Definition: regguts.h:409
int post
Definition: regguts.h:416
color bos[2]
Definition: regguts.h:417
int nstates
Definition: regguts.h:408
int nuchrs
Definition: regguts.h:179
int nschrs
Definition: regguts.h:178
int flags
Definition: regguts.h:184
color * locolormap
Definition: regguts.h:240
size_t max
Definition: regguts.h:234
struct colordesc * cd
Definition: regguts.h:236
Definition: regguts.h:531
static char chars[TZ_MAX_CHARS]
Definition: zic.c:401