PostgreSQL Source Code
git master
Loading...
Searching...
No Matches
regprefix.c
Go to the documentation of this file.
1
/*-------------------------------------------------------------------------
2
*
3
* regprefix.c
4
* Extract a common prefix, if any, from a compiled regex.
5
*
6
*
7
* Portions Copyright (c) 2012-2026, PostgreSQL Global Development Group
8
* Portions Copyright (c) 1998, 1999 Henry Spencer
9
*
10
* IDENTIFICATION
11
* src/backend/regex/regprefix.c
12
*
13
*-------------------------------------------------------------------------
14
*/
15
16
#include "
regex/regguts.h
"
17
18
19
/*
20
* forward declarations
21
*/
22
static
int
findprefix
(
struct
cnfa
*
cnfa
,
struct
colormap
*cm,
23
chr
*
string
,
size_t
*
slength
);
24
25
26
/*
27
* pg_regprefix - get common prefix for regular expression
28
*
29
* Returns one of:
30
* REG_NOMATCH: there is no common prefix of strings matching the regex
31
* REG_PREFIX: there is a common prefix of strings matching the regex
32
* REG_EXACT: all strings satisfying the regex must match the same string
33
* or a REG_XXX error code
34
*
35
* In the non-failure cases, *string is set to a palloc'd string containing
36
* the common prefix or exact value, of length *slength (measured in chrs
37
* not bytes!).
38
*
39
* This function does not analyze all complex cases (such as lookaround
40
* constraints) exactly. Therefore it is possible that some strings matching
41
* the reported prefix or exact-match string do not satisfy the regex. But
42
* it should never be the case that a string satisfying the regex does not
43
* match the reported prefix or exact-match string.
44
*/
45
int
46
pg_regprefix
(
regex_t
*re,
47
chr
**
string
,
48
size_t
*
slength
)
49
{
50
struct
guts
*g;
51
struct
cnfa
*
cnfa
;
52
int
st;
53
54
/* sanity checks */
55
if
(
string
==
NULL
||
slength
==
NULL
)
56
return
REG_INVARG
;
57
*
string
=
NULL
;
/* initialize for failure cases */
58
*
slength
= 0;
59
if
(re ==
NULL
|| re->re_magic !=
REMAGIC
)
60
return
REG_INVARG
;
61
if
(re->re_csize !=
sizeof
(
chr
))
62
return
REG_MIXED
;
63
64
/* Initialize locale-dependent support */
65
pg_set_regex_collation
(re->re_collation);
66
67
/* setup */
68
g = (
struct
guts
*) re->re_guts;
69
if
(g->
info
&
REG_UIMPOSSIBLE
)
70
return
REG_NOMATCH
;
71
72
/*
73
* This implementation considers only the search NFA for the topmost regex
74
* tree node. Therefore, constraints such as backrefs are not fully
75
* applied, which is allowed per the function's API spec.
76
*/
77
assert
(g->
tree
!=
NULL
);
78
cnfa
= &g->
tree
->
cnfa
;
79
80
/* matchall NFAs never have a fixed prefix */
81
if
(
cnfa
->
flags
&
MATCHALL
)
82
return
REG_NOMATCH
;
83
84
/*
85
* Since a correct NFA should never contain any exit-free loops, it should
86
* not be possible for our traversal to return to a previously visited NFA
87
* state. Hence we need at most nstates chrs in the output string.
88
*/
89
*
string
= (
chr
*)
MALLOC
(
cnfa
->
nstates
*
sizeof
(
chr
));
90
if
(*
string
==
NULL
)
91
return
REG_ESPACE
;
92
93
/* do it */
94
st =
findprefix
(
cnfa
, &g->
cmap
, *
string
,
slength
);
95
96
assert
(*
slength <= cnfa->
nstates);
97
98
/* clean up */
99
if
(st !=
REG_PREFIX
&& st !=
REG_EXACT
)
100
{
101
FREE
(*
string
);
102
*
string
=
NULL
;
103
*
slength
= 0;
104
}
105
106
return
st;
107
}
108
109
/*
110
* findprefix - extract common prefix from cNFA
111
*
112
* Results are returned into the preallocated chr array string[], with
113
* *slength (which must be preset to zero) incremented for each chr.
114
*/
115
static
int
/* regprefix return code */
116
findprefix
(
struct
cnfa
*
cnfa
,
117
struct
colormap
*cm,
118
chr
*
string
,
119
size_t
*
slength
)
120
{
121
int
st;
122
int
nextst
;
123
color
thiscolor
;
124
chr
c
;
125
struct
carc
*
ca
;
126
127
/*
128
* The "pre" state must have only BOS/BOL outarcs, else pattern isn't
129
* anchored left. If we have both BOS and BOL, they must go to the same
130
* next state.
131
*/
132
st =
cnfa
->
pre
;
133
nextst
= -1;
134
for
(
ca
=
cnfa
->
states
[st];
ca
->co !=
COLORLESS
;
ca
++)
135
{
136
if
(
ca
->co ==
cnfa
->
bos
[0] ||
ca
->co ==
cnfa
->
bos
[1])
137
{
138
if
(
nextst
== -1)
139
nextst
=
ca
->to;
140
else
if
(
nextst
!=
ca
->to)
141
return
REG_NOMATCH
;
142
}
143
else
144
return
REG_NOMATCH
;
145
}
146
if
(
nextst
== -1)
147
return
REG_NOMATCH
;
148
149
/*
150
* Scan through successive states, stopping as soon as we find one with
151
* more than one acceptable transition character (either multiple colors
152
* on out-arcs, or a color with more than one member chr).
153
*
154
* We could find a state with multiple out-arcs that are all labeled with
155
* the same singleton color; this comes from patterns like "^ab(cde|cxy)".
156
* In that case we add the chr "c" to the output string but then exit the
157
* loop with nextst == -1. This leaves a little bit on the table: if the
158
* pattern is like "^ab(cde|cdy)", we won't notice that "d" could be added
159
* to the prefix. But chasing multiple parallel state chains doesn't seem
160
* worth the trouble.
161
*/
162
do
163
{
164
st =
nextst
;
165
nextst
= -1;
166
thiscolor
=
COLORLESS
;
167
for
(
ca
=
cnfa
->
states
[st];
ca
->co !=
COLORLESS
;
ca
++)
168
{
169
/* We can ignore BOS/BOL arcs */
170
if
(
ca
->co ==
cnfa
->
bos
[0] ||
ca
->co ==
cnfa
->
bos
[1])
171
continue
;
172
173
/*
174
* ... but EOS/EOL arcs terminate the search, as do RAINBOW arcs
175
* and LACONs
176
*/
177
if
(
ca
->co ==
cnfa
->
eos
[0] ||
ca
->co ==
cnfa
->
eos
[1] ||
178
ca
->co ==
RAINBOW
||
ca
->co >=
cnfa
->
ncolors
)
179
{
180
thiscolor
=
COLORLESS
;
181
break
;
182
}
183
if
(
thiscolor
==
COLORLESS
)
184
{
185
/* First plain outarc */
186
thiscolor
=
ca
->co;
187
nextst
=
ca
->to;
188
}
189
else
if
(
thiscolor
==
ca
->co)
190
{
191
/* Another plain outarc for same color */
192
nextst
= -1;
193
}
194
else
195
{
196
/* More than one plain outarc color terminates the search */
197
thiscolor
=
COLORLESS
;
198
break
;
199
}
200
}
201
/* Done if we didn't find exactly one color on plain outarcs */
202
if
(
thiscolor
==
COLORLESS
)
203
break
;
204
/* The color must be a singleton */
205
if
(cm->
cd
[
thiscolor
].
nschrs
!= 1)
206
break
;
207
/* Must not have any high-color-map entries */
208
if
(cm->
cd
[
thiscolor
].
nuchrs
!= 0)
209
break
;
210
211
/*
212
* Identify the color's sole member chr and add it to the prefix
213
* string. In general the colormap data structure doesn't provide a
214
* way to find color member chrs, except by trying GETCOLOR() on each
215
* possible chr value, which won't do at all. However, for the cases
216
* we care about it should be sufficient to test the "firstchr" value,
217
* that is the first chr ever added to the color. There are cases
218
* where this might no longer be a member of the color (so we do need
219
* to test), but none of them are likely to arise for a character that
220
* is a member of a common prefix. If we do hit such a corner case,
221
* we just fall out without adding anything to the prefix string.
222
*/
223
c
= cm->
cd
[
thiscolor
].
firstchr
;
224
if
(
GETCOLOR
(cm,
c
) !=
thiscolor
)
225
break
;
226
227
string
[(*slength)++] =
c
;
228
229
/* Advance to next state, but only if we have a unique next state */
230
}
while
(
nextst
!= -1);
231
232
/*
233
* If we ended at a state that only has EOS/EOL outarcs leading to the
234
* "post" state, then we have an exact-match string. Note this is true
235
* even if the string is of zero length.
236
*/
237
nextst
= -1;
238
for
(
ca
=
cnfa
->
states
[st];
ca
->co !=
COLORLESS
;
ca
++)
239
{
240
if
(
ca
->co ==
cnfa
->
eos
[0] ||
ca
->co ==
cnfa
->
eos
[1])
241
{
242
if
(
nextst
== -1)
243
nextst
=
ca
->to;
244
else
if
(
nextst
!=
ca
->to)
245
{
246
nextst
= -1;
247
break
;
248
}
249
}
250
else
251
{
252
nextst
= -1;
253
break
;
254
}
255
}
256
if
(
nextst
==
cnfa
->
post
)
257
return
REG_EXACT
;
258
259
/*
260
* Otherwise, if we were unable to identify any prefix characters, say
261
* NOMATCH --- the pattern is anchored left, but doesn't specify any
262
* particular first character.
263
*/
264
if
(*
slength
> 0)
265
return
REG_PREFIX
;
266
267
return
REG_NOMATCH
;
268
}
FREE
#define FREE(ptr)
Definition
cryptohash.c:37
c
char * c
Definition
preproc-cursor.c:31
fb
static int fb(int x)
Definition
preproc-init.c:92
pg_set_regex_collation
void pg_set_regex_collation(Oid collation)
Definition
regc_pg_locale.c:35
MALLOC
#define MALLOC(n)
Definition
regcustom.h:52
chr
pg_wchar chr
Definition
regcustom.h:60
assert
#define assert(x)
Definition
regcustom.h:57
REG_NOMATCH
#define REG_NOMATCH
Definition
regex.h:216
REG_EXACT
#define REG_EXACT
Definition
regex.h:240
REG_PREFIX
#define REG_PREFIX
Definition
regex.h:239
REG_UIMPOSSIBLE
#define REG_UIMPOSSIBLE
Definition
regex.h:150
REG_INVARG
#define REG_INVARG
Definition
regex.h:230
REG_MIXED
#define REG_MIXED
Definition
regex.h:231
regex_t
#define regex_t
Definition
regex.h:245
REG_ESPACE
#define REG_ESPACE
Definition
regex.h:227
regguts.h
RAINBOW
#define RAINBOW
Definition
regguts.h:159
REMAGIC
#define REMAGIC
Definition
regguts.h:101
GETCOLOR
#define GETCOLOR(cm, c)
Definition
regguts.h:257
color
short color
Definition
regguts.h:155
COLORLESS
#define COLORLESS
Definition
regguts.h:158
MATCHALL
#define MATCHALL
Definition
regguts.h:412
pg_regprefix
int pg_regprefix(regex_t *re, chr **string, size_t *slength)
Definition
regprefix.c:46
findprefix
static int findprefix(struct cnfa *cnfa, struct colormap *cm, chr *string, size_t *slength)
Definition
regprefix.c:116
carc
Definition
regguts.h:401
cnfa
Definition
regguts.h:407
cnfa::pre
int pre
Definition
regguts.h:415
cnfa::eos
color eos[2]
Definition
regguts.h:418
cnfa::states
struct carc ** states
Definition
regguts.h:421
cnfa::ncolors
int ncolors
Definition
regguts.h:409
cnfa::flags
int flags
Definition
regguts.h:410
cnfa::post
int post
Definition
regguts.h:416
cnfa::bos
color bos[2]
Definition
regguts.h:417
cnfa::nstates
int nstates
Definition
regguts.h:408
colordesc::firstchr
chr firstchr
Definition
regguts.h:183
colordesc::nuchrs
int nuchrs
Definition
regguts.h:179
colordesc::nschrs
int nschrs
Definition
regguts.h:178
colormap
Definition
regguts.h:229
colormap::cd
struct colordesc * cd
Definition
regguts.h:236
guts
Definition
regguts.h:531
guts::tree
struct subre * tree
Definition
regguts.h:537
guts::info
long info
Definition
regguts.h:535
guts::cmap
struct colormap cmap
Definition
regguts.h:540
subre::cnfa
struct cnfa cnfa
Definition
regguts.h:507
src
backend
regex
regprefix.c
Generated on Thu Jan 29 2026 06:13:13 for PostgreSQL Source Code by
1.9.8