PostgreSQL Source Code  git master
regex.h
Go to the documentation of this file.
1 #ifndef _PG_REGEX_H_
2 #define _PG_REGEX_H_ /* never again */
3 /*
4  * regular expressions
5  *
6  * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7  *
8  * Development of this software was funded, in part, by Cray Research Inc.,
9  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
10  * Corporation, none of whom are responsible for the results. The author
11  * thanks all of them.
12  *
13  * Redistribution and use in source and binary forms -- with or without
14  * modification -- are permitted for any purpose, provided that
15  * redistributions in source form retain this entire copyright notice and
16  * indicate the origin and nature of any modifications.
17  *
18  * I'd appreciate being given credit for this package in the documentation
19  * of software which uses it, but that is not a requirement.
20  *
21  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
22  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
23  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  * src/include/regex/regex.h
33  */
34 
35 /*
36  * This is an implementation of POSIX regex_t, so it clashes with the
37  * system-provided <regex.h> header. That header might be unintentionally
38  * included already, so we force that to happen now on all systems other than
39  * Windows (see the #ifndef below) to show that we can cope and that we completely replace the system regex interfaces.
40  *
41  * Note that we avoided using _REGEX_H_ as an include guard, as that confuses
42  * matters on BSD family systems including macOS that use the same include
43  * guard.
44  */
45 #ifndef _WIN32
46 #include <regex.h>
47 #endif
48 
49 /* Avoid redefinition errors due to the system header: each name undefined here is defined again further down in this file. */
50 #undef REG_UBACKREF /* start of the re_info bits */
51 #undef REG_ULOOKAROUND
52 #undef REG_UBOUNDS
53 #undef REG_UBRACES
54 #undef REG_UBSALNUM
55 #undef REG_UPBOTCH
56 #undef REG_UBBS
57 #undef REG_UNONPOSIX
58 #undef REG_UUNSPEC
59 #undef REG_UUNPORT
60 #undef REG_ULOCALE
61 #undef REG_UEMPTYMATCH
62 #undef REG_UIMPOSSIBLE
63 #undef REG_USHORTEST
64 #undef REG_BASIC /* start of the compilation flags */
65 #undef REG_EXTENDED
66 #undef REG_ADVF
67 #undef REG_ADVANCED
68 #undef REG_QUOTE
69 #undef REG_NOSPEC
70 #undef REG_ICASE
71 #undef REG_NOSUB
72 #undef REG_EXPANDED
73 #undef REG_NLSTOP
74 #undef REG_NLANCH
75 #undef REG_NEWLINE
76 #undef REG_PEND
77 #undef REG_EXPECT
78 #undef REG_BOSONLY
79 #undef REG_DUMP
80 #undef REG_FAKE
81 #undef REG_PROGRESS
82 #undef REG_NOTBOL /* start of the execution flags */
83 #undef REG_NOTEOL
84 #undef REG_STARTEND
85 #undef REG_FTRACE
86 #undef REG_MTRACE
87 #undef REG_SMALL
88 #undef REG_OKAY /* start of the result/error codes */
89 #undef REG_NOMATCH
90 #undef REG_BADPAT
91 #undef REG_ECOLLATE
92 #undef REG_ECTYPE
93 #undef REG_EESCAPE
94 #undef REG_ESUBREG
95 #undef REG_EBRACK
96 #undef REG_EPAREN
97 #undef REG_EBRACE
98 #undef REG_BADBR
99 #undef REG_ERANGE
100 #undef REG_ESPACE
101 #undef REG_BADRPT
102 #undef REG_ASSERT
103 #undef REG_INVARG
104 #undef REG_MIXED
105 #undef REG_BADOPT
106 #undef REG_ETOOBIG
107 #undef REG_ECOLORS
108 #undef REG_ATOI
109 #undef REG_ITOA
110 #undef REG_PREFIX
111 #undef REG_EXACT
112 
112 
113 /*
114  * Add your own defines, if needed, here.
115  */
116 #include "mb/pg_wchar.h"
117 
118 /*
119  * interface types etc.
120  */
121 
122 /*
123  * pg_regoff_t (also spelled regoff_t, via the #define later in this file) is the
124  * type of the rm_so/rm_eo offsets in pg_regmatch_t. It has to be large enough to hold either off_t or ssize_t,
125  * and must be signed; it's only a guess that long is suitable.
126  */
126 typedef long pg_regoff_t;
127 
128 /*
129  * other interface types
130  */
131 
132 /* the biggie, a compiled RE (or rather, a front end to same): caller-visible fields first, then opaque pointers to the hidden innards. Also spelled regex_t, via the #define later in this file. */
133 typedef struct
134 {
135  int re_magic; /* magic number */
136  size_t re_nsub; /* number of subexpressions */
137  long re_info; /* bitmask of the following REG_U* flags (octal constants, one bit each): */
138 #define REG_UBACKREF 000001 /* has back-reference (\n) */
139 #define REG_ULOOKAROUND 000002 /* has lookahead/lookbehind constraint */
140 #define REG_UBOUNDS 000004 /* has bounded quantifier ({m,n}) */
141 #define REG_UBRACES 000010 /* has { that doesn't begin a quantifier */
142 #define REG_UBSALNUM 000020 /* has backslash-alphanumeric in non-ARE */
143 #define REG_UPBOTCH 000040 /* has unmatched right paren in ERE (legal
144  * per spec, but that was a mistake) */
145 #define REG_UBBS 000100 /* has backslash within bracket expr */
146 #define REG_UNONPOSIX 000200 /* has any construct that extends POSIX */
147 #define REG_UUNSPEC 000400 /* has any case disallowed by POSIX, e.g.
148  * an empty branch */
149 #define REG_UUNPORT 001000 /* has numeric character code dependency */
150 #define REG_ULOCALE 002000 /* has locale dependency */
151 #define REG_UEMPTYMATCH 004000 /* can match a zero-length string */
152 #define REG_UIMPOSSIBLE 010000 /* provably cannot match anything */
153 #define REG_USHORTEST 020000 /* has non-greedy quantifier */
154  int re_csize; /* sizeof(character) */
155  char *re_endp; /* backward compatibility kludge */
156  Oid re_collation; /* Collation that defines LC_CTYPE behavior */
157  /* the rest is opaque pointers to hidden innards */
158  char *re_guts; /* `char *' is more portable than `void *' */
159  char *re_fns; /* opaque as well; declared char * for the same reason as re_guts */
160 } pg_regex_t;
161 
162 /* result reporting (may acquire more fields later); one start/end offset pair per reported substring. Also spelled regmatch_t, via the #define later in this file. */
163 typedef struct
164 {
165  pg_regoff_t rm_so; /* start of substring */
166  pg_regoff_t rm_eo; /* end of substring; NOTE(review): inclusive vs. one-past-the-end is not stated here -- confirm against regexec.c */
167 } pg_regmatch_t;
168 
169 /* supplementary control and reporting; this is the type of the "details" argument of pg_regexec() */
170 typedef struct
171 {
172  pg_regmatch_t rm_extend; /* see REG_EXPECT (the compilation flag described as "report details on partial/limited matches") */
173 } rm_detail_t;
174 
175 
176 
177 /*
178  * regex compilation flags (octal constants; apart from the composite values noted below, each is a single bit)
179  */
180 #define REG_BASIC 000000 /* BREs (convenience) -- i.e. no bits set */
181 #define REG_EXTENDED 000001 /* EREs */
182 #define REG_ADVF 000002 /* advanced features in EREs */
183 #define REG_ADVANCED 000003 /* AREs (which are also EREs); equals REG_EXTENDED | REG_ADVF */
184 #define REG_QUOTE 000004 /* no special characters, none */
185 #define REG_NOSPEC REG_QUOTE /* historical synonym */
186 #define REG_ICASE 000010 /* ignore case */
187 #define REG_NOSUB 000020 /* caller doesn't need subexpr match data */
188 #define REG_EXPANDED 000040 /* expanded format, white space & comments */
189 #define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
190 #define REG_NLANCH 000200 /* ^ matches after \n, $ before */
191 #define REG_NEWLINE 000300 /* newlines are line terminators; equals REG_NLSTOP | REG_NLANCH */
192 #define REG_PEND 000400 /* ugh -- backward-compatibility hack */
193 #define REG_EXPECT 001000 /* report details on partial/limited matches */
194 #define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */
195 #define REG_DUMP 004000 /* none of your business :-) */
196 #define REG_FAKE 010000 /* none of your business :-) */
197 #define REG_PROGRESS 020000 /* none of your business :-) */
198 
199 
200 
201 /*
202  * regex execution flags (octal constants, one bit each; numbered separately from the compilation flags, so the two sets overlap in value)
203  */
204 #define REG_NOTBOL 0001 /* BOS is not BOL */
205 #define REG_NOTEOL 0002 /* EOS is not EOL */
206 #define REG_STARTEND 0004 /* backward compatibility kludge */
207 #define REG_FTRACE 0010 /* none of your business */
208 #define REG_MTRACE 0020 /* none of your business */
209 #define REG_SMALL 0040 /* none of your business */
210 
211 
212 /*
213  * error reporting (decimal values, unlike the octal flag macros earlier in this file)
214  * Be careful if modifying the list of error codes -- the table used by
215  * regerror() is generated automatically from this file! Value 14 is currently unassigned (REG_BADRPT is 13, REG_ASSERT is 15).
216  */
217 #define REG_OKAY 0 /* no errors detected */
218 #define REG_NOMATCH 1 /* failed to match */
219 #define REG_BADPAT 2 /* invalid regexp */
220 #define REG_ECOLLATE 3 /* invalid collating element */
221 #define REG_ECTYPE 4 /* invalid character class */
222 #define REG_EESCAPE 5 /* invalid escape \ sequence */
223 #define REG_ESUBREG 6 /* invalid backreference number */
224 #define REG_EBRACK 7 /* brackets [] not balanced */
225 #define REG_EPAREN 8 /* parentheses () not balanced */
226 #define REG_EBRACE 9 /* braces {} not balanced */
227 #define REG_BADBR 10 /* invalid repetition count(s) */
228 #define REG_ERANGE 11 /* invalid character range */
229 #define REG_ESPACE 12 /* out of memory */
230 #define REG_BADRPT 13 /* quantifier operand invalid */
231 #define REG_ASSERT 15 /* "can't happen" -- you found a bug */
232 #define REG_INVARG 16 /* invalid argument to regex function */
233 #define REG_MIXED 17 /* character widths of regex and string differ */
234 #define REG_BADOPT 18 /* invalid embedded option */
235 #define REG_ETOOBIG 19 /* regular expression is too complex */
236 #define REG_ECOLORS 20 /* too many colors */
237 /* two specials for debugging and testing */
238 #define REG_ATOI 101 /* convert error-code name to number */
239 #define REG_ITOA 102 /* convert error-code number to name */
240 /* non-error result codes for pg_regprefix (negative, so they cannot collide with the non-negative codes above) */
241 #define REG_PREFIX (-1) /* identified a common prefix */
242 #define REG_EXACT (-2) /* identified an exact match */
243 
244 
245 /* Redirect the standard typenames to our typenames. From this point on (including the prototypes below), regoff_t, regex_t and regmatch_t expand to the pg_-prefixed types declared in this header. */
246 #define regoff_t pg_regoff_t
247 #define regex_t pg_regex_t
248 #define regmatch_t pg_regmatch_t
249 
250 
251 /*
252  * the prototypes for exported functions
253  */
254 
255 /* regex library entry points; despite the historical "regcomp.c" label, they are defined in several files, as noted per function */
256 extern int pg_regcomp(regex_t *re, const pg_wchar *string, size_t len, /* regcomp.c */
257  int flags, Oid collation);
258 extern int pg_regexec(regex_t *re, const pg_wchar *string, size_t len, /* regexec.c */
259  size_t search_start, rm_detail_t *details,
260  size_t nmatch, regmatch_t pmatch[], int flags);
261 extern int pg_regprefix(regex_t *re, pg_wchar **string, size_t *slength); /* regprefix.c; REG_PREFIX and REG_EXACT are its non-error result codes */
262 extern void pg_regfree(regex_t *re); /* regfree.c */
263 extern size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, /* regerror.c */
264  size_t errbuf_size);
265 
266 /* regexp.c -- unlike the pg_reg* functions, these take the pattern as a text * value (text_re) rather than a pg_wchar array */
267 extern regex_t *RE_compile_and_cache(text *text_re, int cflags, Oid collation);
268 extern bool RE_compile_and_execute(text *text_re, char *dat, int dat_len, /* dat/dat_len: presumably the subject string and its length -- confirm in regexp.c */
269  int cflags, Oid collation,
270  int nmatch, regmatch_t *pmatch);
271 
272 #endif /* _PG_REGEX_H_ */
int errcode(int sqlerrcode)
Definition: elog.c:853
unsigned int pg_wchar
Definition: mbprint.c:31
const void size_t len
unsigned int Oid
Definition: postgres_ext.h:31
bool RE_compile_and_execute(text *text_re, char *dat, int dat_len, int cflags, Oid collation, int nmatch, regmatch_t *pmatch)
Definition: regexp.c:358
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
int pg_regprefix(regex_t *re, pg_wchar **string, size_t *slength)
Definition: regprefix.c:46
int pg_regcomp(regex_t *re, const pg_wchar *string, size_t len, int flags, Oid collation)
Definition: regcomp.c:370
#define regmatch_t
Definition: regex.h:246
regex_t * RE_compile_and_cache(text *text_re, int cflags, Oid collation)
Definition: regexp.c:141
long pg_regoff_t
Definition: regex.h:126
#define regex_t
Definition: regex.h:245
int pg_regexec(regex_t *re, const pg_wchar *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:185
void pg_regfree(regex_t *re)
Definition: regfree.c:49
int re_magic
Definition: regex.h:135
size_t re_nsub
Definition: regex.h:136
long re_info
Definition: regex.h:137
Definition: c.h:690