PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
regexport.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * regexport.c
4  * Functions for exporting info about a regex's NFA
5  *
6  * In this implementation, the NFA defines a necessary but not sufficient
7  * condition for a string to match the regex: that is, there can be strings
8  * that match the NFA but don't match the full regex, but not vice versa.
9  * Thus, for example, it is okay for the functions below to ignore lookaround
10  * constraints, which merely constrain the string some more.
11  *
12  * Notice that these functions return info into caller-provided arrays
13  * rather than doing their own malloc's. This simplifies the APIs by
14  * eliminating a class of error conditions, and in the case of colors
15  * allows the caller to decide how big is too big to bother with.
16  *
17  *
18  * Portions Copyright (c) 2013-2017, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1998, 1999 Henry Spencer
20  *
21  * IDENTIFICATION
22  * src/backend/regex/regexport.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "regex/regguts.h"
28 
29 #include "regex/regexport.h"
30 
31 
32 /*
33  * Get total number of NFA states.
34  */
35 int
37 {
38  struct cnfa *cnfa;
39 
40  assert(regex != NULL && regex->re_magic == REMAGIC);
41  cnfa = &((struct guts *) regex->re_guts)->search;
42 
43  return cnfa->nstates;
44 }
45 
46 /*
47  * Get initial state of NFA.
48  */
49 int
51 {
52  struct cnfa *cnfa;
53 
54  assert(regex != NULL && regex->re_magic == REMAGIC);
55  cnfa = &((struct guts *) regex->re_guts)->search;
56 
57  return cnfa->pre;
58 }
59 
60 /*
61  * Get final state of NFA.
62  */
63 int
65 {
66  struct cnfa *cnfa;
67 
68  assert(regex != NULL && regex->re_magic == REMAGIC);
69  cnfa = &((struct guts *) regex->re_guts)->search;
70 
71  return cnfa->post;
72 }
73 
74 /*
75  * Get number of outgoing NFA arcs of state number "st".
76  *
77  * Note: LACON arcs are ignored, both here and in pg_reg_getoutarcs().
78  */
79 int
80 pg_reg_getnumoutarcs(const regex_t *regex, int st)
81 {
82  struct cnfa *cnfa;
83  struct carc *ca;
84  int count;
85 
86  assert(regex != NULL && regex->re_magic == REMAGIC);
87  cnfa = &((struct guts *) regex->re_guts)->search;
88 
89  if (st < 0 || st >= cnfa->nstates)
90  return 0;
91  count = 0;
92  for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
93  {
94  if (ca->co < cnfa->ncolors)
95  count++;
96  }
97  return count;
98 }
99 
100 /*
101  * Write array of outgoing NFA arcs of state number "st" into arcs[],
102  * whose length arcs_len must be at least as long as indicated by
103  * pg_reg_getnumoutarcs(), else not all arcs will be returned.
104  */
105 void
106 pg_reg_getoutarcs(const regex_t *regex, int st,
107  regex_arc_t *arcs, int arcs_len)
108 {
109  struct cnfa *cnfa;
110  struct carc *ca;
111 
112  assert(regex != NULL && regex->re_magic == REMAGIC);
113  cnfa = &((struct guts *) regex->re_guts)->search;
114 
115  if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
116  return;
117  for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
118  {
119  if (ca->co < cnfa->ncolors)
120  {
121  arcs->co = ca->co;
122  arcs->to = ca->to;
123  arcs++;
124  if (--arcs_len == 0)
125  break;
126  }
127  }
128 }
129 
130 /*
131  * Get total number of colors.
132  */
133 int
135 {
136  struct colormap *cm;
137 
138  assert(regex != NULL && regex->re_magic == REMAGIC);
139  cm = &((struct guts *) regex->re_guts)->cmap;
140 
141  return cm->max + 1;
142 }
143 
144 /*
145  * Check if color is beginning of line/string.
146  *
147  * (We might at some point need to offer more refined handling of pseudocolors,
148  * but this will do for now.)
149  */
150 int
151 pg_reg_colorisbegin(const regex_t *regex, int co)
152 {
153  struct cnfa *cnfa;
154 
155  assert(regex != NULL && regex->re_magic == REMAGIC);
156  cnfa = &((struct guts *) regex->re_guts)->search;
157 
158  if (co == cnfa->bos[0] || co == cnfa->bos[1])
159  return true;
160  else
161  return false;
162 }
163 
164 /*
165  * Check if color is end of line/string.
166  */
167 int
168 pg_reg_colorisend(const regex_t *regex, int co)
169 {
170  struct cnfa *cnfa;
171 
172  assert(regex != NULL && regex->re_magic == REMAGIC);
173  cnfa = &((struct guts *) regex->re_guts)->search;
174 
175  if (co == cnfa->eos[0] || co == cnfa->eos[1])
176  return true;
177  else
178  return false;
179 }
180 
181 /*
182  * Get number of member chrs of color number "co".
183  *
184  * Note: we return -1 if the color number is invalid, or if it is a special
185  * color (WHITE or a pseudocolor), or if the number of members is uncertain.
186  * Callers should not try to extract the members if -1 is returned.
187  */
188 int
189 pg_reg_getnumcharacters(const regex_t *regex, int co)
190 {
191  struct colormap *cm;
192 
193  assert(regex != NULL && regex->re_magic == REMAGIC);
194  cm = &((struct guts *) regex->re_guts)->cmap;
195 
196  if (co <= 0 || co > cm->max) /* we reject 0 which is WHITE */
197  return -1;
198  if (cm->cd[co].flags & PSEUDO) /* also pseudocolors (BOS etc) */
199  return -1;
200 
201  /*
202  * If the color appears anywhere in the high colormap, treat its number of
203  * members as uncertain. In principle we could determine all the specific
204  * chrs corresponding to each such entry, but it would be expensive
205  * (particularly if character class tests are required) and it doesn't
206  * seem worth it.
207  */
208  if (cm->cd[co].nuchrs != 0)
209  return -1;
210 
211  /* OK, return the known number of member chrs */
212  return cm->cd[co].nschrs;
213 }
214 
215 /*
216  * Write array of member chrs of color number "co" into chars[],
217  * whose length chars_len must be at least as long as indicated by
218  * pg_reg_getnumcharacters(), else not all chars will be returned.
219  *
220  * Fetching the members of WHITE or a pseudocolor is not supported.
221  *
222  * Caution: this is a relatively expensive operation.
223  */
224 void
225 pg_reg_getcharacters(const regex_t *regex, int co,
226  pg_wchar *chars, int chars_len)
227 {
228  struct colormap *cm;
229  chr c;
230 
231  assert(regex != NULL && regex->re_magic == REMAGIC);
232  cm = &((struct guts *) regex->re_guts)->cmap;
233 
234  if (co <= 0 || co > cm->max || chars_len <= 0)
235  return;
236  if (cm->cd[co].flags & PSEUDO)
237  return;
238 
239  /*
240  * We need only examine the low character map; there should not be any
241  * matching entries in the high map.
242  */
243  for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
244  {
245  if (cm->locolormap[c - CHR_MIN] == co)
246  {
247  *chars++ = c;
248  if (--chars_len == 0)
249  break;
250  }
251  }
252 }
int pg_reg_getnumstates(const regex_t *regex)
Definition: regexport.c:36
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
Definition: regexport.c:106
int pg_reg_getfinalstate(const regex_t *regex)
Definition: regexport.c:64
int pg_reg_getnumcharacters(const regex_t *regex, int co)
Definition: regexport.c:189
Definition: regguts.h:354
int pre
Definition: regguts.h:360
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
Definition: regexport.c:225
color * locolormap
Definition: regguts.h:218
#define REMAGIC
Definition: regguts.h:96
int pg_reg_colorisbegin(const regex_t *regex, int co)
Definition: regexport.c:151
struct carc ** states
Definition: regguts.h:366
struct colordesc * cd
Definition: regguts.h:214
int pg_reg_getnumcolors(const regex_t *regex)
Definition: regexport.c:134
int nstates
Definition: regguts.h:356
int re_magic
Definition: regex.h:57
pg_wchar chr
Definition: regcustom.h:59
int pg_reg_getinitialstate(const regex_t *regex)
Definition: regexport.c:50
char * c
#define assert(TEST)
Definition: imath.c:37
#define CHR_MIN
Definition: regcustom.h:65
unsigned int pg_wchar
Definition: mbprint.c:31
color bos[2]
Definition: regguts.h:362
int pg_reg_colorisend(const regex_t *regex, int co)
Definition: regexport.c:168
Definition: regguts.h:462
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
Definition: regexport.c:80
int to
Definition: regguts.h:351
#define NULL
Definition: c.h:226
int nschrs
Definition: regguts.h:156
int flags
Definition: regguts.h:162
int post
Definition: regguts.h:361
char * re_guts
Definition: regex.h:78
#define PSEUDO
Definition: regguts.h:164
color co
Definition: regguts.h:350
#define COLORLESS
Definition: regguts.h:137
int nuchrs
Definition: regguts.h:157
static char chars[TZ_MAX_CHARS]
Definition: zic.c:382
Definition: regex.h:55
size_t max
Definition: regguts.h:212
color eos[2]
Definition: regguts.h:363
int ncolors
Definition: regguts.h:357
#define MAX_SIMPLE_CHR
Definition: regcustom.h:88
Definition: regguts.h:348