PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
regis.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * regis.c
4  * Fast regex subset
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/tsearch/regis.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "tsearch/dicts/regis.h"
18 #include "tsearch/ts_locale.h"
19 
20 #define RS_IN_ONEOF 1
21 #define RS_IN_ONEOF_IN 2
22 #define RS_IN_NONEOF 3
23 #define RS_IN_WAIT 4
24 
25 
26 /*
27  * Test whether a regex is of the subset supported here.
28  * Keep this in sync with RS_compile!
29  */
30 bool
31 RS_isRegis(const char *str)
32 {
33  int state = RS_IN_WAIT;
34  const char *c = str;
35 
36  while (*c)
37  {
38  if (state == RS_IN_WAIT)
39  {
40  if (t_isalpha(c))
41  /* okay */ ;
42  else if (t_iseq(c, '['))
43  state = RS_IN_ONEOF;
44  else
45  return false;
46  }
47  else if (state == RS_IN_ONEOF)
48  {
49  if (t_iseq(c, '^'))
50  state = RS_IN_NONEOF;
51  else if (t_isalpha(c))
52  state = RS_IN_ONEOF_IN;
53  else
54  return false;
55  }
56  else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
57  {
58  if (t_isalpha(c))
59  /* okay */ ;
60  else if (t_iseq(c, ']'))
61  state = RS_IN_WAIT;
62  else
63  return false;
64  }
65  else
66  elog(ERROR, "internal error in RS_isRegis: state %d", state);
67  c += pg_mblen(c);
68  }
69 
70  return (state == RS_IN_WAIT);
71 }
72 
73 static RegisNode *
74 newRegisNode(RegisNode *prev, int len)
75 {
76  RegisNode *ptr;
77 
78  ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
79  if (prev)
80  prev->next = ptr;
81  return ptr;
82 }
83 
84 void
85 RS_compile(Regis *r, bool issuffix, const char *str)
86 {
87  int len = strlen(str);
88  int state = RS_IN_WAIT;
89  const char *c = str;
90  RegisNode *ptr = NULL;
91 
92  memset(r, 0, sizeof(Regis));
93  r->issuffix = (issuffix) ? 1 : 0;
94 
95  while (*c)
96  {
97  if (state == RS_IN_WAIT)
98  {
99  if (t_isalpha(c))
100  {
101  if (ptr)
102  ptr = newRegisNode(ptr, len);
103  else
104  ptr = r->node = newRegisNode(NULL, len);
105  COPYCHAR(ptr->data, c);
106  ptr->type = RSF_ONEOF;
107  ptr->len = pg_mblen(c);
108  }
109  else if (t_iseq(c, '['))
110  {
111  if (ptr)
112  ptr = newRegisNode(ptr, len);
113  else
114  ptr = r->node = newRegisNode(NULL, len);
115  ptr->type = RSF_ONEOF;
116  state = RS_IN_ONEOF;
117  }
118  else /* shouldn't get here */
119  elog(ERROR, "invalid regis pattern: \"%s\"", str);
120  }
121  else if (state == RS_IN_ONEOF)
122  {
123  if (t_iseq(c, '^'))
124  {
125  ptr->type = RSF_NONEOF;
126  state = RS_IN_NONEOF;
127  }
128  else if (t_isalpha(c))
129  {
130  COPYCHAR(ptr->data, c);
131  ptr->len = pg_mblen(c);
132  state = RS_IN_ONEOF_IN;
133  }
134  else /* shouldn't get here */
135  elog(ERROR, "invalid regis pattern: \"%s\"", str);
136  }
137  else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
138  {
139  if (t_isalpha(c))
140  {
141  COPYCHAR(ptr->data + ptr->len, c);
142  ptr->len += pg_mblen(c);
143  }
144  else if (t_iseq(c, ']'))
145  state = RS_IN_WAIT;
146  else /* shouldn't get here */
147  elog(ERROR, "invalid regis pattern: \"%s\"", str);
148  }
149  else
150  elog(ERROR, "internal error in RS_compile: state %d", state);
151  c += pg_mblen(c);
152  }
153 
154  if (state != RS_IN_WAIT) /* shouldn't get here */
155  elog(ERROR, "invalid regis pattern: \"%s\"", str);
156 
157  ptr = r->node;
158  while (ptr)
159  {
160  r->nchar++;
161  ptr = ptr->next;
162  }
163 }
164 
165 void
167 {
168  RegisNode *ptr = r->node,
169  *tmp;
170 
171  while (ptr)
172  {
173  tmp = ptr->next;
174  pfree(ptr);
175  ptr = tmp;
176  }
177 
178  r->node = NULL;
179 }
180 
181 #ifdef USE_WIDE_UPPER_LOWER
182 static bool
183 mb_strchr(char *str, char *c)
184 {
185  int clen,
186  plen,
187  i;
188  char *ptr = str;
189  bool res = false;
190 
191  clen = pg_mblen(c);
192  while (*ptr && !res)
193  {
194  plen = pg_mblen(ptr);
195  if (plen == clen)
196  {
197  i = plen;
198  res = true;
199  while (i--)
200  if (*(ptr + i) != *(c + i))
201  {
202  res = false;
203  break;
204  }
205  }
206 
207  ptr += plen;
208  }
209 
210  return res;
211 }
212 #else
213 #define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? false : true )
214 #endif
215 
216 
217 bool
218 RS_execute(Regis *r, char *str)
219 {
220  RegisNode *ptr = r->node;
221  char *c = str;
222  int len = 0;
223 
224  while (*c)
225  {
226  len++;
227  c += pg_mblen(c);
228  }
229 
230  if (len < r->nchar)
231  return 0;
232 
233  c = str;
234  if (r->issuffix)
235  {
236  len -= r->nchar;
237  while (len-- > 0)
238  c += pg_mblen(c);
239  }
240 
241 
242  while (ptr)
243  {
244  switch (ptr->type)
245  {
246  case RSF_ONEOF:
247  if (!mb_strchr((char *) ptr->data, c))
248  return false;
249  break;
250  case RSF_NONEOF:
251  if (mb_strchr((char *) ptr->data, c))
252  return false;
253  break;
254  default:
255  elog(ERROR, "unrecognized regis node type: %d", ptr->type);
256  }
257  ptr = ptr->next;
258  c += pg_mblen(c);
259  }
260 
261  return true;
262 }
#define COPYCHAR(d, s)
Definition: ts_locale.h:63
Definition: regis.h:32
uint32 nchar
Definition: regis.h:36
RegisNode * node
Definition: regis.h:34
static RegisNode * newRegisNode(RegisNode *prev, int len)
Definition: regis.c:74
#define RSF_ONEOF
Definition: regis.h:29
#define RS_IN_NONEOF
Definition: regis.c:22
uint32 issuffix
Definition: regis.h:36
#define RNHDRSZ
Definition: regis.h:27
uint32 len
Definition: regis.h:20
#define RSF_NONEOF
Definition: regis.h:30
void pfree(void *pointer)
Definition: mcxt.c:950
#define RS_IN_WAIT
Definition: regis.c:23
#define ERROR
Definition: elog.h:43
char * c
bool RS_execute(Regis *r, char *str)
Definition: regis.c:218
#define t_iseq(x, c)
Definition: ts_locale.h:61
unsigned char data[FLEXIBLE_ARRAY_MEMBER]
Definition: regis.h:24
#define RS_IN_ONEOF_IN
Definition: regis.c:21
void * palloc0(Size size)
Definition: mcxt.c:878
void RS_free(Regis *r)
Definition: regis.c:166
void RS_compile(Regis *r, bool issuffix, const char *str)
Definition: regis.c:85
#define NULL
Definition: c.h:229
Definition: regguts.h:298
uint32 type
Definition: regis.h:20
#define t_isalpha(x)
Definition: ts_locale.h:59
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
#define mb_strchr(s, c)
Definition: regis.c:213
bool RS_isRegis(const char *str)
Definition: regis.c:31
int i
#define elog
Definition: elog.h:219
#define RS_IN_ONEOF
Definition: regis.c:20
struct RegisNode * next
Definition: regis.h:23