PostgreSQL Source Code  git master
regis.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * regis.c
4  * Fast regex subset
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/tsearch/regis.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "tsearch/dicts/regis.h"
18 #include "tsearch/ts_locale.h"
19 
20 #define RS_IN_ONEOF 1
21 #define RS_IN_ONEOF_IN 2
22 #define RS_IN_NONEOF 3
23 #define RS_IN_WAIT 4
24 
25 
26 /*
27  * Test whether a regex is of the subset supported here.
28  * Keep this in sync with RS_compile!
29  */
30 bool
31 RS_isRegis(const char *str)
32 {
33  int state = RS_IN_WAIT;
34  const char *c = str;
35 
36  while (*c)
37  {
38  if (state == RS_IN_WAIT)
39  {
40  if (t_isalpha(c))
41  /* okay */ ;
42  else if (t_iseq(c, '['))
44  else
45  return false;
46  }
47  else if (state == RS_IN_ONEOF)
48  {
49  if (t_iseq(c, '^'))
51  else if (t_isalpha(c))
53  else
54  return false;
55  }
56  else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
57  {
58  if (t_isalpha(c))
59  /* okay */ ;
60  else if (t_iseq(c, ']'))
61  state = RS_IN_WAIT;
62  else
63  return false;
64  }
65  else
66  elog(ERROR, "internal error in RS_isRegis: state %d", state);
67  c += pg_mblen(c);
68  }
69 
70  return (state == RS_IN_WAIT);
71 }
72 
73 static RegisNode *
75 {
76  RegisNode *ptr;
77 
78  ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
79  if (prev)
80  prev->next = ptr;
81  return ptr;
82 }
83 
84 void
85 RS_compile(Regis *r, bool issuffix, const char *str)
86 {
87  int len = strlen(str);
88  int state = RS_IN_WAIT;
89  const char *c = str;
90  RegisNode *ptr = NULL;
91 
92  memset(r, 0, sizeof(Regis));
93  r->issuffix = (issuffix) ? 1 : 0;
94 
95  while (*c)
96  {
97  if (state == RS_IN_WAIT)
98  {
99  if (t_isalpha(c))
100  {
101  if (ptr)
102  ptr = newRegisNode(ptr, len);
103  else
104  ptr = r->node = newRegisNode(NULL, len);
105  COPYCHAR(ptr->data, c);
106  ptr->type = RSF_ONEOF;
107  ptr->len = pg_mblen(c);
108  }
109  else if (t_iseq(c, '['))
110  {
111  if (ptr)
112  ptr = newRegisNode(ptr, len);
113  else
114  ptr = r->node = newRegisNode(NULL, len);
115  ptr->type = RSF_ONEOF;
116  state = RS_IN_ONEOF;
117  }
118  else /* shouldn't get here */
119  elog(ERROR, "invalid regis pattern: \"%s\"", str);
120  }
121  else if (state == RS_IN_ONEOF)
122  {
123  if (t_iseq(c, '^'))
124  {
125  ptr->type = RSF_NONEOF;
127  }
128  else if (t_isalpha(c))
129  {
130  COPYCHAR(ptr->data, c);
131  ptr->len = pg_mblen(c);
133  }
134  else /* shouldn't get here */
135  elog(ERROR, "invalid regis pattern: \"%s\"", str);
136  }
137  else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
138  {
139  if (t_isalpha(c))
140  {
141  COPYCHAR(ptr->data + ptr->len, c);
142  ptr->len += pg_mblen(c);
143  }
144  else if (t_iseq(c, ']'))
145  state = RS_IN_WAIT;
146  else /* shouldn't get here */
147  elog(ERROR, "invalid regis pattern: \"%s\"", str);
148  }
149  else
150  elog(ERROR, "internal error in RS_compile: state %d", state);
151  c += pg_mblen(c);
152  }
153 
154  if (state != RS_IN_WAIT) /* shouldn't get here */
155  elog(ERROR, "invalid regis pattern: \"%s\"", str);
156 
157  ptr = r->node;
158  while (ptr)
159  {
160  r->nchar++;
161  ptr = ptr->next;
162  }
163 }
164 
165 void
167 {
168  RegisNode *ptr = r->node,
169  *tmp;
170 
171  while (ptr)
172  {
173  tmp = ptr->next;
174  pfree(ptr);
175  ptr = tmp;
176  }
177 
178  r->node = NULL;
179 }
180 
181 static bool
182 mb_strchr(char *str, char *c)
183 {
184  int clen,
185  plen,
186  i;
187  char *ptr = str;
188  bool res = false;
189 
190  clen = pg_mblen(c);
191  while (*ptr && !res)
192  {
193  plen = pg_mblen(ptr);
194  if (plen == clen)
195  {
196  i = plen;
197  res = true;
198  while (i--)
199  if (*(ptr + i) != *(c + i))
200  {
201  res = false;
202  break;
203  }
204  }
205 
206  ptr += plen;
207  }
208 
209  return res;
210 }
211 
212 bool
213 RS_execute(Regis *r, char *str)
214 {
215  RegisNode *ptr = r->node;
216  char *c = str;
217  int len = 0;
218 
219  while (*c)
220  {
221  len++;
222  c += pg_mblen(c);
223  }
224 
225  if (len < r->nchar)
226  return 0;
227 
228  c = str;
229  if (r->issuffix)
230  {
231  len -= r->nchar;
232  while (len-- > 0)
233  c += pg_mblen(c);
234  }
235 
236 
237  while (ptr)
238  {
239  switch (ptr->type)
240  {
241  case RSF_ONEOF:
242  if (!mb_strchr((char *) ptr->data, c))
243  return false;
244  break;
245  case RSF_NONEOF:
246  if (mb_strchr((char *) ptr->data, c))
247  return false;
248  break;
249  default:
250  elog(ERROR, "unrecognized regis node type: %d", ptr->type);
251  }
252  ptr = ptr->next;
253  c += pg_mblen(c);
254  }
255 
256  return true;
257 }
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
const char * str
int i
Definition: isn.c:73
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
const void size_t len
char * c
#define RS_IN_WAIT
Definition: regis.c:23
void RS_compile(Regis *r, bool issuffix, const char *str)
Definition: regis.c:85
bool RS_execute(Regis *r, char *str)
Definition: regis.c:213
bool RS_isRegis(const char *str)
Definition: regis.c:31
static bool mb_strchr(char *str, char *c)
Definition: regis.c:182
#define RS_IN_ONEOF_IN
Definition: regis.c:21
void RS_free(Regis *r)
Definition: regis.c:166
#define RS_IN_ONEOF
Definition: regis.c:20
#define RS_IN_NONEOF
Definition: regis.c:22
static RegisNode * newRegisNode(RegisNode *prev, int len)
Definition: regis.c:74
#define RNHDRSZ
Definition: regis.h:27
#define RSF_ONEOF
Definition: regis.h:29
#define RSF_NONEOF
Definition: regis.h:30
unsigned char data[FLEXIBLE_ARRAY_MEMBER]
Definition: regis.h:24
uint32 len
Definition: regis.h:21
struct RegisNode * next
Definition: regis.h:23
uint32 type
Definition: regis.h:20
Definition: regis.h:33
uint32 issuffix
Definition: regis.h:36
RegisNode * node
Definition: regis.h:34
uint32 nchar
Definition: regis.h:37
Definition: regguts.h:323
int t_isalpha(const char *ptr)
Definition: ts_locale.c:65
#define t_iseq(x, c)
Definition: ts_locale.h:38
#define COPYCHAR(d, s)
Definition: ts_locale.h:40