PostgreSQL Source Code  git master
like.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * like.c
4  * like expression handling code.
5  *
6  * NOTES
7  * A big hack of the regexp.c code!! Contributed by
8  * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9  *
10  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/utils/adt/like.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <ctype.h>
21 
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/builtins.h"
26 #include "utils/pg_locale.h"
27 #include "varatt.h"
28 
29 
/*
 * Result codes of the MatchText family.  The fmgr entry points in this
 * file only ever compare a result against LIKE_TRUE, so LIKE_FALSE and
 * LIKE_ABORT are indistinguishable to SQL callers.
 * NOTE(review): LIKE_ABORT is presumably produced inside like_match.c
 * (not visible here) -- confirm its exact meaning there.
 */
30 #define LIKE_TRUE 1
31 #define LIKE_FALSE 0
32 #define LIKE_ABORT (-1)
33 
34 
/*
 * Forward declarations.  The *_MatchText and *_do_like_escape functions
 * have no bodies written out in this file; each is generated by one of the
 * #include "like_match.c" instantiations further down, with MatchText /
 * do_like_escape #defined to the name declared here.
 */
/* Single-byte encoding variants. */
35 static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
36  pg_locale_t locale, bool locale_is_c);
37 static text *SB_do_like_escape(text *pat, text *esc);
38 
/* Generic multibyte encoding variants. */
39 static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
40  pg_locale_t locale, bool locale_is_c);
41 static text *MB_do_like_escape(text *pat, text *esc);
42 
/* UTF8-specific matcher. */
43 static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
44  pg_locale_t locale, bool locale_is_c);
45 
/* Single-byte case-insensitive matcher (folds via SB_lower_char). */
46 static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
47  pg_locale_t locale, bool locale_is_c);
48 
/* Dispatchers that pick one of the matchers above; defined in this file. */
49 static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
50 static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51 
/*--------------------
 * wchareq
 *		Helper for the multibyte MatchText instantiation: decide whether the
 *		multibyte characters starting at p1 and p2 are byte-for-byte equal.
 *
 * The length of each character is obtained from pg_mblen().
 * Returns 1 when the two characters are identical, 0 otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;
	int			left;

	/* Fast path: characters with different first bytes cannot be equal. */
	if (*p1 != *p2)
		return 0;

	/* Characters of different byte length cannot be equal either. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Same length: compare every byte of the character. */
	for (left = nbytes; left != 0; left--, p1++, p2++)
	{
		if (*p1 != *p2)
			return 0;
	}

	return 1;
}
78 
79 /*
80  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
81  * comparison of multibyte characters. It did not work at all, however,
82  * because it relied on tolower() which has a single-byte API ... and
83  * towlower() wouldn't be much better since we have no suitably cheap way
84  * of getting a single character transformed to the system's wchar_t format.
85  * So now, we just downcase the strings using lower() and apply regular LIKE
86  * comparison. This should be revisited when we install better locale support.
87  */
88 
89 /*
90  * We do handle case-insensitive matching for single-byte encodings using
91  * fold-on-the-fly processing, however.
92  */
93 static char
94 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
95 {
96  if (locale_is_c)
97  return pg_ascii_tolower(c);
98 #ifdef HAVE_LOCALE_T
99  else if (locale)
100  return tolower_l(c, locale->info.lt);
101 #endif
102  else
103  return pg_tolower(c);
104 }
105 
106 
/*
 * The remainder of this section instantiates like_match.c four times.
 * Before each #include the macros it is parameterized on (CHAREQ, NextChar,
 * CopyAdvChar, MATCH_LOWER, MatchText, do_like_escape) are set up for one
 * flavor of string handling.  The macros are re-#defined here without any
 * #undef in between.
 * NOTE(review): presumably like_match.c #undefs them at its end and supplies
 * defaults for the ones a given section leaves undefined -- confirm there.
 */

/* NextByte: step pointer p forward one byte and shrink the remaining length. */
107 #define NextByte(p, plen) ((p)++, (plen)--)
108 
109 /* Set up to compile like_match.c for multibyte characters */
/* Characters are compared with wchareq() and stepped over by pg_mblen() bytes. */
110 #define CHAREQ(p1, p2) wchareq((p1), (p2))
111 #define NextChar(p, plen) \
112  do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
/* CopyAdvChar: copy one whole character from src to dst, advancing both. */
113 #define CopyAdvChar(dst, src, srclen) \
114  do { int __l = pg_mblen(src); \
115  (srclen) -= __l; \
116  while (__l-- > 0) \
117  *(dst)++ = *(src)++; \
118  } while (0)
119 
120 #define MatchText MB_MatchText
121 #define do_like_escape MB_do_like_escape
122 
123 #include "like_match.c"
124 
125 /* Set up to compile like_match.c for single-byte characters */
/* Here a character is exactly one byte, so plain byte operations suffice. */
126 #define CHAREQ(p1, p2) (*(p1) == *(p2))
127 #define NextChar(p, plen) NextByte((p), (plen))
128 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
129 
130 #define MatchText SB_MatchText
131 #define do_like_escape SB_do_like_escape
132 
133 #include "like_match.c"
134 
135 /* setup to compile like_match.c for single byte case insensitive matches */
/*
 * MATCH_LOWER folds each byte through SB_lower_char(); it expands to a use
 * of "locale" and "locale_is_c", which are parameter names of the generated
 * function (see the SB_IMatchText prototype).  Neither CHAREQ nor
 * do_like_escape is defined for this instantiation.
 */
136 #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
137 #define NextChar(p, plen) NextByte((p), (plen))
138 #define MatchText SB_IMatchText
139 
140 #include "like_match.c"
141 
142 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
143 
/*
 * This NextChar avoids calling pg_mblen(): it advances one byte and then
 * keeps advancing while input remains and the current byte has the bit
 * pattern 10xxxxxx (i.e. (byte & 0xC0) == 0x80).
 */
144 #define NextChar(p, plen) \
145  do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
146 #define MatchText UTF8_MatchText
147 
148 #include "like_match.c"
149 
150 /* Generic for all cases not requiring inline case-folding */
151 static inline int
152 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
153 {
154  if (collation && !lc_ctype_is_c(collation))
155  {
157 
159  ereport(ERROR,
160  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
161  errmsg("nondeterministic collations are not supported for LIKE")));
162  }
163 
165  return SB_MatchText(s, slen, p, plen, 0, true);
166  else if (GetDatabaseEncoding() == PG_UTF8)
167  return UTF8_MatchText(s, slen, p, plen, 0, true);
168  else
169  return MB_MatchText(s, slen, p, plen, 0, true);
170 }
171 
172 static inline int
174 {
175  char *s,
176  *p;
177  int slen,
178  plen;
179  pg_locale_t locale = 0;
180  bool locale_is_c = false;
181 
182  if (!OidIsValid(collation))
183  {
184  /*
185  * This typically means that the parser could not resolve a conflict
186  * of implicit collations, so report it that way.
187  */
188  ereport(ERROR,
189  (errcode(ERRCODE_INDETERMINATE_COLLATION),
190  errmsg("could not determine which collation to use for ILIKE"),
191  errhint("Use the COLLATE clause to set the collation explicitly.")));
192  }
193 
194  if (lc_ctype_is_c(collation))
195  locale_is_c = true;
196  else
197  locale = pg_newlocale_from_collation(collation);
198 
200  ereport(ERROR,
201  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
202  errmsg("nondeterministic collations are not supported for ILIKE")));
203 
204  /*
205  * For efficiency reasons, in the single byte case we don't call lower()
206  * on the pattern and text, but instead call SB_lower_char on each
207  * character. In the multi-byte case we don't have much choice :-(. Also,
208  * ICU does not support single-character case folding, so we go the long
209  * way.
210  */
211 
212  if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
213  {
215  PointerGetDatum(pat)));
216  p = VARDATA_ANY(pat);
217  plen = VARSIZE_ANY_EXHDR(pat);
219  PointerGetDatum(str)));
220  s = VARDATA_ANY(str);
221  slen = VARSIZE_ANY_EXHDR(str);
222  if (GetDatabaseEncoding() == PG_UTF8)
223  return UTF8_MatchText(s, slen, p, plen, 0, true);
224  else
225  return MB_MatchText(s, slen, p, plen, 0, true);
226  }
227  else
228  {
229  p = VARDATA_ANY(pat);
230  plen = VARSIZE_ANY_EXHDR(pat);
231  s = VARDATA_ANY(str);
232  slen = VARSIZE_ANY_EXHDR(str);
233  return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
234  }
235 }
236 
237 /*
238  * interface routines called by the function manager
239  */
240 
241 Datum
243 {
244  Name str = PG_GETARG_NAME(0);
245  text *pat = PG_GETARG_TEXT_PP(1);
246  bool result;
247  char *s,
248  *p;
249  int slen,
250  plen;
251 
252  s = NameStr(*str);
253  slen = strlen(s);
254  p = VARDATA_ANY(pat);
255  plen = VARSIZE_ANY_EXHDR(pat);
256 
257  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
258 
259  PG_RETURN_BOOL(result);
260 }
261 
262 Datum
264 {
265  Name str = PG_GETARG_NAME(0);
266  text *pat = PG_GETARG_TEXT_PP(1);
267  bool result;
268  char *s,
269  *p;
270  int slen,
271  plen;
272 
273  s = NameStr(*str);
274  slen = strlen(s);
275  p = VARDATA_ANY(pat);
276  plen = VARSIZE_ANY_EXHDR(pat);
277 
278  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
279 
280  PG_RETURN_BOOL(result);
281 }
282 
283 Datum
285 {
286  text *str = PG_GETARG_TEXT_PP(0);
287  text *pat = PG_GETARG_TEXT_PP(1);
288  bool result;
289  char *s,
290  *p;
291  int slen,
292  plen;
293 
294  s = VARDATA_ANY(str);
295  slen = VARSIZE_ANY_EXHDR(str);
296  p = VARDATA_ANY(pat);
297  plen = VARSIZE_ANY_EXHDR(pat);
298 
299  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
300 
301  PG_RETURN_BOOL(result);
302 }
303 
304 Datum
306 {
307  text *str = PG_GETARG_TEXT_PP(0);
308  text *pat = PG_GETARG_TEXT_PP(1);
309  bool result;
310  char *s,
311  *p;
312  int slen,
313  plen;
314 
315  s = VARDATA_ANY(str);
316  slen = VARSIZE_ANY_EXHDR(str);
317  p = VARDATA_ANY(pat);
318  plen = VARSIZE_ANY_EXHDR(pat);
319 
320  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
321 
322  PG_RETURN_BOOL(result);
323 }
324 
325 Datum
327 {
329  bytea *pat = PG_GETARG_BYTEA_PP(1);
330  bool result;
331  char *s,
332  *p;
333  int slen,
334  plen;
335 
336  s = VARDATA_ANY(str);
337  slen = VARSIZE_ANY_EXHDR(str);
338  p = VARDATA_ANY(pat);
339  plen = VARSIZE_ANY_EXHDR(pat);
340 
341  result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
342 
343  PG_RETURN_BOOL(result);
344 }
345 
346 Datum
348 {
350  bytea *pat = PG_GETARG_BYTEA_PP(1);
351  bool result;
352  char *s,
353  *p;
354  int slen,
355  plen;
356 
357  s = VARDATA_ANY(str);
358  slen = VARSIZE_ANY_EXHDR(str);
359  p = VARDATA_ANY(pat);
360  plen = VARSIZE_ANY_EXHDR(pat);
361 
362  result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
363 
364  PG_RETURN_BOOL(result);
365 }
366 
367 /*
368  * Case-insensitive versions
369  */
370 
371 Datum
373 {
374  Name str = PG_GETARG_NAME(0);
375  text *pat = PG_GETARG_TEXT_PP(1);
376  bool result;
377  text *strtext;
378 
380  NameGetDatum(str)));
381  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
382 
383  PG_RETURN_BOOL(result);
384 }
385 
386 Datum
388 {
389  Name str = PG_GETARG_NAME(0);
390  text *pat = PG_GETARG_TEXT_PP(1);
391  bool result;
392  text *strtext;
393 
395  NameGetDatum(str)));
396  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
397 
398  PG_RETURN_BOOL(result);
399 }
400 
401 Datum
403 {
404  text *str = PG_GETARG_TEXT_PP(0);
405  text *pat = PG_GETARG_TEXT_PP(1);
406  bool result;
407 
408  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
409 
410  PG_RETURN_BOOL(result);
411 }
412 
413 Datum
415 {
416  text *str = PG_GETARG_TEXT_PP(0);
417  text *pat = PG_GETARG_TEXT_PP(1);
418  bool result;
419 
420  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
421 
422  PG_RETURN_BOOL(result);
423 }
424 
425 /*
426  * like_escape() --- given a pattern and an ESCAPE string,
427  * convert the pattern to use Postgres' standard backslash escape convention.
428  */
429 Datum
431 {
432  text *pat = PG_GETARG_TEXT_PP(0);
433  text *esc = PG_GETARG_TEXT_PP(1);
434  text *result;
435 
437  result = SB_do_like_escape(pat, esc);
438  else
439  result = MB_do_like_escape(pat, esc);
440 
441  PG_RETURN_TEXT_P(result);
442 }
443 
444 /*
445  * like_escape_bytea() --- given a pattern and an ESCAPE string,
446  * convert the pattern to use Postgres' standard backslash escape convention.
447  */
448 Datum
450 {
451  bytea *pat = PG_GETARG_BYTEA_PP(0);
452  bytea *esc = PG_GETARG_BYTEA_PP(1);
453  bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
454 
455  PG_RETURN_BYTEA_P((bytea *) result);
456 }
#define NameStr(name)
Definition: c.h:730
#define OidIsValid(objectId)
Definition: c.h:759
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
Datum DirectFunctionCall1Coll(PGFunction func, Oid collation, Datum arg1)
Definition: fmgr.c:779
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:642
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
static char * locale
Definition: initdb.c:132
static text * SB_do_like_escape(text *pat, text *esc)
static int SB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
static char SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
Definition: like.c:94
#define LIKE_TRUE
Definition: like.c:30
Datum texticnlike(PG_FUNCTION_ARGS)
Definition: like.c:414
Datum textlike(PG_FUNCTION_ARGS)
Definition: like.c:284
Datum namelike(PG_FUNCTION_ARGS)
Definition: like.c:242
Datum nameiclike(PG_FUNCTION_ARGS)
Definition: like.c:372
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
Datum byteanlike(PG_FUNCTION_ARGS)
Definition: like.c:347
Datum like_escape(PG_FUNCTION_ARGS)
Definition: like.c:430
static int wchareq(const char *p1, const char *p2)
Definition: like.c:58
Datum namenlike(PG_FUNCTION_ARGS)
Definition: like.c:263
Datum textnlike(PG_FUNCTION_ARGS)
Definition: like.c:305
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
Datum nameicnlike(PG_FUNCTION_ARGS)
Definition: like.c:387
Datum like_escape_bytea(PG_FUNCTION_ARGS)
Definition: like.c:449
static int MB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
static text * MB_do_like_escape(text *pat, text *esc)
Datum texticlike(PG_FUNCTION_ARGS)
Definition: like.c:402
static int Generic_Text_IC_like(text *str, text *pat, Oid collation)
Definition: like.c:173
Datum bytealike(PG_FUNCTION_ARGS)
Definition: like.c:326
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
Definition: like.c:152
int GetDatabaseEncoding(void)
Definition: mbutils.c:1268
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1553
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1024
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1543
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1375
bool pg_locale_deterministic(pg_locale_t locale)
Definition: pg_locale.c:1519
@ PG_UTF8
Definition: pg_wchar.h:232
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
unsigned char pg_ascii_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:146
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Datum NameGetDatum(const NameData *X)
Definition: postgres.h:373
unsigned int Oid
Definition: postgres_ext.h:31
char * c
Definition: c.h:725
Definition: c.h:671
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3379
#define tolower_l
Definition: win32_port.h:427