PostgreSQL Source Code  git master
like.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * like.c
4  * like expression handling code.
5  *
6  * NOTES
7  * A big hack of the regexp.c code!! Contributed by
8  * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9  *
10  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/utils/adt/like.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <ctype.h>
21 
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/builtins.h"
26 #include "utils/pg_locale.h"
27 
28 
/*
 * Integer result codes for the pattern matchers.  The interface routines in
 * this file only ever compare a matcher's result against LIKE_TRUE, so both
 * other values are reported to SQL as "no match".
 * NOTE(review): the matchers that produce these codes come from
 * like_match.c, which is not visible here -- confirm there how LIKE_ABORT
 * differs from LIKE_FALSE.
 */
29 #define LIKE_TRUE 1
30 #define LIKE_FALSE 0
31 #define LIKE_ABORT (-1)
32 
33 
/*
 * Forward declarations.
 *
 * The *_MatchText and *_do_like_escape functions have no body written out in
 * this file.  Each of these names is bound to the MatchText / do_like_escape
 * macro immediately before one of the #include "like_match.c" lines further
 * down, which is presumably where the bodies are generated -- confirm
 * against like_match.c.
 *
 * GenericMatchText and Generic_Text_IC_like are defined in this file.
 */
34 static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
35  pg_locale_t locale, bool locale_is_c);
36 static text *SB_do_like_escape(text *pat, text *esc);
37 
38 static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
39  pg_locale_t locale, bool locale_is_c);
40 static text *MB_do_like_escape(text *pat, text *esc);
41 
42 static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
43  pg_locale_t locale, bool locale_is_c);
44 
45 static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
46  pg_locale_t locale, bool locale_is_c);
47 
48 static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
49 static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
50 
/*--------------------
 * wchareq --- compare the leading (possibly multibyte) character of two
 * strings.
 *
 * Returns 1 when both characters occupy the same number of bytes, as
 * reported by pg_mblen(), and all of those bytes are equal; returns 0
 * otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;
	int			i;

	/* Cheap rejection: differing first bytes can never be the same char. */
	if (*p1 != *p2)
		return 0;

	/* The two characters must be encoded in the same number of bytes. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Same length, so compare them byte by byte. */
	for (i = 0; i < nbytes; i++)
	{
		if (p1[i] != p2[i])
			return 0;
	}

	return 1;
}
77 
78 /*
79  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
80  * comparison of multibyte characters. It did not work at all, however,
81  * because it relied on tolower() which has a single-byte API ... and
82  * towlower() wouldn't be much better since we have no suitably cheap way
83  * of getting a single character transformed to the system's wchar_t format.
84  * So now, we just downcase the strings using lower() and apply regular LIKE
85  * comparison. This should be revisited when we install better locale support.
86  */
87 
88 /*
89  * We do handle case-insensitive matching for single-byte encodings using
90  * fold-on-the-fly processing, however.
91  */
92 static char
93 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
94 {
95  if (locale_is_c)
96  return pg_ascii_tolower(c);
97 #ifdef HAVE_LOCALE_T
98  else if (locale)
99  return tolower_l(c, locale->info.lt);
100 #endif
101  else
102  return pg_tolower(c);
103 }
104 
105 
/*
 * like_match.c is used as a code template: it is #included four times below,
 * and before each inclusion the macros it consumes are (re)defined.
 * MatchText names the matcher emitted by that inclusion, giving
 * MB_MatchText, SB_MatchText, SB_IMatchText and UTF8_MatchText in turn;
 * do_like_escape, where defined, names the escape-conversion routine.
 *
 * NOTE(review): these macros are redefined here without any #undef, so
 * like_match.c presumably #undefs them at its end; likewise it presumably
 * supplies defaults for macros a given instance leaves undefined (CHAREQ,
 * CopyAdvChar, do_like_escape, MATCH_LOWER).  Confirm against like_match.c.
 */

/* NextByte: step the pointer forward one byte and shrink the length by one. */
106 #define NextByte(p, plen) ((p)++, (plen)--)
107 
/*
 * Multibyte instance: characters are compared with wchareq(), and both
 * NextChar and CopyAdvChar move by the byte length that pg_mblen() reports
 * for the current character.
 */
108 /* Set up to compile like_match.c for multibyte characters */
109 #define CHAREQ(p1, p2) wchareq((p1), (p2))
110 #define NextChar(p, plen) \
111  do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
112 #define CopyAdvChar(dst, src, srclen) \
113  do { int __l = pg_mblen(src); \
114  (srclen) -= __l; \
115  while (__l-- > 0) \
116  *(dst)++ = *(src)++; \
117  } while (0)
118 
119 #define MatchText MB_MatchText
120 #define do_like_escape MB_do_like_escape
121 
122 #include "like_match.c"
123 
/*
 * Single-byte instance: one character is one byte, so comparison, advance
 * and copy all operate on a single byte.
 */
124 /* Set up to compile like_match.c for single-byte characters */
125 #define CHAREQ(p1, p2) (*(p1) == *(p2))
126 #define NextChar(p, plen) NextByte((p), (plen))
127 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
128 
129 #define MatchText SB_MatchText
130 #define do_like_escape SB_do_like_escape
131 
132 #include "like_match.c"
133 
/*
 * Single-byte case-insensitive instance: MATCH_LOWER folds a byte through
 * SB_lower_char().  Its expansion refers to "locale" and "locale_is_c",
 * which must therefore be in scope wherever like_match.c uses it; they are
 * the trailing parameters in the SB_IMatchText prototype.
 */
134 /* setup to compile like_match.c for single byte case insensitive matches */
135 #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
136 #define NextChar(p, plen) NextByte((p), (plen))
137 #define MatchText SB_IMatchText
138 
139 #include "like_match.c"
140 
/*
 * UTF-8 instance: NextChar always steps at least one byte, then keeps
 * stepping while input remains and the current byte has the bit pattern
 * 10xxxxxx (a UTF-8 continuation byte), so it never needs to call
 * pg_mblen().
 */
141 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
142 
143 #define NextChar(p, plen) \
144  do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
145 #define MatchText UTF8_MatchText
146 
147 #include "like_match.c"
148 
149 /* Generic for all cases not requiring inline case-folding */
150 static inline int
151 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
152 {
153  if (collation && !lc_ctype_is_c(collation))
154  {
156 
157  if (locale && !locale->deterministic)
158  ereport(ERROR,
159  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
160  errmsg("nondeterministic collations are not supported for LIKE")));
161  }
162 
164  return SB_MatchText(s, slen, p, plen, 0, true);
165  else if (GetDatabaseEncoding() == PG_UTF8)
166  return UTF8_MatchText(s, slen, p, plen, 0, true);
167  else
168  return MB_MatchText(s, slen, p, plen, 0, true);
169 }
170 
171 static inline int
173 {
174  char *s,
175  *p;
176  int slen,
177  plen;
178  pg_locale_t locale = 0;
179  bool locale_is_c = false;
180 
181  if (!OidIsValid(collation))
182  {
183  /*
184  * This typically means that the parser could not resolve a conflict
185  * of implicit collations, so report it that way.
186  */
187  ereport(ERROR,
188  (errcode(ERRCODE_INDETERMINATE_COLLATION),
189  errmsg("could not determine which collation to use for ILIKE"),
190  errhint("Use the COLLATE clause to set the collation explicitly.")));
191  }
192 
193  if (lc_ctype_is_c(collation))
194  locale_is_c = true;
195  else
196  locale = pg_newlocale_from_collation(collation);
197 
198  if (locale && !locale->deterministic)
199  ereport(ERROR,
200  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
201  errmsg("nondeterministic collations are not supported for ILIKE")));
202 
203  /*
204  * For efficiency reasons, in the single byte case we don't call lower()
205  * on the pattern and text, but instead call SB_lower_char on each
206  * character. In the multi-byte case we don't have much choice :-(. Also,
207  * ICU does not support single-character case folding, so we go the long
208  * way.
209  */
210 
211  if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
212  {
214  PointerGetDatum(pat)));
215  p = VARDATA_ANY(pat);
216  plen = VARSIZE_ANY_EXHDR(pat);
218  PointerGetDatum(str)));
219  s = VARDATA_ANY(str);
220  slen = VARSIZE_ANY_EXHDR(str);
221  if (GetDatabaseEncoding() == PG_UTF8)
222  return UTF8_MatchText(s, slen, p, plen, 0, true);
223  else
224  return MB_MatchText(s, slen, p, plen, 0, true);
225  }
226  else
227  {
228  p = VARDATA_ANY(pat);
229  plen = VARSIZE_ANY_EXHDR(pat);
230  s = VARDATA_ANY(str);
231  slen = VARSIZE_ANY_EXHDR(str);
232  return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
233  }
234 }
235 
236 /*
237  * interface routines called by the function manager
238  */
239 
240 Datum
242 {
243  Name str = PG_GETARG_NAME(0);
244  text *pat = PG_GETARG_TEXT_PP(1);
245  bool result;
246  char *s,
247  *p;
248  int slen,
249  plen;
250 
251  s = NameStr(*str);
252  slen = strlen(s);
253  p = VARDATA_ANY(pat);
254  plen = VARSIZE_ANY_EXHDR(pat);
255 
256  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
257 
258  PG_RETURN_BOOL(result);
259 }
260 
261 Datum
263 {
264  Name str = PG_GETARG_NAME(0);
265  text *pat = PG_GETARG_TEXT_PP(1);
266  bool result;
267  char *s,
268  *p;
269  int slen,
270  plen;
271 
272  s = NameStr(*str);
273  slen = strlen(s);
274  p = VARDATA_ANY(pat);
275  plen = VARSIZE_ANY_EXHDR(pat);
276 
277  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
278 
279  PG_RETURN_BOOL(result);
280 }
281 
282 Datum
284 {
285  text *str = PG_GETARG_TEXT_PP(0);
286  text *pat = PG_GETARG_TEXT_PP(1);
287  bool result;
288  char *s,
289  *p;
290  int slen,
291  plen;
292 
293  s = VARDATA_ANY(str);
294  slen = VARSIZE_ANY_EXHDR(str);
295  p = VARDATA_ANY(pat);
296  plen = VARSIZE_ANY_EXHDR(pat);
297 
298  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
299 
300  PG_RETURN_BOOL(result);
301 }
302 
303 Datum
305 {
306  text *str = PG_GETARG_TEXT_PP(0);
307  text *pat = PG_GETARG_TEXT_PP(1);
308  bool result;
309  char *s,
310  *p;
311  int slen,
312  plen;
313 
314  s = VARDATA_ANY(str);
315  slen = VARSIZE_ANY_EXHDR(str);
316  p = VARDATA_ANY(pat);
317  plen = VARSIZE_ANY_EXHDR(pat);
318 
319  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
320 
321  PG_RETURN_BOOL(result);
322 }
323 
324 Datum
326 {
328  bytea *pat = PG_GETARG_BYTEA_PP(1);
329  bool result;
330  char *s,
331  *p;
332  int slen,
333  plen;
334 
335  s = VARDATA_ANY(str);
336  slen = VARSIZE_ANY_EXHDR(str);
337  p = VARDATA_ANY(pat);
338  plen = VARSIZE_ANY_EXHDR(pat);
339 
340  result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
341 
342  PG_RETURN_BOOL(result);
343 }
344 
345 Datum
347 {
349  bytea *pat = PG_GETARG_BYTEA_PP(1);
350  bool result;
351  char *s,
352  *p;
353  int slen,
354  plen;
355 
356  s = VARDATA_ANY(str);
357  slen = VARSIZE_ANY_EXHDR(str);
358  p = VARDATA_ANY(pat);
359  plen = VARSIZE_ANY_EXHDR(pat);
360 
361  result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
362 
363  PG_RETURN_BOOL(result);
364 }
365 
366 /*
367  * Case-insensitive versions
368  */
369 
370 Datum
372 {
373  Name str = PG_GETARG_NAME(0);
374  text *pat = PG_GETARG_TEXT_PP(1);
375  bool result;
376  text *strtext;
377 
379  NameGetDatum(str)));
380  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
381 
382  PG_RETURN_BOOL(result);
383 }
384 
385 Datum
387 {
388  Name str = PG_GETARG_NAME(0);
389  text *pat = PG_GETARG_TEXT_PP(1);
390  bool result;
391  text *strtext;
392 
394  NameGetDatum(str)));
395  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
396 
397  PG_RETURN_BOOL(result);
398 }
399 
400 Datum
402 {
403  text *str = PG_GETARG_TEXT_PP(0);
404  text *pat = PG_GETARG_TEXT_PP(1);
405  bool result;
406 
407  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
408 
409  PG_RETURN_BOOL(result);
410 }
411 
412 Datum
414 {
415  text *str = PG_GETARG_TEXT_PP(0);
416  text *pat = PG_GETARG_TEXT_PP(1);
417  bool result;
418 
419  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
420 
421  PG_RETURN_BOOL(result);
422 }
423 
424 /*
425  * like_escape() --- given a pattern and an ESCAPE string,
426  * convert the pattern to use Postgres' standard backslash escape convention.
427  */
428 Datum
430 {
431  text *pat = PG_GETARG_TEXT_PP(0);
432  text *esc = PG_GETARG_TEXT_PP(1);
433  text *result;
434 
436  result = SB_do_like_escape(pat, esc);
437  else
438  result = MB_do_like_escape(pat, esc);
439 
440  PG_RETURN_TEXT_P(result);
441 }
442 
443 /*
444  * like_escape_bytea() --- given a pattern and an ESCAPE string,
445  * convert the pattern to use Postgres' standard backslash escape convention.
446  */
447 Datum
449 {
450  bytea *pat = PG_GETARG_BYTEA_PP(0);
451  bytea *esc = PG_GETARG_BYTEA_PP(1);
452  bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
453 
454  PG_RETURN_BYTEA_P((bytea *) result);
455 }
#define NameStr(name)
Definition: c.h:682
#define OidIsValid(objectId)
Definition: c.h:711
int errhint(const char *fmt,...)
Definition: elog.c:1153
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
Datum DirectFunctionCall1Coll(PGFunction func, Oid collation, Datum arg1)
Definition: fmgr.c:777
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:642
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
static char * locale
Definition: initdb.c:129
static text * SB_do_like_escape(text *pat, text *esc)
static int SB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
static char SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
Definition: like.c:93
#define LIKE_TRUE
Definition: like.c:29
Datum texticnlike(PG_FUNCTION_ARGS)
Definition: like.c:413
Datum textlike(PG_FUNCTION_ARGS)
Definition: like.c:283
Datum namelike(PG_FUNCTION_ARGS)
Definition: like.c:241
Datum nameiclike(PG_FUNCTION_ARGS)
Definition: like.c:371
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
Datum byteanlike(PG_FUNCTION_ARGS)
Definition: like.c:346
Datum like_escape(PG_FUNCTION_ARGS)
Definition: like.c:429
static int wchareq(const char *p1, const char *p2)
Definition: like.c:57
Datum namenlike(PG_FUNCTION_ARGS)
Definition: like.c:262
Datum textnlike(PG_FUNCTION_ARGS)
Definition: like.c:304
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
Datum nameicnlike(PG_FUNCTION_ARGS)
Definition: like.c:386
Datum like_escape_bytea(PG_FUNCTION_ARGS)
Definition: like.c:448
static int MB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
static text * MB_do_like_escape(text *pat, text *esc)
Datum texticlike(PG_FUNCTION_ARGS)
Definition: like.c:401
static int Generic_Text_IC_like(text *str, text *pat, Oid collation)
Definition: like.c:172
Datum bytealike(PG_FUNCTION_ARGS)
Definition: like.c:325
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
Definition: like.c:151
int GetDatabaseEncoding(void)
Definition: mbutils.c:1210
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:48
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1481
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1352
@ PG_UTF8
Definition: pg_wchar.h:232
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
unsigned char pg_ascii_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:146
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:670
uintptr_t Datum
Definition: postgres.h:412
static Datum NameGetDatum(const NameData *X)
Definition: postgres.h:721
#define VARDATA_ANY(PTR)
Definition: postgres.h:362
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:355
unsigned int Oid
Definition: postgres_ext.h:31
char * c
Definition: c.h:677
Definition: c.h:623
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3637
#define tolower_l
Definition: win32_port.h:420