PostgreSQL Source Code  git master
like.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * like.c
4  * like expression handling code.
5  *
6  * NOTES
7  * A big hack of the regexp.c code!! Contributed by
8  * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/utils/adt/like.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <ctype.h>
21 
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/fmgrprotos.h"
26 #include "utils/pg_locale.h"
27 #include "varatt.h"
28 
29 
30 #define LIKE_TRUE 1
31 #define LIKE_FALSE 0
32 #define LIKE_ABORT (-1)
33 
34 
35 static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
36  pg_locale_t locale, bool locale_is_c);
37 static text *SB_do_like_escape(text *pat, text *esc);
38 
39 static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
40  pg_locale_t locale, bool locale_is_c);
41 static text *MB_do_like_escape(text *pat, text *esc);
42 
43 static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
44  pg_locale_t locale, bool locale_is_c);
45 
46 static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
47  pg_locale_t locale, bool locale_is_c);
48 
49 static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
50 static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51 
/*--------------------
 * wchareq
 *
 * Support routine for MatchText.  Treats p1 and p2 as pointing at the start
 * of one multibyte character each and compares those two characters
 * byte-for-byte.
 *
 * Returns 1 if the characters are identical, 0 otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			len;
	int			i;

	/* Cheap early exit: differing lead bytes can never match. */
	if (*p1 != *p2)
		return 0;

	/* Characters occupying a different number of bytes cannot match. */
	len = pg_mblen(p1);
	if (len != pg_mblen(p2))
		return 0;

	/* Same byte length, so compare every byte of the character. */
	for (i = 0; i < len; i++)
	{
		if (p1[i] != p2[i])
			return 0;
	}

	return 1;
}
78 
79 /*
80  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
81  * comparison of multibyte characters. It did not work at all, however,
82  * because it relied on tolower() which has a single-byte API ... and
83  * towlower() wouldn't be much better since we have no suitably cheap way
84  * of getting a single character transformed to the system's wchar_t format.
85  * So now, we just downcase the strings using lower() and apply regular LIKE
86  * comparison. This should be revisited when we install better locale support.
87  */
88 
89 /*
90  * We do handle case-insensitive matching for single-byte encodings using
91  * fold-on-the-fly processing, however.
92  */
93 static char
94 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
95 {
96  if (locale_is_c)
97  return pg_ascii_tolower(c);
98  else if (locale)
99  return tolower_l(c, locale->info.lt);
100  else
101  return pg_tolower(c);
102 }
103 
104 
105 #define NextByte(p, plen) ((p)++, (plen)--)
106 
107 /* Set up to compile like_match.c for multibyte characters */
108 #define CHAREQ(p1, p2) wchareq((p1), (p2))
109 #define NextChar(p, plen) \
110  do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
111 #define CopyAdvChar(dst, src, srclen) \
112  do { int __l = pg_mblen(src); \
113  (srclen) -= __l; \
114  while (__l-- > 0) \
115  *(dst)++ = *(src)++; \
116  } while (0)
117 
118 #define MatchText MB_MatchText
119 #define do_like_escape MB_do_like_escape
120 
121 #include "like_match.c"
122 
123 /* Set up to compile like_match.c for single-byte characters */
124 #define CHAREQ(p1, p2) (*(p1) == *(p2))
125 #define NextChar(p, plen) NextByte((p), (plen))
126 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
127 
128 #define MatchText SB_MatchText
129 #define do_like_escape SB_do_like_escape
130 
131 #include "like_match.c"
132 
133 /* setup to compile like_match.c for single byte case insensitive matches */
134 #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
135 #define NextChar(p, plen) NextByte((p), (plen))
136 #define MatchText SB_IMatchText
137 
138 #include "like_match.c"
139 
140 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
141 
142 #define NextChar(p, plen) \
143  do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
144 #define MatchText UTF8_MatchText
145 
146 #include "like_match.c"
147 
148 /* Generic for all cases not requiring inline case-folding */
149 static inline int
150 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
151 {
152  if (collation && !lc_ctype_is_c(collation))
153  {
155 
157  ereport(ERROR,
158  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
159  errmsg("nondeterministic collations are not supported for LIKE")));
160  }
161 
163  return SB_MatchText(s, slen, p, plen, 0, true);
164  else if (GetDatabaseEncoding() == PG_UTF8)
165  return UTF8_MatchText(s, slen, p, plen, 0, true);
166  else
167  return MB_MatchText(s, slen, p, plen, 0, true);
168 }
169 
170 static inline int
172 {
173  char *s,
174  *p;
175  int slen,
176  plen;
177  pg_locale_t locale = 0;
178  bool locale_is_c = false;
179 
180  if (!OidIsValid(collation))
181  {
182  /*
183  * This typically means that the parser could not resolve a conflict
184  * of implicit collations, so report it that way.
185  */
186  ereport(ERROR,
187  (errcode(ERRCODE_INDETERMINATE_COLLATION),
188  errmsg("could not determine which collation to use for ILIKE"),
189  errhint("Use the COLLATE clause to set the collation explicitly.")));
190  }
191 
192  if (lc_ctype_is_c(collation))
193  locale_is_c = true;
194  else
195  locale = pg_newlocale_from_collation(collation);
196 
198  ereport(ERROR,
199  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
200  errmsg("nondeterministic collations are not supported for ILIKE")));
201 
202  /*
203  * For efficiency reasons, in the single byte case we don't call lower()
204  * on the pattern and text, but instead call SB_lower_char on each
205  * character. In the multi-byte case we don't have much choice :-(. Also,
206  * ICU does not support single-character case folding, so we go the long
207  * way.
208  */
209 
210  if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
211  {
213  PointerGetDatum(pat)));
214  p = VARDATA_ANY(pat);
215  plen = VARSIZE_ANY_EXHDR(pat);
217  PointerGetDatum(str)));
218  s = VARDATA_ANY(str);
219  slen = VARSIZE_ANY_EXHDR(str);
220  if (GetDatabaseEncoding() == PG_UTF8)
221  return UTF8_MatchText(s, slen, p, plen, 0, true);
222  else
223  return MB_MatchText(s, slen, p, plen, 0, true);
224  }
225  else
226  {
227  p = VARDATA_ANY(pat);
228  plen = VARSIZE_ANY_EXHDR(pat);
229  s = VARDATA_ANY(str);
230  slen = VARSIZE_ANY_EXHDR(str);
231  return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
232  }
233 }
234 
235 /*
236  * interface routines called by the function manager
237  */
238 
239 Datum
241 {
242  Name str = PG_GETARG_NAME(0);
243  text *pat = PG_GETARG_TEXT_PP(1);
244  bool result;
245  char *s,
246  *p;
247  int slen,
248  plen;
249 
250  s = NameStr(*str);
251  slen = strlen(s);
252  p = VARDATA_ANY(pat);
253  plen = VARSIZE_ANY_EXHDR(pat);
254 
255  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
256 
257  PG_RETURN_BOOL(result);
258 }
259 
260 Datum
262 {
263  Name str = PG_GETARG_NAME(0);
264  text *pat = PG_GETARG_TEXT_PP(1);
265  bool result;
266  char *s,
267  *p;
268  int slen,
269  plen;
270 
271  s = NameStr(*str);
272  slen = strlen(s);
273  p = VARDATA_ANY(pat);
274  plen = VARSIZE_ANY_EXHDR(pat);
275 
276  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
277 
278  PG_RETURN_BOOL(result);
279 }
280 
281 Datum
283 {
284  text *str = PG_GETARG_TEXT_PP(0);
285  text *pat = PG_GETARG_TEXT_PP(1);
286  bool result;
287  char *s,
288  *p;
289  int slen,
290  plen;
291 
292  s = VARDATA_ANY(str);
293  slen = VARSIZE_ANY_EXHDR(str);
294  p = VARDATA_ANY(pat);
295  plen = VARSIZE_ANY_EXHDR(pat);
296 
297  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
298 
299  PG_RETURN_BOOL(result);
300 }
301 
302 Datum
304 {
305  text *str = PG_GETARG_TEXT_PP(0);
306  text *pat = PG_GETARG_TEXT_PP(1);
307  bool result;
308  char *s,
309  *p;
310  int slen,
311  plen;
312 
313  s = VARDATA_ANY(str);
314  slen = VARSIZE_ANY_EXHDR(str);
315  p = VARDATA_ANY(pat);
316  plen = VARSIZE_ANY_EXHDR(pat);
317 
318  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
319 
320  PG_RETURN_BOOL(result);
321 }
322 
323 Datum
325 {
327  bytea *pat = PG_GETARG_BYTEA_PP(1);
328  bool result;
329  char *s,
330  *p;
331  int slen,
332  plen;
333 
334  s = VARDATA_ANY(str);
335  slen = VARSIZE_ANY_EXHDR(str);
336  p = VARDATA_ANY(pat);
337  plen = VARSIZE_ANY_EXHDR(pat);
338 
339  result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
340 
341  PG_RETURN_BOOL(result);
342 }
343 
344 Datum
346 {
348  bytea *pat = PG_GETARG_BYTEA_PP(1);
349  bool result;
350  char *s,
351  *p;
352  int slen,
353  plen;
354 
355  s = VARDATA_ANY(str);
356  slen = VARSIZE_ANY_EXHDR(str);
357  p = VARDATA_ANY(pat);
358  plen = VARSIZE_ANY_EXHDR(pat);
359 
360  result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
361 
362  PG_RETURN_BOOL(result);
363 }
364 
365 /*
366  * Case-insensitive versions
367  */
368 
369 Datum
371 {
372  Name str = PG_GETARG_NAME(0);
373  text *pat = PG_GETARG_TEXT_PP(1);
374  bool result;
375  text *strtext;
376 
378  NameGetDatum(str)));
379  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
380 
381  PG_RETURN_BOOL(result);
382 }
383 
384 Datum
386 {
387  Name str = PG_GETARG_NAME(0);
388  text *pat = PG_GETARG_TEXT_PP(1);
389  bool result;
390  text *strtext;
391 
393  NameGetDatum(str)));
394  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
395 
396  PG_RETURN_BOOL(result);
397 }
398 
399 Datum
401 {
402  text *str = PG_GETARG_TEXT_PP(0);
403  text *pat = PG_GETARG_TEXT_PP(1);
404  bool result;
405 
406  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
407 
408  PG_RETURN_BOOL(result);
409 }
410 
411 Datum
413 {
414  text *str = PG_GETARG_TEXT_PP(0);
415  text *pat = PG_GETARG_TEXT_PP(1);
416  bool result;
417 
418  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
419 
420  PG_RETURN_BOOL(result);
421 }
422 
423 /*
424  * like_escape() --- given a pattern and an ESCAPE string,
425  * convert the pattern to use Postgres' standard backslash escape convention.
426  */
427 Datum
429 {
430  text *pat = PG_GETARG_TEXT_PP(0);
431  text *esc = PG_GETARG_TEXT_PP(1);
432  text *result;
433 
435  result = SB_do_like_escape(pat, esc);
436  else
437  result = MB_do_like_escape(pat, esc);
438 
439  PG_RETURN_TEXT_P(result);
440 }
441 
442 /*
443  * like_escape_bytea() --- given a pattern and an ESCAPE string,
444  * convert the pattern to use Postgres' standard backslash escape convention.
445  */
446 Datum
448 {
449  bytea *pat = PG_GETARG_BYTEA_PP(0);
450  bytea *esc = PG_GETARG_BYTEA_PP(1);
451  bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
452 
453  PG_RETURN_BYTEA_P((bytea *) result);
454 }
#define NameStr(name)
Definition: c.h:746
#define OidIsValid(objectId)
Definition: c.h:775
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
Datum DirectFunctionCall1Coll(PGFunction func, Oid collation, Datum arg1)
Definition: fmgr.c:792
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:642
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
const char * str
static char * locale
Definition: initdb.c:140
static text * SB_do_like_escape(text *pat, text *esc)
static int SB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
static char SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
Definition: like.c:94
#define LIKE_TRUE
Definition: like.c:30
Datum texticnlike(PG_FUNCTION_ARGS)
Definition: like.c:412
Datum textlike(PG_FUNCTION_ARGS)
Definition: like.c:282
Datum namelike(PG_FUNCTION_ARGS)
Definition: like.c:240
Datum nameiclike(PG_FUNCTION_ARGS)
Definition: like.c:370
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
Datum byteanlike(PG_FUNCTION_ARGS)
Definition: like.c:345
Datum like_escape(PG_FUNCTION_ARGS)
Definition: like.c:428
static int wchareq(const char *p1, const char *p2)
Definition: like.c:58
Datum namenlike(PG_FUNCTION_ARGS)
Definition: like.c:261
Datum textnlike(PG_FUNCTION_ARGS)
Definition: like.c:303
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
Datum nameicnlike(PG_FUNCTION_ARGS)
Definition: like.c:385
Datum like_escape_bytea(PG_FUNCTION_ARGS)
Definition: like.c:447
static int MB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c)
static text * MB_do_like_escape(text *pat, text *esc)
Datum texticlike(PG_FUNCTION_ARGS)
Definition: like.c:400
static int Generic_Text_IC_like(text *str, text *pat, Oid collation)
Definition: like.c:171
Datum bytealike(PG_FUNCTION_ARGS)
Definition: like.c:324
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
Definition: like.c:150
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1551
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1384
bool pg_locale_deterministic(pg_locale_t locale)
Definition: pg_locale.c:1531
@ PG_UTF8
Definition: pg_wchar.h:232
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
unsigned char pg_ascii_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:146
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Datum NameGetDatum(const NameData *X)
Definition: postgres.h:373
unsigned int Oid
Definition: postgres_ext.h:31
char * c
Definition: c.h:741
Definition: c.h:687
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3382
#define tolower_l
Definition: win32_port.h:425