PostgreSQL Source Code  git master
like.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * like.c
4  * like expression handling code.
5  *
6  * NOTES
7  * A big hack of the regexp.c code!! Contributed by
8  * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/utils/adt/like.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <ctype.h>
21 
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/fmgrprotos.h"
26 #include "utils/pg_locale.h"
27 #include "varatt.h"
28 
29 
/*
 * Result codes for the MatchText family of functions.  The SQL-callable
 * wrappers in this file only ever test a result against LIKE_TRUE.
 * NOTE(review): LIKE_ABORT is not interpreted anywhere in this file;
 * presumably it is used internally by like_match.c -- confirm there.
 */
30 #define LIKE_TRUE 1
31 #define LIKE_FALSE 0
32 #define LIKE_ABORT (-1)
33 
34 
35 static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
37 static text *SB_do_like_escape(text *pat, text *esc);
38 
39 static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
41 static text *MB_do_like_escape(text *pat, text *esc);
42 
43 static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
45 
46 static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
48 
49 static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
50 static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51 
/*--------------------
 * wchareq --- equality test for one multibyte character.
 *
 * Helper for the multibyte MatchText variant.  p1 and p2 each point at the
 * first byte of a character.  Returns 1 if both characters have the same
 * byte length (as reported by pg_mblen) and identical bytes, otherwise 0.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;

	/* Cheap rejection: differing lead bytes can never match. */
	if (*p1 != *p2)
		return 0;

	/* Characters of unequal byte length cannot be equal either. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Equal length, so walk both characters in lockstep. */
	for (; nbytes != 0; nbytes--, p1++, p2++)
	{
		if (*p1 != *p2)
			return 0;
	}

	return 1;
}
78 
79 /*
80  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
81  * comparison of multibyte characters. It did not work at all, however,
82  * because it relied on tolower() which has a single-byte API ... and
83  * towlower() wouldn't be much better since we have no suitably cheap way
84  * of getting a single character transformed to the system's wchar_t format.
85  * So now, we just downcase the strings using lower() and apply regular LIKE
86  * comparison. This should be revisited when we install better locale support.
87  */
88 
89 /*
90  * We do handle case-insensitive matching for single-byte encodings using
91  * fold-on-the-fly processing, however.
92  */
93 static char
95 {
96  if (locale->ctype_is_c)
97  return pg_ascii_tolower(c);
98  else
99  return tolower_l(c, locale->info.lt);
100 }
101 
102 
/*
 * like_match.c is used as a template: it is #included four times below,
 * each time with MatchText (and, for the first two, do_like_escape)
 * #defined to a different function name and with different character
 * handling macros in effect.
 * NOTE(review): CHAREQ / NextChar / MatchText are re-#defined before each
 * include with no #undef in this file; presumably like_match.c #undefs its
 * parameter macros at its end -- confirm there.
 */

/* Step a pointer forward by exactly one byte, shrinking the remaining length. */
103 #define NextByte(p, plen) ((p)++, (plen)--)
104 
105 /* Set up to compile like_match.c for multibyte characters */
/* Characters are compared and stepped over using pg_mblen() lengths. */
106 #define CHAREQ(p1, p2) wchareq((p1), (p2))
107 #define NextChar(p, plen) \
108  do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
/* Copy one whole character from src to dst, advancing both and reducing srclen. */
109 #define CopyAdvChar(dst, src, srclen) \
110  do { int __l = pg_mblen(src); \
111  (srclen) -= __l; \
112  while (__l-- > 0) \
113  *(dst)++ = *(src)++; \
114  } while (0)
115 
116 #define MatchText MB_MatchText
117 #define do_like_escape MB_do_like_escape
118 
119 #include "like_match.c"
120 
121 /* Set up to compile like_match.c for single-byte characters */
/* Here one character is one byte, so every macro reduces to byte operations. */
122 #define CHAREQ(p1, p2) (*(p1) == *(p2))
123 #define NextChar(p, plen) NextByte((p), (plen))
124 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
125 
126 #define MatchText SB_MatchText
127 #define do_like_escape SB_do_like_escape
128 
129 #include "like_match.c"
130 
131 /* setup to compile like_match.c for single byte case insensitive matches */
/*
 * MATCH_LOWER folds each byte through SB_lower_char().
 * NOTE(review): neither CHAREQ nor do_like_escape is defined for this
 * instantiation; presumably like_match.c keys off MATCH_LOWER -- confirm.
 */
132 #define MATCH_LOWER(t, locale) SB_lower_char((unsigned char) (t), locale)
133 #define NextChar(p, plen) NextByte((p), (plen))
134 #define MatchText SB_IMatchText
135 
136 #include "like_match.c"
137 
138 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
139 
/*
 * Advance one byte, then keep advancing while input remains and the current
 * byte has the bit pattern 10xxxxxx (a UTF-8 continuation byte); this avoids
 * calling pg_mblen().
 */
140 #define NextChar(p, plen) \
141  do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
142 #define MatchText UTF8_MatchText
143 
144 #include "like_match.c"
145 
146 /* Generic for all cases not requiring inline case-folding */
147 static inline int
148 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
149 {
150  if (collation)
151  {
153 
154  if (!locale->deterministic)
155  ereport(ERROR,
156  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
157  errmsg("nondeterministic collations are not supported for LIKE")));
158  }
159 
161  return SB_MatchText(s, slen, p, plen, 0);
162  else if (GetDatabaseEncoding() == PG_UTF8)
163  return UTF8_MatchText(s, slen, p, plen, 0);
164  else
165  return MB_MatchText(s, slen, p, plen, 0);
166 }
167 
168 static inline int
170 {
171  char *s,
172  *p;
173  int slen,
174  plen;
176 
177  if (!OidIsValid(collation))
178  {
179  /*
180  * This typically means that the parser could not resolve a conflict
181  * of implicit collations, so report it that way.
182  */
183  ereport(ERROR,
184  (errcode(ERRCODE_INDETERMINATE_COLLATION),
185  errmsg("could not determine which collation to use for ILIKE"),
186  errhint("Use the COLLATE clause to set the collation explicitly.")));
187  }
188 
189  locale = pg_newlocale_from_collation(collation);
190 
191  if (!locale->deterministic)
192  ereport(ERROR,
193  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
194  errmsg("nondeterministic collations are not supported for ILIKE")));
195 
196  /*
197  * For efficiency reasons, in the single byte case we don't call lower()
198  * on the pattern and text, but instead call SB_lower_char on each
199  * character. In the multi-byte case we don't have much choice :-(. Also,
200  * ICU does not support single-character case folding, so we go the long
201  * way.
202  */
203 
204  if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
205  {
207  PointerGetDatum(pat)));
208  p = VARDATA_ANY(pat);
209  plen = VARSIZE_ANY_EXHDR(pat);
211  PointerGetDatum(str)));
212  s = VARDATA_ANY(str);
213  slen = VARSIZE_ANY_EXHDR(str);
214  if (GetDatabaseEncoding() == PG_UTF8)
215  return UTF8_MatchText(s, slen, p, plen, 0);
216  else
217  return MB_MatchText(s, slen, p, plen, 0);
218  }
219  else
220  {
221  p = VARDATA_ANY(pat);
222  plen = VARSIZE_ANY_EXHDR(pat);
223  s = VARDATA_ANY(str);
224  slen = VARSIZE_ANY_EXHDR(str);
225  return SB_IMatchText(s, slen, p, plen, locale);
226  }
227 }
228 
229 /*
230  * interface routines called by the function manager
231  */
232 
233 Datum
235 {
236  Name str = PG_GETARG_NAME(0);
237  text *pat = PG_GETARG_TEXT_PP(1);
238  bool result;
239  char *s,
240  *p;
241  int slen,
242  plen;
243 
244  s = NameStr(*str);
245  slen = strlen(s);
246  p = VARDATA_ANY(pat);
247  plen = VARSIZE_ANY_EXHDR(pat);
248 
249  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
250 
251  PG_RETURN_BOOL(result);
252 }
253 
254 Datum
256 {
257  Name str = PG_GETARG_NAME(0);
258  text *pat = PG_GETARG_TEXT_PP(1);
259  bool result;
260  char *s,
261  *p;
262  int slen,
263  plen;
264 
265  s = NameStr(*str);
266  slen = strlen(s);
267  p = VARDATA_ANY(pat);
268  plen = VARSIZE_ANY_EXHDR(pat);
269 
270  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
271 
272  PG_RETURN_BOOL(result);
273 }
274 
275 Datum
277 {
278  text *str = PG_GETARG_TEXT_PP(0);
279  text *pat = PG_GETARG_TEXT_PP(1);
280  bool result;
281  char *s,
282  *p;
283  int slen,
284  plen;
285 
286  s = VARDATA_ANY(str);
287  slen = VARSIZE_ANY_EXHDR(str);
288  p = VARDATA_ANY(pat);
289  plen = VARSIZE_ANY_EXHDR(pat);
290 
291  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
292 
293  PG_RETURN_BOOL(result);
294 }
295 
296 Datum
298 {
299  text *str = PG_GETARG_TEXT_PP(0);
300  text *pat = PG_GETARG_TEXT_PP(1);
301  bool result;
302  char *s,
303  *p;
304  int slen,
305  plen;
306 
307  s = VARDATA_ANY(str);
308  slen = VARSIZE_ANY_EXHDR(str);
309  p = VARDATA_ANY(pat);
310  plen = VARSIZE_ANY_EXHDR(pat);
311 
312  result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
313 
314  PG_RETURN_BOOL(result);
315 }
316 
317 Datum
319 {
321  bytea *pat = PG_GETARG_BYTEA_PP(1);
322  bool result;
323  char *s,
324  *p;
325  int slen,
326  plen;
327 
328  s = VARDATA_ANY(str);
329  slen = VARSIZE_ANY_EXHDR(str);
330  p = VARDATA_ANY(pat);
331  plen = VARSIZE_ANY_EXHDR(pat);
332 
333  result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
334 
335  PG_RETURN_BOOL(result);
336 }
337 
338 Datum
340 {
342  bytea *pat = PG_GETARG_BYTEA_PP(1);
343  bool result;
344  char *s,
345  *p;
346  int slen,
347  plen;
348 
349  s = VARDATA_ANY(str);
350  slen = VARSIZE_ANY_EXHDR(str);
351  p = VARDATA_ANY(pat);
352  plen = VARSIZE_ANY_EXHDR(pat);
353 
354  result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
355 
356  PG_RETURN_BOOL(result);
357 }
358 
359 /*
360  * Case-insensitive versions
361  */
362 
363 Datum
365 {
366  Name str = PG_GETARG_NAME(0);
367  text *pat = PG_GETARG_TEXT_PP(1);
368  bool result;
369  text *strtext;
370 
372  NameGetDatum(str)));
373  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
374 
375  PG_RETURN_BOOL(result);
376 }
377 
378 Datum
380 {
381  Name str = PG_GETARG_NAME(0);
382  text *pat = PG_GETARG_TEXT_PP(1);
383  bool result;
384  text *strtext;
385 
387  NameGetDatum(str)));
388  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
389 
390  PG_RETURN_BOOL(result);
391 }
392 
393 Datum
395 {
396  text *str = PG_GETARG_TEXT_PP(0);
397  text *pat = PG_GETARG_TEXT_PP(1);
398  bool result;
399 
400  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
401 
402  PG_RETURN_BOOL(result);
403 }
404 
405 Datum
407 {
408  text *str = PG_GETARG_TEXT_PP(0);
409  text *pat = PG_GETARG_TEXT_PP(1);
410  bool result;
411 
412  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
413 
414  PG_RETURN_BOOL(result);
415 }
416 
417 /*
418  * like_escape() --- given a pattern and an ESCAPE string,
419  * convert the pattern to use Postgres' standard backslash escape convention.
420  */
421 Datum
423 {
424  text *pat = PG_GETARG_TEXT_PP(0);
425  text *esc = PG_GETARG_TEXT_PP(1);
426  text *result;
427 
429  result = SB_do_like_escape(pat, esc);
430  else
431  result = MB_do_like_escape(pat, esc);
432 
433  PG_RETURN_TEXT_P(result);
434 }
435 
436 /*
437  * like_escape_bytea() --- given a pattern and an ESCAPE string,
438  * convert the pattern to use Postgres' standard backslash escape convention.
439  */
440 Datum
442 {
443  bytea *pat = PG_GETARG_BYTEA_PP(0);
444  bytea *esc = PG_GETARG_BYTEA_PP(1);
445  bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
446 
447  PG_RETURN_BYTEA_P((bytea *) result);
448 }
#define NameStr(name)
Definition: c.h:737
#define OidIsValid(objectId)
Definition: c.h:766
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
Datum DirectFunctionCall1Coll(PGFunction func, Oid collation, Datum arg1)
Definition: fmgr.c:792
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:641
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
const char * str
static char * locale
Definition: initdb.c:140
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
static text * SB_do_like_escape(text *pat, text *esc)
static int MB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
#define LIKE_TRUE
Definition: like.c:30
Datum texticnlike(PG_FUNCTION_ARGS)
Definition: like.c:406
Datum textlike(PG_FUNCTION_ARGS)
Definition: like.c:276
Datum namelike(PG_FUNCTION_ARGS)
Definition: like.c:234
Datum nameiclike(PG_FUNCTION_ARGS)
Definition: like.c:364
Datum byteanlike(PG_FUNCTION_ARGS)
Definition: like.c:339
Datum like_escape(PG_FUNCTION_ARGS)
Definition: like.c:422
static int wchareq(const char *p1, const char *p2)
Definition: like.c:58
Datum namenlike(PG_FUNCTION_ARGS)
Definition: like.c:255
Datum textnlike(PG_FUNCTION_ARGS)
Definition: like.c:297
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
Datum nameicnlike(PG_FUNCTION_ARGS)
Definition: like.c:379
static char SB_lower_char(unsigned char c, pg_locale_t locale)
Definition: like.c:94
Datum like_escape_bytea(PG_FUNCTION_ARGS)
Definition: like.c:441
static text * MB_do_like_escape(text *pat, text *esc)
Datum texticlike(PG_FUNCTION_ARGS)
Definition: like.c:394
static int SB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
static int Generic_Text_IC_like(text *str, text *pat, Oid collation)
Definition: like.c:169
Datum bytealike(PG_FUNCTION_ARGS)
Definition: like.c:318
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
Definition: like.c:148
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1358
@ PG_UTF8
Definition: pg_wchar.h:232
unsigned char pg_ascii_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:146
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Datum NameGetDatum(const NameData *X)
Definition: postgres.h:373
unsigned int Oid
Definition: postgres_ext.h:31
char * c
Definition: c.h:732
Definition: c.h:678
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3357
#define tolower_l
Definition: win32_port.h:443