PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
like.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * like.c
 *    like expression handling code.
 *
 *   NOTES
 *      A big hack of the regexp.c code!! Contributed by
 *      Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/utils/adt/like.c
 *
 *-------------------------------------------------------------------------
 */
18 #include "postgres.h"
19 
20 #include <ctype.h>
21 
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/builtins.h"
26 #include "utils/pg_locale.h"
27 
28 
/*
 * Result codes produced by the MatchText variants.  The callers in this
 * file only ever compare the result against LIKE_TRUE; anything else
 * (LIKE_FALSE or LIKE_ABORT) is treated as "no match".
 * NOTE(review): the exact condition that yields LIKE_ABORT is decided in
 * like_match.c, which is not visible from here.
 */
#define LIKE_TRUE                       1
#define LIKE_FALSE                      0
#define LIKE_ABORT                      (-1)
32 
33 
34 static int SB_MatchText(char *t, int tlen, char *p, int plen,
35  pg_locale_t locale, bool locale_is_c);
36 static text *SB_do_like_escape(text *, text *);
37 
38 static int MB_MatchText(char *t, int tlen, char *p, int plen,
39  pg_locale_t locale, bool locale_is_c);
40 static text *MB_do_like_escape(text *, text *);
41 
42 static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
43  pg_locale_t locale, bool locale_is_c);
44 
45 static int SB_IMatchText(char *t, int tlen, char *p, int plen,
46  pg_locale_t locale, bool locale_is_c);
47 
48 static int GenericMatchText(char *s, int slen, char *p, int plen);
49 static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
50 
/*--------------------
 * Support routine for MatchText.  Compares the single multibyte character
 * starting at p1 with the one starting at p2.
 *
 * Returns 1 if both characters have the same byte length (as reported by
 * pg_mblen) and consist of identical bytes; otherwise returns 0.
 *--------------------
 */
static inline int
wchareq(char *p1, char *p2)
{
    int         p1_len;

    /* Optimization: quickly compare the first byte. */
    if (*p1 != *p2)
        return 0;

    p1_len = pg_mblen(p1);
    if (pg_mblen(p2) != p1_len)
        return 0;

    /* They are the same length, so compare them byte by byte. */
    while (p1_len--)
    {
        if (*p1++ != *p2++)
            return 0;
    }
    return 1;
}
77 
/*
 * Formerly we had a routine iwchareq() here that tried to do case-insensitive
 * comparison of multibyte characters.  It did not work at all, however,
 * because it relied on tolower() which has a single-byte API ... and
 * towlower() wouldn't be much better since we have no suitably cheap way
 * of getting a single character transformed to the system's wchar_t format.
 * So now, we just downcase the strings using lower() and apply regular LIKE
 * comparison.  This should be revisited when we install better locale support.
 */
87 
88 /*
89  * We do handle case-insensitive matching for single-byte encodings using
90  * fold-on-the-fly processing, however.
91  */
92 static char
93 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
94 {
95  if (locale_is_c)
96  return pg_ascii_tolower(c);
97 #ifdef HAVE_LOCALE_T
98  else if (locale)
99  return tolower_l(c, locale);
100 #endif
101  else
102  return pg_tolower(c);
103 }
104 
105 
106 #define NextByte(p, plen) ((p)++, (plen)--)
107 
108 /* Set up to compile like_match.c for multibyte characters */
109 #define CHAREQ(p1, p2) wchareq((p1), (p2))
110 #define NextChar(p, plen) \
111  do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
112 #define CopyAdvChar(dst, src, srclen) \
113  do { int __l = pg_mblen(src); \
114  (srclen) -= __l; \
115  while (__l-- > 0) \
116  *(dst)++ = *(src)++; \
117  } while (0)
118 
119 #define MatchText MB_MatchText
120 #define do_like_escape MB_do_like_escape
121 
122 #include "like_match.c"
123 
124 /* Set up to compile like_match.c for single-byte characters */
125 #define CHAREQ(p1, p2) (*(p1) == *(p2))
126 #define NextChar(p, plen) NextByte((p), (plen))
127 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
128 
129 #define MatchText SB_MatchText
130 #define do_like_escape SB_do_like_escape
131 
132 #include "like_match.c"
133 
134 /* setup to compile like_match.c for single byte case insensitive matches */
135 #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
136 #define NextChar(p, plen) NextByte((p), (plen))
137 #define MatchText SB_IMatchText
138 
139 #include "like_match.c"
140 
141 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
142 
143 #define NextChar(p, plen) \
144  do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
145 #define MatchText UTF8_MatchText
146 
147 #include "like_match.c"
148 
149 /* Generic for all cases not requiring inline case-folding */
150 static inline int
151 GenericMatchText(char *s, int slen, char *p, int plen)
152 {
154  return SB_MatchText(s, slen, p, plen, 0, true);
155  else if (GetDatabaseEncoding() == PG_UTF8)
156  return UTF8_MatchText(s, slen, p, plen, 0, true);
157  else
158  return MB_MatchText(s, slen, p, plen, 0, true);
159 }
160 
161 static inline int
162 Generic_Text_IC_like(text *str, text *pat, Oid collation)
163 {
164  char *s,
165  *p;
166  int slen,
167  plen;
168 
169  /*
170  * For efficiency reasons, in the single byte case we don't call lower()
171  * on the pattern and text, but instead call SB_lower_char on each
172  * character. In the multi-byte case we don't have much choice :-(
173  */
174 
176  {
177  /* lower's result is never packed, so OK to use old macros here */
179  PointerGetDatum(pat)));
180  p = VARDATA(pat);
181  plen = (VARSIZE(pat) - VARHDRSZ);
183  PointerGetDatum(str)));
184  s = VARDATA(str);
185  slen = (VARSIZE(str) - VARHDRSZ);
186  if (GetDatabaseEncoding() == PG_UTF8)
187  return UTF8_MatchText(s, slen, p, plen, 0, true);
188  else
189  return MB_MatchText(s, slen, p, plen, 0, true);
190  }
191  else
192  {
193  /*
194  * Here we need to prepare locale information for SB_lower_char. This
195  * should match the methods used in str_tolower().
196  */
197  pg_locale_t locale = 0;
198  bool locale_is_c = false;
199 
200  if (lc_ctype_is_c(collation))
201  locale_is_c = true;
202  else if (collation != DEFAULT_COLLATION_OID)
203  {
204  if (!OidIsValid(collation))
205  {
206  /*
207  * This typically means that the parser could not resolve a
208  * conflict of implicit collations, so report it that way.
209  */
210  ereport(ERROR,
211  (errcode(ERRCODE_INDETERMINATE_COLLATION),
212  errmsg("could not determine which collation to use for ILIKE"),
213  errhint("Use the COLLATE clause to set the collation explicitly.")));
214  }
215  locale = pg_newlocale_from_collation(collation);
216  }
217 
218  p = VARDATA_ANY(pat);
219  plen = VARSIZE_ANY_EXHDR(pat);
220  s = VARDATA_ANY(str);
221  slen = VARSIZE_ANY_EXHDR(str);
222  return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
223  }
224 }
225 
/*
 *  interface routines called by the function manager
 */
229 
230 Datum
232 {
233  Name str = PG_GETARG_NAME(0);
234  text *pat = PG_GETARG_TEXT_PP(1);
235  bool result;
236  char *s,
237  *p;
238  int slen,
239  plen;
240 
241  s = NameStr(*str);
242  slen = strlen(s);
243  p = VARDATA_ANY(pat);
244  plen = VARSIZE_ANY_EXHDR(pat);
245 
246  result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
247 
248  PG_RETURN_BOOL(result);
249 }
250 
251 Datum
253 {
254  Name str = PG_GETARG_NAME(0);
255  text *pat = PG_GETARG_TEXT_PP(1);
256  bool result;
257  char *s,
258  *p;
259  int slen,
260  plen;
261 
262  s = NameStr(*str);
263  slen = strlen(s);
264  p = VARDATA_ANY(pat);
265  plen = VARSIZE_ANY_EXHDR(pat);
266 
267  result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
268 
269  PG_RETURN_BOOL(result);
270 }
271 
272 Datum
274 {
275  text *str = PG_GETARG_TEXT_PP(0);
276  text *pat = PG_GETARG_TEXT_PP(1);
277  bool result;
278  char *s,
279  *p;
280  int slen,
281  plen;
282 
283  s = VARDATA_ANY(str);
284  slen = VARSIZE_ANY_EXHDR(str);
285  p = VARDATA_ANY(pat);
286  plen = VARSIZE_ANY_EXHDR(pat);
287 
288  result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
289 
290  PG_RETURN_BOOL(result);
291 }
292 
293 Datum
295 {
296  text *str = PG_GETARG_TEXT_PP(0);
297  text *pat = PG_GETARG_TEXT_PP(1);
298  bool result;
299  char *s,
300  *p;
301  int slen,
302  plen;
303 
304  s = VARDATA_ANY(str);
305  slen = VARSIZE_ANY_EXHDR(str);
306  p = VARDATA_ANY(pat);
307  plen = VARSIZE_ANY_EXHDR(pat);
308 
309  result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
310 
311  PG_RETURN_BOOL(result);
312 }
313 
314 Datum
316 {
317  bytea *str = PG_GETARG_BYTEA_PP(0);
318  bytea *pat = PG_GETARG_BYTEA_PP(1);
319  bool result;
320  char *s,
321  *p;
322  int slen,
323  plen;
324 
325  s = VARDATA_ANY(str);
326  slen = VARSIZE_ANY_EXHDR(str);
327  p = VARDATA_ANY(pat);
328  plen = VARSIZE_ANY_EXHDR(pat);
329 
330  result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
331 
332  PG_RETURN_BOOL(result);
333 }
334 
335 Datum
337 {
338  bytea *str = PG_GETARG_BYTEA_PP(0);
339  bytea *pat = PG_GETARG_BYTEA_PP(1);
340  bool result;
341  char *s,
342  *p;
343  int slen,
344  plen;
345 
346  s = VARDATA_ANY(str);
347  slen = VARSIZE_ANY_EXHDR(str);
348  p = VARDATA_ANY(pat);
349  plen = VARSIZE_ANY_EXHDR(pat);
350 
351  result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
352 
353  PG_RETURN_BOOL(result);
354 }
355 
/*
 * Case-insensitive versions
 */
359 
360 Datum
362 {
363  Name str = PG_GETARG_NAME(0);
364  text *pat = PG_GETARG_TEXT_PP(1);
365  bool result;
366  text *strtext;
367 
369  NameGetDatum(str)));
370  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
371 
372  PG_RETURN_BOOL(result);
373 }
374 
375 Datum
377 {
378  Name str = PG_GETARG_NAME(0);
379  text *pat = PG_GETARG_TEXT_PP(1);
380  bool result;
381  text *strtext;
382 
384  NameGetDatum(str)));
385  result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
386 
387  PG_RETURN_BOOL(result);
388 }
389 
390 Datum
392 {
393  text *str = PG_GETARG_TEXT_PP(0);
394  text *pat = PG_GETARG_TEXT_PP(1);
395  bool result;
396 
397  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
398 
399  PG_RETURN_BOOL(result);
400 }
401 
402 Datum
404 {
405  text *str = PG_GETARG_TEXT_PP(0);
406  text *pat = PG_GETARG_TEXT_PP(1);
407  bool result;
408 
409  result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
410 
411  PG_RETURN_BOOL(result);
412 }
413 
414 /*
415  * like_escape() --- given a pattern and an ESCAPE string,
416  * convert the pattern to use Postgres' standard backslash escape convention.
417  */
418 Datum
420 {
421  text *pat = PG_GETARG_TEXT_PP(0);
422  text *esc = PG_GETARG_TEXT_PP(1);
423  text *result;
424 
426  result = SB_do_like_escape(pat, esc);
427  else
428  result = MB_do_like_escape(pat, esc);
429 
430  PG_RETURN_TEXT_P(result);
431 }
432 
433 /*
434  * like_escape_bytea() --- given a pattern and an ESCAPE string,
435  * convert the pattern to use Postgres' standard backslash escape convention.
436  */
437 Datum
439 {
440  bytea *pat = PG_GETARG_BYTEA_PP(0);
441  bytea *esc = PG_GETARG_BYTEA_PP(1);
442  bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
443 
444  PG_RETURN_BYTEA_P((bytea *) result);
445 }
Datum textnlike(PG_FUNCTION_ARGS)
Definition: like.c:294
Datum namelike(PG_FUNCTION_ARGS)
Definition: like.c:231
Datum nameiclike(PG_FUNCTION_ARGS)
Definition: like.c:361
#define NameGetDatum(X)
Definition: postgres.h:603
static int wchareq(char *p1, char *p2)
Definition: like.c:57
int errhint(const char *fmt,...)
Definition: elog.c:987
Datum like_escape_bytea(PG_FUNCTION_ARGS)
Definition: like.c:438
#define VARDATA_ANY(PTR)
Definition: postgres.h:349
#define VARDATA(PTR)
Definition: postgres.h:305
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:43
#define VARSIZE(PTR)
Definition: postgres.h:306
static text * SB_do_like_escape(text *, text *)
#define PointerGetDatum(X)
Definition: postgres.h:564
#define VARHDRSZ
Definition: c.h:441
Datum bytealike(PG_FUNCTION_ARGS)
Definition: like.c:315
unsigned char pg_ascii_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:146
#define LIKE_TRUE
Definition: like.c:29
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
int errcode(int sqlerrcode)
Definition: elog.c:575
Datum textlike(PG_FUNCTION_ARGS)
Definition: like.c:273
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:555
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:313
unsigned int Oid
Definition: postgres_ext.h:31
#define OidIsValid(objectId)
Definition: c.h:534
#define PG_GET_COLLATION()
Definition: fmgr.h:155
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:270
int pg_locale_t
Definition: pg_locale.h:71
static int SB_MatchText(char *t, int tlen, char *p, int plen, pg_locale_t locale, bool locale_is_c)
static int UTF8_MatchText(char *t, int tlen, char *p, int plen, pg_locale_t locale, bool locale_is_c)
Datum nameicnlike(PG_FUNCTION_ARGS)
Definition: like.c:376
#define ERROR
Definition: elog.h:43
static int MB_MatchText(char *t, int tlen, char *p, int plen, pg_locale_t locale, bool locale_is_c)
static int GenericMatchText(char *s, int slen, char *p, int plen)
Definition: like.c:151
Definition: c.h:489
char * c
static char SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
Definition: like.c:93
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:68
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
Datum namenlike(PG_FUNCTION_ARGS)
Definition: like.c:252
static text * MB_do_like_escape(text *, text *)
Datum texticnlike(PG_FUNCTION_ARGS)
Definition: like.c:403
Datum DirectFunctionCall1Coll(PGFunction func, Oid collation, Datum arg1)
Definition: fmgr.c:1016
#define ereport(elevel, rest)
Definition: elog.h:122
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:3054
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1260
Datum texticlike(PG_FUNCTION_ARGS)
Definition: like.c:391
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:303
uintptr_t Datum
Definition: postgres.h:374
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:314
Datum byteanlike(PG_FUNCTION_ARGS)
Definition: like.c:336
Datum like_escape(PG_FUNCTION_ARGS)
Definition: like.c:419
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:268
static int Generic_Text_IC_like(text *str, text *pat, Oid collation)
Definition: like.c:162
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
#define tolower_l
Definition: win32.h:334
#define DatumGetTextP(X)
Definition: fmgr.h:248
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:342
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define NameStr(name)
Definition: c.h:495
static char * locale
Definition: initdb.c:122
Definition: c.h:435
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
static int SB_IMatchText(char *t, int tlen, char *p, int plen, pg_locale_t locale, bool locale_is_c)
bool lc_ctype_is_c(Oid collation)
Definition: pg_locale.c:1173
#define PG_GETARG_NAME(n)
Definition: fmgr.h:234