PostgreSQL Source Code git master
like.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * like.c
4 * like expression handling code.
5 *
6 * NOTES
7 * A big hack of the regexp.c code!! Contributed by
8 * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9 *
10 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 * IDENTIFICATION
14 * src/backend/utils/adt/like.c
15 *
16 *-------------------------------------------------------------------------
17 */
18#include "postgres.h"
19
20#include <ctype.h>
21
23#include "mb/pg_wchar.h"
24#include "miscadmin.h"
25#include "utils/fmgrprotos.h"
26#include "utils/pg_locale.h"
27#include "varatt.h"
28
29
30#define LIKE_TRUE 1
31#define LIKE_FALSE 0
32#define LIKE_ABORT (-1)
33
34
35static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
37static text *SB_do_like_escape(text *pat, text *esc);
38
39static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
41static text *MB_do_like_escape(text *pat, text *esc);
42
43static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
45
46static int C_IMatchText(const char *t, int tlen, const char *p, int plen,
48
49static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
50static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51
/*--------------------
 * wchareq --- helper for the multibyte build of MatchText.
 *
 * Decides whether the multibyte characters starting at p1 and p2 are the
 * same character, i.e. have the same byte length (per pg_mblen) and the
 * same bytes.  Returns 1 on equality, 0 otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;

	/* Cheap rejection: characters whose lead bytes differ cannot match. */
	if (*p1 != *p2)
		return 0;

	/* Characters of different encoded lengths cannot match either. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Equal lengths: walk both characters byte by byte. */
	for (; nbytes != 0; nbytes--, p1++, p2++)
	{
		if (*p1 != *p2)
			return 0;
	}

	return 1;
}
78
79/*
80 * Formerly we had a routine iwchareq() here that tried to do case-insensitive
81 * comparison of multibyte characters. It did not work at all, however,
82 * because it relied on tolower() which has a single-byte API ... and
83 * towlower() wouldn't be much better since we have no suitably cheap way
84 * of getting a single character transformed to the system's wchar_t format.
85 * So now, we just downcase the strings using lower() and apply regular LIKE
86 * comparison. This should be revisited when we install better locale support.
87 *
88 * We do handle case-insensitive matching for the C locale using
89 * fold-on-the-fly processing, however.
90 */
91
92
/*
 * NextByte: step pointer p forward by exactly one byte and reduce the
 * remaining length plen by one.  Both arguments are modified in place.
 */
93#define NextByte(p, plen) ((p)++, (plen)--)
94
95/*
 * Set up to compile like_match.c for multibyte characters.
 *
 * The included file is used as a template: the macros below supply the
 * per-character primitives, and MatchText / do_like_escape are renamed so
 * that this inclusion produces MB_MatchText and MB_do_like_escape.
 *
 * NOTE(review): like_match.c is not visible here.  Since the same macro
 * names are #defined again further down without an intervening #undef in
 * this file, presumably like_match.c #undefs them at its end --- confirm.
 */
/* Character equality: full multibyte comparison via wchareq(). */
96#define CHAREQ(p1, p2) wchareq((p1), (p2))
/* Advance p past one whole character (pg_mblen bytes), shrinking plen. */
97#define NextChar(p, plen) \
98 do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
/*
 * Copy one whole character (pg_mblen bytes) from src to dst, advancing both
 * pointers and reducing srclen by the number of bytes copied.
 */
99#define CopyAdvChar(dst, src, srclen) \
100 do { int __l = pg_mblen(src); \
101 (srclen) -= __l; \
102 while (__l-- > 0) \
103 *(dst)++ = *(src)++; \
104 } while (0)
105
/* Names under which this inclusion's functions are emitted. */
106#define MatchText MB_MatchText
107#define do_like_escape MB_do_like_escape
108
109#include "like_match.c"
110
111/*
 * Set up to compile like_match.c for single-byte characters.
 *
 * Every character is one byte, so equality is a plain byte comparison and
 * advancing/copying moves exactly one byte.  This inclusion produces
 * SB_MatchText and SB_do_like_escape.
 */
/* Character equality: compare the single bytes at p1 and p2. */
112#define CHAREQ(p1, p2) (*(p1) == *(p2))
/* Advance by one character == one byte. */
113#define NextChar(p, plen) NextByte((p), (plen))
/* Copy one byte from src to dst, advancing both and decrementing srclen. */
114#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
115
/* Names under which this inclusion's functions are emitted. */
116#define MatchText SB_MatchText
117#define do_like_escape SB_do_like_escape
118
119#include "like_match.c"
120
121/*
 * Set up to compile like_match.c for case-insensitive matches in C locale.
 *
 * MATCH_LOWER is defined with no value, so it acts purely as a flag for the
 * included file.  Characters are stepped one byte at a time.  Only MatchText
 * is renamed (to C_IMatchText); CHAREQ, CopyAdvChar and do_like_escape are
 * deliberately not defined for this inclusion.
 *
 * NOTE(review): how like_match.c reacts to MATCH_LOWER (presumably folding
 * each byte to lower case before comparing) is not visible here --- confirm.
 */
122#define MATCH_LOWER
123#define NextChar(p, plen) NextByte((p), (plen))
124#define MatchText C_IMatchText
125
126#include "like_match.c"
127
128/*
 * Set up to compile like_match.c for UTF8 encoding, using fast NextChar.
 *
 * Only NextChar and MatchText are defined for this inclusion, which
 * produces UTF8_MatchText.
 */
129
/*
 * Advance p by one byte, then keep advancing while input remains
 * (plen > 0) and the byte at p has the form 10xxxxxx, i.e. is a UTF-8
 * continuation byte.  This skips a whole character without calling
 * pg_mblen().
 */
130#define NextChar(p, plen) \
131 do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
132#define MatchText UTF8_MatchText
133
134#include "like_match.c"
135
136/*
 * GenericMatchText --- generic for all cases not requiring inline
 * case-folding.
 *
 * Matches the subject string s (slen bytes) against the LIKE pattern p
 * (plen bytes) and returns whatever the selected MatchText variant returns.
 * An invalid collation OID is reported as ERRCODE_INDETERMINATE_COLLATION.
 * The matcher is one of SB_MatchText, UTF8_MatchText (when the database
 * encoding is PG_UTF8) or MB_MatchText; each is handed `locale`.
 *
 * NOTE(review): the declaration and initialization of `locale`, the start
 * of the error-report statement, and the condition guarding the
 * SB_MatchText branch are not visible in this listing.  Presumably `locale`
 * is derived from `collation` and the first branch tests for a single-byte
 * encoding --- confirm against the complete file.
 */
137static inline int
138GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
139{
141
142 if (!OidIsValid(collation))
143 {
144 /*
145 * This typically means that the parser could not resolve a conflict
146 * of implicit collations, so report it that way.
147 */
149 (errcode(ERRCODE_INDETERMINATE_COLLATION),
150 errmsg("could not determine which collation to use for LIKE"),
151 errhint("Use the COLLATE clause to set the collation explicitly.")));
152 }
153
155
157 return SB_MatchText(s, slen, p, plen, locale);
158 else if (GetDatabaseEncoding() == PG_UTF8)
159 return UTF8_MatchText(s, slen, p, plen, locale);
160 else
161 return MB_MatchText(s, slen, p, plen, locale);
162}
163
/*
 * Generic_Text_IC_like --- case-insensitive (ILIKE) match of str against pat.
 *
 * Returns the result of the MatchText variant that ends up being called.
 * Errors are raised for an invalid collation OID
 * (ERRCODE_INDETERMINATE_COLLATION) and for a locale whose `deterministic`
 * flag is false (ERRCODE_FEATURE_NOT_SUPPORTED).
 *
 * When locale->ctype_is_c is set, the raw bytes of both values go straight
 * to C_IMatchText together with the locale.  Otherwise the matcher is
 * chosen by encoding (UTF8_MatchText, MB_MatchText when the maximum
 * character length exceeds one byte, else SB_MatchText) and 0 is passed as
 * its last argument.
 *
 * NOTE(review): several lines are not visible in this listing: the function
 * name/parameter line, the declaration and initialization of `locale`, the
 * starts of both error-report statements, the statements in the else branch
 * that rewrite `pat` and `str` before their data pointers are taken, and the
 * condition guarding the UTF8_MatchText call.  Going by the comment in the
 * body, the else branch presumably lower-cases both values with lower()
 * first --- confirm against the complete file.
 */
164static inline int
166{
167 char *s,
168 *p;
169 int slen,
170 plen;
172
173 if (!OidIsValid(collation))
174 {
175 /*
176 * This typically means that the parser could not resolve a conflict
177 * of implicit collations, so report it that way.
178 */
180 (errcode(ERRCODE_INDETERMINATE_COLLATION),
181 errmsg("could not determine which collation to use for ILIKE"),
182 errhint("Use the COLLATE clause to set the collation explicitly.")));
183 }
184
186
187 if (!locale->deterministic)
189 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
190 errmsg("nondeterministic collations are not supported for ILIKE")));
191
192 /*
193 * For efficiency reasons, in the C locale we don't call lower() on the
194 * pattern and text, but instead lowercase each character lazily.
195 *
196 * XXX: use casefolding instead?
197 */
198
199 if (locale->ctype_is_c)
200 {
201 p = VARDATA_ANY(pat);
202 plen = VARSIZE_ANY_EXHDR(pat);
203 s = VARDATA_ANY(str);
204 slen = VARSIZE_ANY_EXHDR(str);
205 return C_IMatchText(s, slen, p, plen, locale);
206 }
207 else
208 {
210 PointerGetDatum(pat)));
211 p = VARDATA_ANY(pat);
212 plen = VARSIZE_ANY_EXHDR(pat);
215 s = VARDATA_ANY(str);
216 slen = VARSIZE_ANY_EXHDR(str);
217
219 return UTF8_MatchText(s, slen, p, plen, 0);
220 else if (pg_database_encoding_max_length() > 1)
221 return MB_MatchText(s, slen, p, plen, 0);
222 else
223 return SB_MatchText(s, slen, p, plen, 0);
224 }
225}
226
227/*
228 * interface routines called by the function manager
229 */
230
/*
 * namelike --- LIKE for a name value against a text pattern.
 *
 * The subject is the NUL-terminated string inside the name (its length is
 * taken with strlen); the pattern is argument 1, read as a text varlena.
 * Returns true only when GenericMatchText() yields LIKE_TRUE under the
 * collation of the call.
 *
 * NOTE(review): the function-name line and the declaration of `str` are not
 * visible in this listing; presumably `str` is argument 0 fetched as a
 * Name --- confirm.
 */
231Datum
233{
235 text *pat = PG_GETARG_TEXT_PP(1);
236 bool result;
237 char *s,
238 *p;
239 int slen,
240 plen;
241
242 s = NameStr(*str);
243 slen = strlen(s);
244 p = VARDATA_ANY(pat);
245 plen = VARSIZE_ANY_EXHDR(pat);
246
247 result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
248
249 PG_RETURN_BOOL(result);
250}
251
/*
 * namenlike --- NOT LIKE for a name value against a text pattern.
 *
 * Same setup as the positive name match, but the result is true whenever
 * GenericMatchText() returns anything other than LIKE_TRUE, so both
 * LIKE_FALSE and LIKE_ABORT count as "does not match".
 *
 * NOTE(review): the function-name line and the declaration of `str` are not
 * visible in this listing; presumably `str` is argument 0 fetched as a
 * Name --- confirm.
 */
252Datum
254{
256 text *pat = PG_GETARG_TEXT_PP(1);
257 bool result;
258 char *s,
259 *p;
260 int slen,
261 plen;
262
263 s = NameStr(*str);
264 slen = strlen(s);
265 p = VARDATA_ANY(pat);
266 plen = VARSIZE_ANY_EXHDR(pat);
267
268 result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
269
270 PG_RETURN_BOOL(result);
271}
272
/*
 * textlike --- LIKE for a text value against a text pattern.
 *
 * Takes the data pointer and payload length of both varlena values and
 * returns true only when GenericMatchText() yields LIKE_TRUE under the
 * collation of the call.
 *
 * NOTE(review): the function-name line and the declaration of `str` are not
 * visible in this listing; presumably `str` is argument 0 fetched as
 * text --- confirm.
 */
273Datum
275{
277 text *pat = PG_GETARG_TEXT_PP(1);
278 bool result;
279 char *s,
280 *p;
281 int slen,
282 plen;
283
284 s = VARDATA_ANY(str);
285 slen = VARSIZE_ANY_EXHDR(str);
286 p = VARDATA_ANY(pat);
287 plen = VARSIZE_ANY_EXHDR(pat);
288
289 result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
290
291 PG_RETURN_BOOL(result);
292}
293
/*
 * textnlike --- NOT LIKE for a text value against a text pattern.
 *
 * The result is true whenever GenericMatchText() returns anything other
 * than LIKE_TRUE, so both LIKE_FALSE and LIKE_ABORT count as "does not
 * match".
 *
 * NOTE(review): the function-name line and the declaration of `str` are not
 * visible in this listing; presumably `str` is argument 0 fetched as
 * text --- confirm.
 */
294Datum
296{
298 text *pat = PG_GETARG_TEXT_PP(1);
299 bool result;
300 char *s,
301 *p;
302 int slen,
303 plen;
304
305 s = VARDATA_ANY(str);
306 slen = VARSIZE_ANY_EXHDR(str);
307 p = VARDATA_ANY(pat);
308 plen = VARSIZE_ANY_EXHDR(pat);
309
310 result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
311
312 PG_RETURN_BOOL(result);
313}
314
/*
 * bytealike --- LIKE for a bytea value against a bytea pattern.
 *
 * Always uses the single-byte matcher SB_MatchText directly, bypassing the
 * collation/encoding dispatch, and passes 0 as the locale argument.
 * Returns true only when the matcher yields LIKE_TRUE.
 *
 * NOTE(review): the function-name line and the declaration of `str` are not
 * visible in this listing; presumably `str` is argument 0 fetched as
 * bytea --- confirm.
 */
315Datum
317{
319 bytea *pat = PG_GETARG_BYTEA_PP(1);
320 bool result;
321 char *s,
322 *p;
323 int slen,
324 plen;
325
326 s = VARDATA_ANY(str);
327 slen = VARSIZE_ANY_EXHDR(str);
328 p = VARDATA_ANY(pat);
329 plen = VARSIZE_ANY_EXHDR(pat);
330
331 result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
332
333 PG_RETURN_BOOL(result);
334}
335
/*
 * byteanlike --- NOT LIKE for a bytea value against a bytea pattern.
 *
 * Uses the single-byte matcher SB_MatchText with 0 as the locale argument.
 * The result is true whenever the matcher returns anything other than
 * LIKE_TRUE, so both LIKE_FALSE and LIKE_ABORT count as "does not match".
 *
 * NOTE(review): the function-name line and the declaration of `str` are not
 * visible in this listing; presumably `str` is argument 0 fetched as
 * bytea --- confirm.
 */
336Datum
338{
340 bytea *pat = PG_GETARG_BYTEA_PP(1);
341 bool result;
342 char *s,
343 *p;
344 int slen,
345 plen;
346
347 s = VARDATA_ANY(str);
348 slen = VARSIZE_ANY_EXHDR(str);
349 p = VARDATA_ANY(pat);
350 plen = VARSIZE_ANY_EXHDR(pat);
351
352 result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
353
354 PG_RETURN_BOOL(result);
355}
356
357/*
358 * Case-insensitive versions
359 */
360
/*
 * nameiclike --- ILIKE for a name value against a text pattern.
 *
 * The name is first turned into a text value (strtext), which is then
 * matched case-insensitively by Generic_Text_IC_like() under the collation
 * of the call.  Returns true only on LIKE_TRUE.
 *
 * NOTE(review): the function-name line, the declaration of `str`, and the
 * first half of the statement assigning `strtext` are not visible in this
 * listing; only its NameGetDatum(str) argument is.  Presumably the name is
 * converted with name_text --- confirm.
 */
361Datum
363{
365 text *pat = PG_GETARG_TEXT_PP(1);
366 bool result;
367 text *strtext;
368
370 NameGetDatum(str)));
371 result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
372
373 PG_RETURN_BOOL(result);
374}
375
/*
 * nameicnlike --- NOT ILIKE for a name value against a text pattern.
 *
 * The name is first turned into a text value (strtext) and matched
 * case-insensitively by Generic_Text_IC_like().  The result is true
 * whenever that returns anything other than LIKE_TRUE, so both LIKE_FALSE
 * and LIKE_ABORT count as "does not match".
 *
 * NOTE(review): the function-name line, the declaration of `str`, and the
 * first half of the statement assigning `strtext` are not visible in this
 * listing; only its NameGetDatum(str) argument is.  Presumably the name is
 * converted with name_text --- confirm.
 */
376Datum
378{
380 text *pat = PG_GETARG_TEXT_PP(1);
381 bool result;
382 text *strtext;
383
385 NameGetDatum(str)));
386 result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
387
388 PG_RETURN_BOOL(result);
389}
390
/*
 * texticlike --- ILIKE for a text value against a text pattern.
 *
 * Hands both values to Generic_Text_IC_like() with the collation of the
 * call and returns true only on LIKE_TRUE.
 *
 * NOTE(review): the function-name line and the declaration of `str` are not
 * visible in this listing; presumably `str` is argument 0 fetched as
 * text --- confirm.
 */
391Datum
393{
395 text *pat = PG_GETARG_TEXT_PP(1);
396 bool result;
397
398 result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
399
400 PG_RETURN_BOOL(result);
401}
402
/*
 * texticnlike --- NOT ILIKE for a text value against a text pattern.
 *
 * Hands both values to Generic_Text_IC_like() with the collation of the
 * call.  The result is true whenever that returns anything other than
 * LIKE_TRUE, so both LIKE_FALSE and LIKE_ABORT count as "does not match".
 *
 * NOTE(review): the function-name line and the declaration of `str` are not
 * visible in this listing; presumably `str` is argument 0 fetched as
 * text --- confirm.
 */
403Datum
405{
407 text *pat = PG_GETARG_TEXT_PP(1);
408 bool result;
409
410 result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
411
412 PG_RETURN_BOOL(result);
413}
414
415/*
416 * like_escape() --- given a pattern and an ESCAPE string,
417 * convert the pattern to use Postgres' standard backslash escape convention.
 *
 * Argument 0 is the pattern and argument 1 the escape string, both text.
 * The work is delegated to either SB_do_like_escape or MB_do_like_escape
 * and the converted pattern is returned as text.
 *
 * NOTE(review): the function-name line and the condition selecting the
 * single-byte versus multibyte implementation are not visible in this
 * listing; presumably it tests whether the database encoding is
 * single-byte --- confirm.
418 */
419Datum
421{
422 text *pat = PG_GETARG_TEXT_PP(0);
423 text *esc = PG_GETARG_TEXT_PP(1);
424 text *result;
425
427 result = SB_do_like_escape(pat, esc);
428 else
429 result = MB_do_like_escape(pat, esc);
430
431 PG_RETURN_TEXT_P(result);
432}
433
434/*
435 * like_escape_bytea() --- given a pattern and an ESCAPE string,
436 * convert the pattern to use Postgres' standard backslash escape convention.
 *
 * Argument 0 is the pattern and argument 1 the escape string, both bytea.
 * The single-byte implementation SB_do_like_escape is always used; the
 * bytea pointers are cast to text * for the call and the result is
 * returned as bytea.
 *
 * NOTE(review): the function-name line is not visible in this listing.
437 */
438Datum
440{
441 bytea *pat = PG_GETARG_BYTEA_PP(0);
442 bytea *esc = PG_GETARG_BYTEA_PP(1);
443 bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
444
445 PG_RETURN_BYTEA_P((bytea *) result);
446}
#define NameStr(name)
Definition: c.h:765
#define OidIsValid(objectId)
Definition: c.h:788
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:150
Datum DirectFunctionCall1Coll(PGFunction func, Oid collation, Datum arg1)
Definition: fmgr.c:793
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:682
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
const char * str
static char * locale
Definition: initdb.c:140
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
static int MB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
static text * MB_do_like_escape(text *pat, text *esc)
#define LIKE_TRUE
Definition: like.c:30
Datum texticnlike(PG_FUNCTION_ARGS)
Definition: like.c:404
Datum textlike(PG_FUNCTION_ARGS)
Definition: like.c:274
Datum namelike(PG_FUNCTION_ARGS)
Definition: like.c:232
Datum nameiclike(PG_FUNCTION_ARGS)
Definition: like.c:362
Datum byteanlike(PG_FUNCTION_ARGS)
Definition: like.c:337
static int C_IMatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
Datum like_escape(PG_FUNCTION_ARGS)
Definition: like.c:420
static int wchareq(const char *p1, const char *p2)
Definition: like.c:58
Datum namenlike(PG_FUNCTION_ARGS)
Definition: like.c:253
Datum textnlike(PG_FUNCTION_ARGS)
Definition: like.c:295
Datum nameicnlike(PG_FUNCTION_ARGS)
Definition: like.c:377
Datum like_escape_bytea(PG_FUNCTION_ARGS)
Definition: like.c:439
static text * SB_do_like_escape(text *pat, text *esc)
Datum texticlike(PG_FUNCTION_ARGS)
Definition: like.c:392
static int SB_MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
static int Generic_Text_IC_like(text *str, text *pat, Oid collation)
Definition: like.c:165
Datum bytealike(PG_FUNCTION_ARGS)
Definition: like.c:316
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
Definition: like.c:138
int GetDatabaseEncoding(void)
Definition: mbutils.c:1264
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1549
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1026
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1186
@ PG_UTF8
Definition: pg_wchar.h:232
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:332
static Datum NameGetDatum(const NameData *X)
Definition: postgres.h:383
uint64_t Datum
Definition: postgres.h:70
unsigned int Oid
Definition: postgres_ext.h:32
Definition: c.h:760
Definition: c.h:706
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486
Datum name_text(PG_FUNCTION_ARGS)
Definition: varlena.c:2680