PostgreSQL Source Code git master
like_support.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * like_support.c
4 * Planner support functions for LIKE, regex, and related operators.
5 *
6 * These routines handle special optimization of operators that can be
7 * used with index scans even though they are not known to the executor's
8 * indexscan machinery. The key idea is that these operators allow us
9 * to derive approximate indexscan qual clauses, such that any tuples
10 * that pass the operator clause itself must also satisfy the simpler
11 * indexscan condition(s). Then we can use the indexscan machinery
12 * to avoid scanning as much of the table as we'd otherwise have to,
13 * while applying the original operator as a qpqual condition to ensure
14 * we deliver only the tuples we want. (In essence, we're using a regular
15 * index as if it were a lossy index.)
16 *
17 * An example of what we're doing is
18 * textfield LIKE 'abc%def'
19 * from which we can generate the indexscanable conditions
20 * textfield >= 'abc' AND textfield < 'abd'
21 * which allow efficient scanning of an index on textfield.
22 * (In reality, character set and collation issues make the transformation
23 * from LIKE to indexscan limits rather harder than one might think ...
24 * but that's the basic idea.)
25 *
26 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
28 *
29 *
30 * IDENTIFICATION
31 * src/backend/utils/adt/like_support.c
32 *
33 *-------------------------------------------------------------------------
34 */
35#include "postgres.h"
36
37#include <math.h>
38
39#include "access/htup_details.h"
41#include "catalog/pg_operator.h"
42#include "catalog/pg_opfamily.h"
44#include "catalog/pg_type.h"
45#include "mb/pg_wchar.h"
46#include "miscadmin.h"
47#include "nodes/makefuncs.h"
48#include "nodes/nodeFuncs.h"
49#include "nodes/supportnodes.h"
50#include "utils/builtins.h"
51#include "utils/datum.h"
52#include "utils/lsyscache.h"
53#include "utils/pg_locale.h"
54#include "utils/selfuncs.h"
55#include "utils/varlena.h"
56
57
58typedef enum
59{
66
67typedef enum
68{
71
72static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
73static List *match_pattern_prefix(Node *leftop,
74 Node *rightop,
75 Pattern_Type ptype,
76 Oid expr_coll,
77 Oid opfamily,
78 Oid indexcollation);
80 Oid oprid,
81 Oid opfuncid,
82 List *args,
83 int varRelid,
84 Oid collation,
85 Pattern_Type ptype,
86 bool negate);
88 Pattern_Type ptype,
89 Oid collation,
90 Const **prefix,
91 Selectivity *rest_selec);
93 VariableStatData *vardata,
94 Oid eqopr, Oid ltopr, Oid geopr,
95 Oid collation,
96 Const *prefixcon);
97static Selectivity like_selectivity(const char *patt, int pattlen,
98 bool case_insensitive);
99static Selectivity regex_selectivity(const char *patt, int pattlen,
100 bool case_insensitive,
101 int fixed_prefix_len);
102static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
103 Oid collation);
104static Datum string_to_datum(const char *str, Oid datatype);
105static Const *string_to_const(const char *str, Oid datatype);
106static Const *string_to_bytea_const(const char *str, size_t str_len);
107
108
109/*
110 * Planner support functions for LIKE, regex, and related operators
111 */
112Datum
114{
115 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
116
118}
119
120Datum
122{
123 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
124
126}
127
128Datum
130{
131 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
132
134}
135
136Datum
138{
139 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
140
142}
143
144Datum
146{
147 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
148
150}
151
152/* Common code for the above */
153static Node *
155{
156 Node *ret = NULL;
157
158 if (IsA(rawreq, SupportRequestSelectivity))
159 {
160 /*
161 * Make a selectivity estimate for a function call, just as we'd do if
162 * the call was via the corresponding operator.
163 */
166
167 if (req->is_join)
168 {
169 /*
170 * For the moment we just punt. If patternjoinsel is ever
171 * improved to do better, this should be made to call it.
172 */
174 }
175 else
176 {
177 /* Share code with operator restriction selectivity functions */
180 req->funcid,
181 req->args,
182 req->varRelid,
183 req->inputcollid,
184 ptype,
185 false);
186 }
187 req->selectivity = s1;
188 ret = (Node *) req;
189 }
190 else if (IsA(rawreq, SupportRequestIndexCondition))
191 {
192 /* Try to convert operator/function call to index conditions */
194
195 /*
196 * Currently we have no "reverse" match operators with the pattern on
197 * the left, so we only need consider cases with the indexkey on the
198 * left.
199 */
200 if (req->indexarg != 0)
201 return NULL;
202
203 if (is_opclause(req->node))
204 {
205 OpExpr *clause = (OpExpr *) req->node;
206
207 Assert(list_length(clause->args) == 2);
208 ret = (Node *)
210 (Node *) lsecond(clause->args),
211 ptype,
212 clause->inputcollid,
213 req->opfamily,
214 req->indexcollation);
215 }
216 else if (is_funcclause(req->node)) /* be paranoid */
217 {
218 FuncExpr *clause = (FuncExpr *) req->node;
219
220 Assert(list_length(clause->args) == 2);
221 ret = (Node *)
223 (Node *) lsecond(clause->args),
224 ptype,
225 clause->inputcollid,
226 req->opfamily,
227 req->indexcollation);
228 }
229 }
230
231 return ret;
232}
233
234/*
235 * match_pattern_prefix
236 * Try to generate an indexqual for a LIKE or regex operator.
237 */
238static List *
240 Node *rightop,
241 Pattern_Type ptype,
242 Oid expr_coll,
243 Oid opfamily,
244 Oid indexcollation)
245{
246 List *result;
247 Const *patt;
248 Const *prefix;
249 Pattern_Prefix_Status pstatus;
250 Oid ldatatype;
251 Oid rdatatype;
252 Oid eqopr;
253 Oid ltopr;
254 Oid geopr;
255 Oid preopr = InvalidOid;
256 bool collation_aware;
257 Expr *expr;
258 FmgrInfo ltproc;
259 Const *greaterstr;
260
261 /*
262 * Can't do anything with a non-constant or NULL pattern argument.
263 *
264 * Note that since we restrict ourselves to cases with a hard constant on
265 * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
266 * about verifying that.
267 */
268 if (!IsA(rightop, Const) ||
269 ((Const *) rightop)->constisnull)
270 return NIL;
271 patt = (Const *) rightop;
272
273 /*
274 * Try to extract a fixed prefix from the pattern.
275 */
276 pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
277 &prefix, NULL);
278
279 /* fail if no fixed prefix */
280 if (pstatus == Pattern_Prefix_None)
281 return NIL;
282
283 /*
284 * Identify the operators we want to use, based on the type of the
285 * left-hand argument. Usually these are just the type's regular
286 * comparison operators, but if we are considering one of the semi-legacy
287 * "pattern" opclasses, use the "pattern" operators instead. Those are
288 * not collation-sensitive but always use C collation, as we want. The
289 * selected operators also determine the needed type of the prefix
290 * constant.
291 */
292 ldatatype = exprType(leftop);
293 switch (ldatatype)
294 {
295 case TEXTOID:
296 if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
297 {
298 eqopr = TextEqualOperator;
299 ltopr = TextPatternLessOperator;
300 geopr = TextPatternGreaterEqualOperator;
301 collation_aware = false;
302 }
303 else if (opfamily == TEXT_SPGIST_FAM_OID)
304 {
305 eqopr = TextEqualOperator;
306 ltopr = TextPatternLessOperator;
307 geopr = TextPatternGreaterEqualOperator;
308 /* This opfamily has direct support for prefixing */
309 preopr = TextPrefixOperator;
310 collation_aware = false;
311 }
312 else
313 {
314 eqopr = TextEqualOperator;
315 ltopr = TextLessOperator;
316 geopr = TextGreaterEqualOperator;
317 collation_aware = true;
318 }
319 rdatatype = TEXTOID;
320 break;
321 case NAMEOID:
322
323 /*
324 * Note that here, we need the RHS type to be text, so that the
325 * comparison value isn't improperly truncated to NAMEDATALEN.
326 */
327 eqopr = NameEqualTextOperator;
328 ltopr = NameLessTextOperator;
329 geopr = NameGreaterEqualTextOperator;
330 collation_aware = true;
331 rdatatype = TEXTOID;
332 break;
333 case BPCHAROID:
334 if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
335 {
336 eqopr = BpcharEqualOperator;
337 ltopr = BpcharPatternLessOperator;
338 geopr = BpcharPatternGreaterEqualOperator;
339 collation_aware = false;
340 }
341 else
342 {
343 eqopr = BpcharEqualOperator;
344 ltopr = BpcharLessOperator;
345 geopr = BpcharGreaterEqualOperator;
346 collation_aware = true;
347 }
348 rdatatype = BPCHAROID;
349 break;
350 case BYTEAOID:
351 eqopr = ByteaEqualOperator;
352 ltopr = ByteaLessOperator;
353 geopr = ByteaGreaterEqualOperator;
354 collation_aware = false;
355 rdatatype = BYTEAOID;
356 break;
357 default:
358 /* Can't get here unless we're attached to the wrong operator */
359 return NIL;
360 }
361
362 /*
363 * If necessary, coerce the prefix constant to the right type. The given
364 * prefix constant is either text or bytea type, therefore the only case
365 * where we need to do anything is when converting text to bpchar. Those
366 * two types are binary-compatible, so relabeling the Const node is
367 * sufficient.
368 */
369 if (prefix->consttype != rdatatype)
370 {
371 Assert(prefix->consttype == TEXTOID &&
372 rdatatype == BPCHAROID);
373 prefix->consttype = rdatatype;
374 }
375
376 /*
377 * If we found an exact-match pattern, generate an "=" indexqual.
378 *
379 * Here and below, check to see whether the desired operator is actually
380 * supported by the index opclass, and fail quietly if not. This allows
381 * us to not be concerned with specific opclasses (except for the legacy
382 * "pattern" cases); any index that correctly implements the operators
383 * will work.
384 */
385 if (pstatus == Pattern_Prefix_Exact)
386 {
387 if (!op_in_opfamily(eqopr, opfamily))
388 return NIL;
389 if (indexcollation != expr_coll)
390 return NIL;
391 expr = make_opclause(eqopr, BOOLOID, false,
392 (Expr *) leftop, (Expr *) prefix,
393 InvalidOid, indexcollation);
394 result = list_make1(expr);
395 return result;
396 }
397
398 /*
399 * Anything other than Pattern_Prefix_Exact is not supported if the
400 * expression collation is nondeterministic. The optimized equality or
401 * prefix tests use bytewise comparisons, which is not consistent with
402 * nondeterministic collations.
403 *
404 * expr_coll is not set for a non-collation-aware data type such as bytea.
405 */
406 if (expr_coll && !get_collation_isdeterministic(expr_coll))
407 return NIL;
408
409 /*
410 * Otherwise, we have a nonempty required prefix of the values. Some
411 * opclasses support prefix checks directly, otherwise we'll try to
412 * generate a range constraint.
413 */
414 if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
415 {
416 expr = make_opclause(preopr, BOOLOID, false,
417 (Expr *) leftop, (Expr *) prefix,
418 InvalidOid, indexcollation);
419 result = list_make1(expr);
420 return result;
421 }
422
423 /*
424 * Since we need a range constraint, it's only going to work reliably if
425 * the index is collation-insensitive or has "C" collation. Note that
426 * here we are looking at the index's collation, not the expression's
427 * collation -- this test is *not* dependent on the LIKE/regex operator's
428 * collation.
429 */
430 if (collation_aware &&
431 !pg_newlocale_from_collation(indexcollation)->collate_is_c)
432 return NIL;
433
434 /*
435 * We can always say "x >= prefix".
436 */
437 if (!op_in_opfamily(geopr, opfamily))
438 return NIL;
439 expr = make_opclause(geopr, BOOLOID, false,
440 (Expr *) leftop, (Expr *) prefix,
441 InvalidOid, indexcollation);
442 result = list_make1(expr);
443
444 /*-------
445 * If we can create a string larger than the prefix, we can say
446 * "x < greaterstr". NB: we rely on make_greater_string() to generate
447 * a guaranteed-greater string, not just a probably-greater string.
448 * In general this is only guaranteed in C locale, so we'd better be
449 * using a C-locale index collation.
450 *-------
451 */
452 if (!op_in_opfamily(ltopr, opfamily))
453 return result;
454 fmgr_info(get_opcode(ltopr), &ltproc);
455 greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
456 if (greaterstr)
457 {
458 expr = make_opclause(ltopr, BOOLOID, false,
459 (Expr *) leftop, (Expr *) greaterstr,
460 InvalidOid, indexcollation);
461 result = lappend(result, expr);
462 }
463
464 return result;
465}
466
467
468/*
469 * patternsel_common - generic code for pattern-match restriction selectivity.
470 *
471 * To support using this from either the operator or function paths, caller
472 * may pass either operator OID or underlying function OID; we look up the
473 * latter from the former if needed. (We could just have patternsel() call
474 * get_opcode(), but the work would be wasted if we don't have a need to
475 * compare a fixed prefix to the pg_statistic data.)
476 *
477 * Note that oprid and/or opfuncid should be for the positive-match operator
478 * even when negate is true.
479 */
480static double
482 Oid oprid,
483 Oid opfuncid,
484 List *args,
485 int varRelid,
486 Oid collation,
487 Pattern_Type ptype,
488 bool negate)
489{
490 VariableStatData vardata;
491 Node *other;
492 bool varonleft;
493 Datum constval;
494 Oid consttype;
495 Oid vartype;
496 Oid rdatatype;
497 Oid eqopr;
498 Oid ltopr;
499 Oid geopr;
500 Pattern_Prefix_Status pstatus;
501 Const *patt;
502 Const *prefix = NULL;
503 Selectivity rest_selec = 0;
504 double nullfrac = 0.0;
505 double result;
506
507 /*
508 * Initialize result to the appropriate default estimate depending on
509 * whether it's a match or not-match operator.
510 */
511 if (negate)
512 result = 1.0 - DEFAULT_MATCH_SEL;
513 else
514 result = DEFAULT_MATCH_SEL;
515
516 /*
517 * If expression is not variable op constant, then punt and return the
518 * default estimate.
519 */
520 if (!get_restriction_variable(root, args, varRelid,
521 &vardata, &other, &varonleft))
522 return result;
523 if (!varonleft || !IsA(other, Const))
524 {
525 ReleaseVariableStats(vardata);
526 return result;
527 }
528
529 /*
530 * If the constant is NULL, assume operator is strict and return zero, ie,
531 * operator will never return TRUE. (It's zero even for a negator op.)
532 */
533 if (((Const *) other)->constisnull)
534 {
535 ReleaseVariableStats(vardata);
536 return 0.0;
537 }
538 constval = ((Const *) other)->constvalue;
539 consttype = ((Const *) other)->consttype;
540
541 /*
542 * The right-hand const is type text or bytea for all supported operators.
543 * We do not expect to see binary-compatible types here, since
544 * const-folding should have relabeled the const to exactly match the
545 * operator's declared type.
546 */
547 if (consttype != TEXTOID && consttype != BYTEAOID)
548 {
549 ReleaseVariableStats(vardata);
550 return result;
551 }
552
553 /*
554 * Similarly, the exposed type of the left-hand side should be one of
555 * those we know. (Do not look at vardata.atttype, which might be
556 * something binary-compatible but different.) We can use it to identify
557 * the comparison operators and the required type of the comparison
558 * constant, much as in match_pattern_prefix().
559 */
560 vartype = vardata.vartype;
561
562 switch (vartype)
563 {
564 case TEXTOID:
565 eqopr = TextEqualOperator;
566 ltopr = TextLessOperator;
567 geopr = TextGreaterEqualOperator;
568 rdatatype = TEXTOID;
569 break;
570 case NAMEOID:
571
572 /*
573 * Note that here, we need the RHS type to be text, so that the
574 * comparison value isn't improperly truncated to NAMEDATALEN.
575 */
576 eqopr = NameEqualTextOperator;
577 ltopr = NameLessTextOperator;
578 geopr = NameGreaterEqualTextOperator;
579 rdatatype = TEXTOID;
580 break;
581 case BPCHAROID:
582 eqopr = BpcharEqualOperator;
583 ltopr = BpcharLessOperator;
584 geopr = BpcharGreaterEqualOperator;
585 rdatatype = BPCHAROID;
586 break;
587 case BYTEAOID:
588 eqopr = ByteaEqualOperator;
589 ltopr = ByteaLessOperator;
590 geopr = ByteaGreaterEqualOperator;
591 rdatatype = BYTEAOID;
592 break;
593 default:
594 /* Can't get here unless we're attached to the wrong operator */
595 ReleaseVariableStats(vardata);
596 return result;
597 }
598
599 /*
600 * Grab the nullfrac for use below.
601 */
602 if (HeapTupleIsValid(vardata.statsTuple))
603 {
604 Form_pg_statistic stats;
605
606 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
607 nullfrac = stats->stanullfrac;
608 }
609
610 /*
611 * Pull out any fixed prefix implied by the pattern, and estimate the
612 * fractional selectivity of the remainder of the pattern. Unlike many
613 * other selectivity estimators, we use the pattern operator's actual
614 * collation for this step. This is not because we expect the collation
615 * to make a big difference in the selectivity estimate (it seldom would),
616 * but because we want to be sure we cache compiled regexps under the
617 * right cache key, so that they can be re-used at runtime.
618 */
619 patt = (Const *) other;
620 pstatus = pattern_fixed_prefix(patt, ptype, collation,
621 &prefix, &rest_selec);
622
623 /*
624 * If necessary, coerce the prefix constant to the right type. The only
625 * case where we need to do anything is when converting text to bpchar.
626 * Those two types are binary-compatible, so relabeling the Const node is
627 * sufficient.
628 */
629 if (prefix && prefix->consttype != rdatatype)
630 {
631 Assert(prefix->consttype == TEXTOID &&
632 rdatatype == BPCHAROID);
633 prefix->consttype = rdatatype;
634 }
635
636 if (pstatus == Pattern_Prefix_Exact)
637 {
638 /*
639 * Pattern specifies an exact match, so estimate as for '='
640 */
641 result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
642 false, true, false);
643 }
644 else
645 {
646 /*
647 * Not exact-match pattern. If we have a sufficiently large
648 * histogram, estimate selectivity for the histogram part of the
649 * population by counting matches in the histogram. If not, estimate
650 * selectivity of the fixed prefix and remainder of pattern
651 * separately, then combine the two to get an estimate of the
652 * selectivity for the part of the column population represented by
653 * the histogram. (For small histograms, we combine these
654 * approaches.)
655 *
656 * We then add up data for any most-common-values values; these are
657 * not in the histogram population, and we can get exact answers for
658 * them by applying the pattern operator, so there's no reason to
659 * approximate. (If the MCVs cover a significant part of the total
660 * population, this gives us a big leg up in accuracy.)
661 */
662 Selectivity selec;
663 int hist_size;
664 FmgrInfo opproc;
665 double mcv_selec,
666 sumcommon;
667
668 /* Try to use the histogram entries to get selectivity */
669 if (!OidIsValid(opfuncid))
670 opfuncid = get_opcode(oprid);
671 fmgr_info(opfuncid, &opproc);
672
673 selec = histogram_selectivity(&vardata, &opproc, collation,
674 constval, true,
675 10, 1, &hist_size);
676
677 /* If not at least 100 entries, use the heuristic method */
678 if (hist_size < 100)
679 {
680 Selectivity heursel;
682
683 if (pstatus == Pattern_Prefix_Partial)
685 eqopr, ltopr, geopr,
686 collation,
687 prefix);
688 else
689 prefixsel = 1.0;
690 heursel = prefixsel * rest_selec;
691
692 if (selec < 0) /* fewer than 10 histogram entries? */
693 selec = heursel;
694 else
695 {
696 /*
697 * For histogram sizes from 10 to 100, we combine the
698 * histogram and heuristic selectivities, putting increasingly
699 * more trust in the histogram for larger sizes.
700 */
701 double hist_weight = hist_size / 100.0;
702
703 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
704 }
705 }
706
707 /* In any case, don't believe extremely small or large estimates. */
708 if (selec < 0.0001)
709 selec = 0.0001;
710 else if (selec > 0.9999)
711 selec = 0.9999;
712
713 /*
714 * If we have most-common-values info, add up the fractions of the MCV
715 * entries that satisfy MCV OP PATTERN. These fractions contribute
716 * directly to the result selectivity. Also add up the total fraction
717 * represented by MCV entries.
718 */
719 mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
720 constval, true,
721 &sumcommon);
722
723 /*
724 * Now merge the results from the MCV and histogram calculations,
725 * realizing that the histogram covers only the non-null values that
726 * are not listed in MCV.
727 */
728 selec *= 1.0 - nullfrac - sumcommon;
729 selec += mcv_selec;
730 result = selec;
731 }
732
733 /* now adjust if we wanted not-match rather than match */
734 if (negate)
735 result = 1.0 - result - nullfrac;
736
737 /* result should be in range, but make sure... */
738 CLAMP_PROBABILITY(result);
739
740 if (prefix)
741 {
742 pfree(DatumGetPointer(prefix->constvalue));
743 pfree(prefix);
744 }
745
746 ReleaseVariableStats(vardata);
747
748 return result;
749}
750
751/*
752 * Fix impedance mismatch between SQL-callable functions and patternsel_common
753 */
754static double
756{
758 Oid operator = PG_GETARG_OID(1);
760 int varRelid = PG_GETARG_INT32(3);
761 Oid collation = PG_GET_COLLATION();
762
763 /*
764 * If this is for a NOT LIKE or similar operator, get the corresponding
765 * positive-match operator and work with that.
766 */
767 if (negate)
768 {
769 operator = get_negator(operator);
770 if (!OidIsValid(operator))
771 elog(ERROR, "patternsel called for operator without a negator");
772 }
773
774 return patternsel_common(root,
775 operator,
777 args,
778 varRelid,
779 collation,
780 ptype,
781 negate);
782}
783
784/*
785 * regexeqsel - Selectivity of regular-expression pattern match.
786 */
787Datum
789{
791}
792
793/*
794 * icregexeqsel - Selectivity of case-insensitive regex match.
795 */
796Datum
798{
800}
801
802/*
803 * likesel - Selectivity of LIKE pattern match.
804 */
805Datum
807{
809}
810
811/*
812 * prefixsel - selectivity of prefix operator
813 */
814Datum
816{
818}
819
820/*
821 *
822 * iclikesel - Selectivity of ILIKE pattern match.
823 */
824Datum
826{
828}
829
830/*
831 * regexnesel - Selectivity of regular-expression pattern non-match.
832 */
833Datum
835{
837}
838
839/*
840 * icregexnesel - Selectivity of case-insensitive regex non-match.
841 */
842Datum
844{
846}
847
848/*
849 * nlikesel - Selectivity of LIKE pattern non-match.
850 */
851Datum
853{
855}
856
857/*
858 * icnlikesel - Selectivity of ILIKE pattern non-match.
859 */
860Datum
862{
864}
865
866/*
867 * patternjoinsel - Generic code for pattern-match join selectivity.
868 */
869static double
871{
872 /* For the moment we just punt. */
873 return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
874}
875
876/*
877 * regexeqjoinsel - Join selectivity of regular-expression pattern match.
878 */
879Datum
881{
883}
884
885/*
886 * icregexeqjoinsel - Join selectivity of case-insensitive regex match.
887 */
888Datum
890{
892}
893
894/*
895 * likejoinsel - Join selectivity of LIKE pattern match.
896 */
897Datum
899{
901}
902
903/*
904 * prefixjoinsel - Join selectivity of prefix operator
905 */
906Datum
908{
910}
911
912/*
913 * iclikejoinsel - Join selectivity of ILIKE pattern match.
914 */
915Datum
917{
919}
920
921/*
922 * regexnejoinsel - Join selectivity of regex non-match.
923 */
924Datum
926{
928}
929
930/*
931 * icregexnejoinsel - Join selectivity of case-insensitive regex non-match.
932 */
933Datum
935{
937}
938
939/*
940 * nlikejoinsel - Join selectivity of LIKE pattern non-match.
941 */
942Datum
944{
946}
947
948/*
949 * icnlikejoinsel - Join selectivity of ILIKE pattern non-match.
950 */
951Datum
953{
955}
956
957
958/*-------------------------------------------------------------------------
959 *
960 * Pattern analysis functions
961 *
962 * These routines support analysis of LIKE and regular-expression patterns
963 * by the planner/optimizer. It's important that they agree with the
964 * regular-expression code in backend/regex/ and the LIKE code in
965 * backend/utils/adt/like.c. Also, the computation of the fixed prefix
966 * must be conservative: if we report a string longer than the true fixed
967 * prefix, the query may produce actually wrong answers, rather than just
968 * getting a bad selectivity estimate!
969 *
970 *-------------------------------------------------------------------------
971 */
972
973/*
974 * Extract the fixed prefix, if any, for a pattern.
975 *
976 * *prefix is set to a palloc'd prefix string (in the form of a Const node),
977 * or to NULL if no fixed prefix exists for the pattern.
978 * If rest_selec is not NULL, *rest_selec is set to an estimate of the
979 * selectivity of the remainder of the pattern (without any fixed prefix).
980 * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
981 *
982 * The return value distinguishes no fixed prefix, a partial prefix,
983 * or an exact-match-only pattern.
984 */
985
987like_fixed_prefix(Const *patt_const, Const **prefix_const,
988 Selectivity *rest_selec)
989{
990 char *match;
991 char *patt;
992 int pattlen;
993 Oid typeid = patt_const->consttype;
994 int pos,
995 match_pos;
996
997 /* the right-hand const is type text or bytea */
998 Assert(typeid == BYTEAOID || typeid == TEXTOID);
999
1000 if (typeid != BYTEAOID)
1001 {
1002 patt = TextDatumGetCString(patt_const->constvalue);
1003 pattlen = strlen(patt);
1004 }
1005 else
1006 {
1007 bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
1008
1009 pattlen = VARSIZE_ANY_EXHDR(bstr);
1010 patt = (char *) palloc(pattlen);
1011 memcpy(patt, VARDATA_ANY(bstr), pattlen);
1012 Assert(bstr == DatumGetPointer(patt_const->constvalue));
1013 }
1014
1015 match = palloc(pattlen + 1);
1016 match_pos = 0;
1017 for (pos = 0; pos < pattlen; pos++)
1018 {
1019 /* % and _ are wildcard characters in LIKE */
1020 if (patt[pos] == '%' ||
1021 patt[pos] == '_')
1022 break;
1023
1024 /* Backslash escapes the next character */
1025 if (patt[pos] == '\\')
1026 {
1027 pos++;
1028 if (pos >= pattlen)
1029 break;
1030 }
1031
1032 match[match_pos++] = patt[pos];
1033 }
1034
1035 match[match_pos] = '\0';
1036
1037 if (typeid != BYTEAOID)
1038 *prefix_const = string_to_const(match, typeid);
1039 else
1040 *prefix_const = string_to_bytea_const(match, match_pos);
1041
1042 if (rest_selec != NULL)
1043 *rest_selec = like_selectivity(&patt[pos], pattlen - pos, false);
1044
1045 pfree(patt);
1046 pfree(match);
1047
1048 /* in LIKE, an empty pattern is an exact match! */
1049 if (pos == pattlen)
1050 return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
1051
1052 if (match_pos > 0)
1054
1055 return Pattern_Prefix_None;
1056}
1057
1058/*
1059 * Case-insensitive variant of like_fixed_prefix(). Multibyte and
1060 * locale-aware for detecting cased characters.
1061 */
1063like_fixed_prefix_ci(Const *patt_const, Oid collation, Const **prefix_const,
1064 Selectivity *rest_selec)
1065{
1066 text *val = DatumGetTextPP(patt_const->constvalue);
1067 Oid typeid = patt_const->consttype;
1068 int nbytes = VARSIZE_ANY_EXHDR(val);
1069 int wpos;
1070 pg_wchar *wpatt;
1071 int wpattlen;
1072 pg_wchar *wmatch;
1073 int wmatch_pos = 0;
1074 char *match;
1075 int match_mblen;
1076 pg_locale_t locale = 0;
1077
1078 /* the right-hand const is type text or bytea */
1079 Assert(typeid == BYTEAOID || typeid == TEXTOID);
1080
1081 if (typeid == BYTEAOID)
1082 ereport(ERROR,
1083 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1084 errmsg("case insensitive matching not supported on type bytea")));
1085
1086 if (!OidIsValid(collation))
1087 {
1088 /*
1089 * This typically means that the parser could not resolve a conflict
1090 * of implicit collations, so report it that way.
1091 */
1092 ereport(ERROR,
1093 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1094 errmsg("could not determine which collation to use for ILIKE"),
1095 errhint("Use the COLLATE clause to set the collation explicitly.")));
1096 }
1097
1099
1100 wpatt = palloc((nbytes + 1) * sizeof(pg_wchar));
1101 wpattlen = pg_mb2wchar_with_len(VARDATA_ANY(val), wpatt, nbytes);
1102
1103 wmatch = palloc((nbytes + 1) * sizeof(pg_wchar));
1104 for (wpos = 0; wpos < wpattlen; wpos++)
1105 {
1106 /* % and _ are wildcard characters in LIKE */
1107 if (wpatt[wpos] == '%' ||
1108 wpatt[wpos] == '_')
1109 break;
1110
1111 /* Backslash escapes the next character */
1112 if (wpatt[wpos] == '\\')
1113 {
1114 wpos++;
1115 if (wpos >= wpattlen)
1116 break;
1117 }
1118
1119 /*
1120 * For ILIKE, stop if it's a case-varying character (it's sort of a
1121 * wildcard).
1122 */
1123 if (pg_iswcased(wpatt[wpos], locale))
1124 break;
1125
1126 wmatch[wmatch_pos++] = wpatt[wpos];
1127 }
1128
1129 wmatch[wmatch_pos] = '\0';
1130
1131 match = palloc(pg_database_encoding_max_length() * wmatch_pos + 1);
1132 match_mblen = pg_wchar2mb_with_len(wmatch, match, wmatch_pos);
1133 match[match_mblen] = '\0';
1134 pfree(wmatch);
1135
1136 *prefix_const = string_to_const(match, TEXTOID);
1137 pfree(match);
1138
1139 if (rest_selec != NULL)
1140 {
1141 int wrestlen = wpattlen - wmatch_pos;
1142 char *rest;
1143 int rest_mblen;
1144
1145 rest = palloc(pg_database_encoding_max_length() * wrestlen + 1);
1146 rest_mblen = pg_wchar2mb_with_len(&wpatt[wmatch_pos], rest, wrestlen);
1147
1148 *rest_selec = like_selectivity(rest, rest_mblen, true);
1149 pfree(rest);
1150 }
1151
1152 pfree(wpatt);
1153
1154 /* in LIKE, an empty pattern is an exact match! */
1155 if (wpos == wpattlen)
1156 return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
1157
1158 if (wmatch_pos > 0)
1160
1161 return Pattern_Prefix_None;
1162}
1163
1165regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
1166 Const **prefix_const, Selectivity *rest_selec)
1167{
1168 Oid typeid = patt_const->consttype;
1169 char *prefix;
1170 bool exact;
1171
1172 /*
1173 * Should be unnecessary, there are no bytea regex operators defined. As
1174 * such, it should be noted that the rest of this function has *not* been
1175 * made safe for binary (possibly NULL containing) strings.
1176 */
1177 if (typeid == BYTEAOID)
1178 ereport(ERROR,
1179 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1180 errmsg("regular-expression matching not supported on type bytea")));
1181
1182 /* Use the regexp machinery to extract the prefix, if any */
1183 prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
1184 case_insensitive, collation,
1185 &exact);
1186
1187 if (prefix == NULL)
1188 {
1189 *prefix_const = NULL;
1190
1191 if (rest_selec != NULL)
1192 {
1193 char *patt = TextDatumGetCString(patt_const->constvalue);
1194
1195 *rest_selec = regex_selectivity(patt, strlen(patt),
1196 case_insensitive,
1197 0);
1198 pfree(patt);
1199 }
1200
1201 return Pattern_Prefix_None;
1202 }
1203
1204 *prefix_const = string_to_const(prefix, typeid);
1205
1206 if (rest_selec != NULL)
1207 {
1208 if (exact)
1209 {
1210 /* Exact match, so there's no additional selectivity */
1211 *rest_selec = 1.0;
1212 }
1213 else
1214 {
1215 char *patt = TextDatumGetCString(patt_const->constvalue);
1216
1217 *rest_selec = regex_selectivity(patt, strlen(patt),
1218 case_insensitive,
1219 strlen(prefix));
1220 pfree(patt);
1221 }
1222 }
1223
1224 pfree(prefix);
1225
1226 if (exact)
1227 return Pattern_Prefix_Exact; /* pattern specifies exact match */
1228 else
1230}
1231
1234 Const **prefix, Selectivity *rest_selec)
1235{
1236 Pattern_Prefix_Status result;
1237
1238 switch (ptype)
1239 {
1240 case Pattern_Type_Like:
1241 result = like_fixed_prefix(patt, prefix, rest_selec);
1242 break;
1244 result = like_fixed_prefix_ci(patt, collation, prefix,
1245 rest_selec);
1246 break;
1247 case Pattern_Type_Regex:
1248 result = regex_fixed_prefix(patt, false, collation,
1249 prefix, rest_selec);
1250 break;
1252 result = regex_fixed_prefix(patt, true, collation,
1253 prefix, rest_selec);
1254 break;
1256 /* Prefix type work is trivial. */
1257 result = Pattern_Prefix_Partial;
1258 *prefix = makeConst(patt->consttype,
1259 patt->consttypmod,
1260 patt->constcollid,
1261 patt->constlen,
1262 datumCopy(patt->constvalue,
1263 patt->constbyval,
1264 patt->constlen),
1265 patt->constisnull,
1266 patt->constbyval);
1267 if (rest_selec != NULL)
1268 *rest_selec = 1.0; /* all */
1269 break;
1270 default:
1271 elog(ERROR, "unrecognized ptype: %d", (int) ptype);
1272 result = Pattern_Prefix_None; /* keep compiler quiet */
1273 break;
1274 }
1275 return result;
1276}
1277
1278/*
1279 * Estimate the selectivity of a fixed prefix for a pattern match.
1280 *
1281 * A fixed prefix "foo" is estimated as the selectivity of the expression
1282 * "variable >= 'foo' AND variable < 'fop'".
1283 *
1284 * The selectivity estimate is with respect to the portion of the column
1285 * population represented by the histogram --- the caller must fold this
1286 * together with info about MCVs and NULLs.
1287 *
1288 * We use the given comparison operators and collation to do the estimation.
1289 * The given variable and Const must be of the associated datatype(s).
1290 *
1291 * XXX Note: we make use of the upper bound to estimate operator selectivity
1292 * even if the locale is such that we cannot rely on the upper-bound string.
1293 * The selectivity only needs to be approximately right anyway, so it seems
1294 * more useful to use the upper-bound code than not.
1295 */
1296static Selectivity
1298 Oid eqopr, Oid ltopr, Oid geopr,
1299 Oid collation,
1300 Const *prefixcon)
1301{
1303 FmgrInfo opproc;
1304 Const *greaterstrcon;
1305 Selectivity eq_sel;
1306
1307 /* Estimate the selectivity of "x >= prefix" */
1308 fmgr_info(get_opcode(geopr), &opproc);
1309
1311 geopr, &opproc, true, true,
1312 collation,
1313 prefixcon->constvalue,
1314 prefixcon->consttype);
1315
1316 if (prefixsel < 0.0)
1317 {
1318 /* No histogram is present ... return a suitable default estimate */
1319 return DEFAULT_MATCH_SEL;
1320 }
1321
1322 /*
1323 * If we can create a string larger than the prefix, say "x < greaterstr".
1324 */
1325 fmgr_info(get_opcode(ltopr), &opproc);
1326 greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
1327 if (greaterstrcon)
1328 {
1329 Selectivity topsel;
1330
1331 topsel = ineq_histogram_selectivity(root, vardata,
1332 ltopr, &opproc, false, false,
1333 collation,
1334 greaterstrcon->constvalue,
1335 greaterstrcon->consttype);
1336
1337 /* ineq_histogram_selectivity worked before, it shouldn't fail now */
1338 Assert(topsel >= 0.0);
1339
1340 /*
1341 * Merge the two selectivities in the same way as for a range query
1342 * (see clauselist_selectivity()). Note that we don't need to worry
1343 * about double-exclusion of nulls, since ineq_histogram_selectivity
1344 * doesn't count those anyway.
1345 */
1346 prefixsel = topsel + prefixsel - 1.0;
1347 }
1348
1349 /*
1350 * If the prefix is long then the two bounding values might be too close
1351 * together for the histogram to distinguish them usefully, resulting in a
1352 * zero estimate (plus or minus roundoff error). To avoid returning a
1353 * ridiculously small estimate, compute the estimated selectivity for
1354 * "variable = 'foo'", and clamp to that. (Obviously, the resultant
1355 * estimate should be at least that.)
1356 *
1357 * We apply this even if we couldn't make a greater string. That case
1358 * suggests that the prefix is near the maximum possible, and thus
1359 * probably off the end of the histogram, and thus we probably got a very
1360 * small estimate from the >= condition; so we still need to clamp.
1361 */
1362 eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
1363 false, true, false);
1364
1365 prefixsel = Max(prefixsel, eq_sel);
1366
1367 return prefixsel;
1368}
1369
1370
1371/*
1372 * Estimate the selectivity of a pattern of the specified type.
1373 * Note that any fixed prefix of the pattern will have been removed already,
1374 * so actually we may be looking at just a fragment of the pattern.
1375 *
1376 * For now, we use a very simplistic approach: fixed characters reduce the
1377 * selectivity a good deal, character ranges reduce it a little,
1378 * wildcards (such as % for LIKE or .* for regex) increase it.
1379 */
1380
1381#define FIXED_CHAR_SEL 0.20 /* about 1/5 */
1382#define CHAR_RANGE_SEL 0.25
1383#define ANY_CHAR_SEL 0.9 /* not 1, since it won't match end-of-string */
1384#define FULL_WILDCARD_SEL 5.0
1385#define PARTIAL_WILDCARD_SEL 2.0
1386
1387static Selectivity
1388like_selectivity(const char *patt, int pattlen, bool case_insensitive)
1389{
1390 Selectivity sel = 1.0;
1391 int pos;
1392
1393 /* Skip any leading wildcard; it's already factored into initial sel */
1394 for (pos = 0; pos < pattlen; pos++)
1395 {
1396 if (patt[pos] != '%' && patt[pos] != '_')
1397 break;
1398 }
1399
1400 for (; pos < pattlen; pos++)
1401 {
1402 /* % and _ are wildcard characters in LIKE */
1403 if (patt[pos] == '%')
1404 sel *= FULL_WILDCARD_SEL;
1405 else if (patt[pos] == '_')
1406 sel *= ANY_CHAR_SEL;
1407 else if (patt[pos] == '\\')
1408 {
1409 /* Backslash quotes the next character */
1410 pos++;
1411 if (pos >= pattlen)
1412 break;
1413 sel *= FIXED_CHAR_SEL;
1414 }
1415 else
1416 sel *= FIXED_CHAR_SEL;
1417 }
1418 /* Could get sel > 1 if multiple wildcards */
1419 if (sel > 1.0)
1420 sel = 1.0;
1421 return sel;
1422}
1423
1424static Selectivity
1425regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
1426{
1427 Selectivity sel = 1.0;
1428 int paren_depth = 0;
1429 int paren_pos = 0; /* dummy init to keep compiler quiet */
1430 int pos;
1431
1432 /* since this function recurses, it could be driven to stack overflow */
1434
1435 for (pos = 0; pos < pattlen; pos++)
1436 {
1437 if (patt[pos] == '(')
1438 {
1439 if (paren_depth == 0)
1440 paren_pos = pos; /* remember start of parenthesized item */
1441 paren_depth++;
1442 }
1443 else if (patt[pos] == ')' && paren_depth > 0)
1444 {
1445 paren_depth--;
1446 if (paren_depth == 0)
1447 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
1448 pos - (paren_pos + 1),
1449 case_insensitive);
1450 }
1451 else if (patt[pos] == '|' && paren_depth == 0)
1452 {
1453 /*
1454 * If unquoted | is present at paren level 0 in pattern, we have
1455 * multiple alternatives; sum their probabilities.
1456 */
1457 sel += regex_selectivity_sub(patt + (pos + 1),
1458 pattlen - (pos + 1),
1459 case_insensitive);
1460 break; /* rest of pattern is now processed */
1461 }
1462 else if (patt[pos] == '[')
1463 {
1464 bool negclass = false;
1465
1466 if (patt[++pos] == '^')
1467 {
1468 negclass = true;
1469 pos++;
1470 }
1471 if (patt[pos] == ']') /* ']' at start of class is not special */
1472 pos++;
1473 while (pos < pattlen && patt[pos] != ']')
1474 pos++;
1475 if (paren_depth == 0)
1476 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
1477 }
1478 else if (patt[pos] == '.')
1479 {
1480 if (paren_depth == 0)
1481 sel *= ANY_CHAR_SEL;
1482 }
1483 else if (patt[pos] == '*' ||
1484 patt[pos] == '?' ||
1485 patt[pos] == '+')
1486 {
1487 /* Ought to be smarter about quantifiers... */
1488 if (paren_depth == 0)
1489 sel *= PARTIAL_WILDCARD_SEL;
1490 }
1491 else if (patt[pos] == '{')
1492 {
1493 while (pos < pattlen && patt[pos] != '}')
1494 pos++;
1495 if (paren_depth == 0)
1496 sel *= PARTIAL_WILDCARD_SEL;
1497 }
1498 else if (patt[pos] == '\\')
1499 {
1500 /* backslash quotes the next character */
1501 pos++;
1502 if (pos >= pattlen)
1503 break;
1504 if (paren_depth == 0)
1505 sel *= FIXED_CHAR_SEL;
1506 }
1507 else
1508 {
1509 if (paren_depth == 0)
1510 sel *= FIXED_CHAR_SEL;
1511 }
1512 }
1513 /* Could get sel > 1 if multiple wildcards */
1514 if (sel > 1.0)
1515 sel = 1.0;
1516 return sel;
1517}
1518
1519static Selectivity
1520regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
1521 int fixed_prefix_len)
1522{
1523 Selectivity sel;
1524
1525 /* If patt doesn't end with $, consider it to have a trailing wildcard */
1526 if (pattlen > 0 && patt[pattlen - 1] == '$' &&
1527 (pattlen == 1 || patt[pattlen - 2] != '\\'))
1528 {
1529 /* has trailing $ */
1530 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
1531 }
1532 else
1533 {
1534 /* no trailing $ */
1535 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
1536 sel *= FULL_WILDCARD_SEL;
1537 }
1538
1539 /*
1540 * If there's a fixed prefix, discount its selectivity. We have to be
1541 * careful here since a very long prefix could result in pow's result
1542 * underflowing to zero (in which case "sel" probably has as well).
1543 */
1544 if (fixed_prefix_len > 0)
1545 {
1546 double prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
1547
1548 if (prefixsel > 0.0)
1549 sel /= prefixsel;
1550 }
1551
1552 /* Make sure result stays in range */
1553 CLAMP_PROBABILITY(sel);
1554 return sel;
1555}
1556
1557
1558/*
1559 * For bytea, the increment function need only increment the current byte
1560 * (there are no multibyte characters to worry about).
1561 */
1562static bool
1563byte_increment(unsigned char *ptr, int len)
1564{
1565 if (*ptr >= 255)
1566 return false;
1567 (*ptr)++;
1568 return true;
1569}
1570
1571/*
1572 * Try to generate a string greater than the given string or any
1573 * string it is a prefix of. If successful, return a palloc'd string
1574 * in the form of a Const node; else return NULL.
1575 *
1576 * The caller must provide the appropriate "less than" comparison function
1577 * for testing the strings, along with the collation to use.
1578 *
1579 * The key requirement here is that given a prefix string, say "foo",
1580 * we must be able to generate another string "fop" that is greater than
1581 * all strings "foobar" starting with "foo". We can test that we have
1582 * generated a string greater than the prefix string, but in non-C collations
1583 * that is not a bulletproof guarantee that an extension of the string might
1584 * not sort after it; an example is that "foo " is less than "foo!", but it
1585 * is not clear that a "dictionary" sort ordering will consider "foo!" less
1586 * than "foo bar". CAUTION: Therefore, this function should be used only for
1587 * estimation purposes when working in a non-C collation.
1588 *
1589 * To try to catch most cases where an extended string might otherwise sort
1590 * before the result value, we determine which of the strings "Z", "z", "y",
1591 * and "9" is seen as largest by the collation, and append that to the given
1592 * prefix before trying to find a string that compares as larger.
1593 *
1594 * To search for a greater string, we repeatedly "increment" the rightmost
1595 * character, using an encoding-specific character incrementer function.
1596 * When it's no longer possible to increment the last character, we truncate
1597 * off that character and start incrementing the next-to-rightmost.
1598 * For example, if "z" were the last character in the sort order, then we
1599 * could produce "foo" as a string greater than "fonz".
1600 *
1601 * This could be rather slow in the worst case, but in most cases we
1602 * won't have to try more than one or two strings before succeeding.
1603 *
1604 * Note that it's important for the character incrementer not to be too anal
1605 * about producing every possible character code, since in some cases the only
1606 * way to get a larger string is to increment a previous character position.
1607 * So we don't want to spend too much time trying every possible character
1608 * code at the last position. A good rule of thumb is to be sure that we
1609 * don't try more than 256*K values for a K-byte character (and definitely
1610 * not 256^K, which is what an exhaustive search would approach).
1611 */
1612static Const *
1613make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
1614{
1615 Oid datatype = str_const->consttype;
1616 char *workstr;
1617 int len;
1618 Datum cmpstr;
1619 char *cmptxt = NULL;
1621
1622 /*
1623 * Get a modifiable copy of the prefix string in C-string format, and set
1624 * up the string we will compare to as a Datum. In C locale this can just
1625 * be the given prefix string, otherwise we need to add a suffix. Type
1626 * BYTEA sorts bytewise so it never needs a suffix either.
1627 */
1628 if (datatype == BYTEAOID)
1629 {
1630 bytea *bstr = DatumGetByteaPP(str_const->constvalue);
1631
1632 len = VARSIZE_ANY_EXHDR(bstr);
1633 workstr = (char *) palloc(len);
1634 memcpy(workstr, VARDATA_ANY(bstr), len);
1635 Assert(bstr == DatumGetPointer(str_const->constvalue));
1636 cmpstr = str_const->constvalue;
1637 }
1638 else
1639 {
1640 if (datatype == NAMEOID)
1642 str_const->constvalue));
1643 else
1644 workstr = TextDatumGetCString(str_const->constvalue);
1645 len = strlen(workstr);
1646 if (len == 0 || pg_newlocale_from_collation(collation)->collate_is_c)
1647 cmpstr = str_const->constvalue;
1648 else
1649 {
1650 /* If first time through, determine the suffix to use */
1651 static char suffixchar = 0;
1652 static Oid suffixcollation = 0;
1653
1654 if (!suffixchar || suffixcollation != collation)
1655 {
1656 char *best;
1657
1658 best = "Z";
1659 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
1660 best = "z";
1661 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
1662 best = "y";
1663 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
1664 best = "9";
1665 suffixchar = *best;
1666 suffixcollation = collation;
1667 }
1668
1669 /* And build the string to compare to */
1670 if (datatype == NAMEOID)
1671 {
1672 cmptxt = palloc(len + 2);
1673 memcpy(cmptxt, workstr, len);
1674 cmptxt[len] = suffixchar;
1675 cmptxt[len + 1] = '\0';
1676 cmpstr = PointerGetDatum(cmptxt);
1677 }
1678 else
1679 {
1680 cmptxt = palloc(VARHDRSZ + len + 1);
1681 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
1682 memcpy(VARDATA(cmptxt), workstr, len);
1683 *(VARDATA(cmptxt) + len) = suffixchar;
1684 cmpstr = PointerGetDatum(cmptxt);
1685 }
1686 }
1687 }
1688
1689 /* Select appropriate character-incrementer function */
1690 if (datatype == BYTEAOID)
1691 charinc = byte_increment;
1692 else
1694
1695 /* And search ... */
1696 while (len > 0)
1697 {
1698 int charlen;
1699 unsigned char *lastchar;
1700
1701 /* Identify the last character --- for bytea, just the last byte */
1702 if (datatype == BYTEAOID)
1703 charlen = 1;
1704 else
1705 charlen = len - pg_mbcliplen(workstr, len, len - 1);
1706 lastchar = (unsigned char *) (workstr + len - charlen);
1707
1708 /*
1709 * Try to generate a larger string by incrementing the last character
1710 * (for BYTEA, we treat each byte as a character).
1711 *
1712 * Note: the incrementer function is expected to return true if it's
1713 * generated a valid-per-the-encoding new character, otherwise false.
1714 * The contents of the character on false return are unspecified.
1715 */
1716 while (charinc(lastchar, charlen))
1717 {
1718 Const *workstr_const;
1719
1720 if (datatype == BYTEAOID)
1721 workstr_const = string_to_bytea_const(workstr, len);
1722 else
1723 workstr_const = string_to_const(workstr, datatype);
1724
1726 collation,
1727 cmpstr,
1728 workstr_const->constvalue)))
1729 {
1730 /* Successfully made a string larger than cmpstr */
1731 if (cmptxt)
1732 pfree(cmptxt);
1733 pfree(workstr);
1734 return workstr_const;
1735 }
1736
1737 /* No good, release unusable value and try again */
1738 pfree(DatumGetPointer(workstr_const->constvalue));
1739 pfree(workstr_const);
1740 }
1741
1742 /*
1743 * No luck here, so truncate off the last character and try to
1744 * increment the next one.
1745 */
1746 len -= charlen;
1747 workstr[len] = '\0';
1748 }
1749
1750 /* Failed... */
1751 if (cmptxt)
1752 pfree(cmptxt);
1753 pfree(workstr);
1754
1755 return NULL;
1756}
1757
1758/*
1759 * Generate a Datum of the appropriate type from a C string.
1760 * Note that all of the supported types are pass-by-ref, so the
1761 * returned value should be pfree'd if no longer needed.
1762 */
1763static Datum
1764string_to_datum(const char *str, Oid datatype)
1765{
1766 Assert(str != NULL);
1767
1768 /*
1769 * We cheat a little by assuming that CStringGetTextDatum() will do for
1770 * bpchar and varchar constants too...
1771 */
1772 if (datatype == NAMEOID)
1774 else if (datatype == BYTEAOID)
1776 else
1777 return CStringGetTextDatum(str);
1778}
1779
1780/*
1781 * Generate a Const node of the appropriate type from a C string.
1782 */
1783static Const *
1784string_to_const(const char *str, Oid datatype)
1785{
1786 Datum conval = string_to_datum(str, datatype);
1787 Oid collation;
1788 int constlen;
1789
1790 /*
1791 * We only need to support a few datatypes here, so hard-wire properties
1792 * instead of incurring the expense of catalog lookups.
1793 */
1794 switch (datatype)
1795 {
1796 case TEXTOID:
1797 case VARCHAROID:
1798 case BPCHAROID:
1799 collation = DEFAULT_COLLATION_OID;
1800 constlen = -1;
1801 break;
1802
1803 case NAMEOID:
1804 collation = C_COLLATION_OID;
1805 constlen = NAMEDATALEN;
1806 break;
1807
1808 case BYTEAOID:
1809 collation = InvalidOid;
1810 constlen = -1;
1811 break;
1812
1813 default:
1814 elog(ERROR, "unexpected datatype in string_to_const: %u",
1815 datatype);
1816 return NULL;
1817 }
1818
1819 return makeConst(datatype, -1, collation, constlen,
1820 conval, false, false);
1821}
1822
1823/*
1824 * Generate a Const node of bytea type from a binary C string and a length.
1825 */
1826static Const *
1827string_to_bytea_const(const char *str, size_t str_len)
1828{
1829 bytea *bstr = palloc(VARHDRSZ + str_len);
1830 Datum conval;
1831
1832 memcpy(VARDATA(bstr), str, str_len);
1833 SET_VARSIZE(bstr, VARHDRSZ + str_len);
1834 conval = PointerGetDatum(bstr);
1835
1836 return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
1837}
#define CStringGetTextDatum(s)
Definition: builtins.h:97
#define TextDatumGetCString(d)
Definition: builtins.h:98
Datum byteain(PG_FUNCTION_ARGS)
Definition: bytea.c:201
#define Max(x, y)
Definition: c.h:989
#define VARHDRSZ
Definition: c.h:711
#define OidIsValid(objectId)
Definition: c.h:788
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:128
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:682
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Assert(PointerIsAligned(start, uint64))
const char * str
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
long val
Definition: informix.c:689
static char * locale
Definition: initdb.c:140
Datum icregexnesel(PG_FUNCTION_ARGS)
Definition: like_support.c:843
Datum regexnesel(PG_FUNCTION_ARGS)
Definition: like_support.c:834
static Node * like_regex_support(Node *rawreq, Pattern_Type ptype)
Definition: like_support.c:154
Datum iclikesel(PG_FUNCTION_ARGS)
Definition: like_support.c:825
Datum texticregexeq_support(PG_FUNCTION_ARGS)
Definition: like_support.c:137
static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
#define FULL_WILDCARD_SEL
Datum iclikejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:916
Datum prefixjoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:907
#define ANY_CHAR_SEL
static double patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:755
Datum regexeqsel(PG_FUNCTION_ARGS)
Definition: like_support.c:788
static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
Datum likejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:898
static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive)
Datum icregexnejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:934
static List * match_pattern_prefix(Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
Definition: like_support.c:239
Datum nlikejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:943
static Datum string_to_datum(const char *str, Oid datatype)
Datum icnlikejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:952
static Selectivity regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
Datum texticlike_support(PG_FUNCTION_ARGS)
Definition: like_support.c:121
Datum nlikesel(PG_FUNCTION_ARGS)
Definition: like_support.c:852
static Const * string_to_const(const char *str, Oid datatype)
#define PARTIAL_WILDCARD_SEL
Datum text_starts_with_support(PG_FUNCTION_ARGS)
Definition: like_support.c:145
#define CHAR_RANGE_SEL
static Const * string_to_bytea_const(const char *str, size_t str_len)
static Pattern_Prefix_Status like_fixed_prefix_ci(Const *patt_const, Oid collation, Const **prefix_const, Selectivity *rest_selec)
static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
Pattern_Type
Definition: like_support.c:59
@ Pattern_Type_Prefix
Definition: like_support.c:64
@ Pattern_Type_Regex_IC
Definition: like_support.c:63
@ Pattern_Type_Like
Definition: like_support.c:60
@ Pattern_Type_Regex
Definition: like_support.c:62
@ Pattern_Type_Like_IC
Definition: like_support.c:61
Pattern_Prefix_Status
Definition: like_support.c:68
@ Pattern_Prefix_Partial
Definition: like_support.c:69
@ Pattern_Prefix_None
Definition: like_support.c:69
@ Pattern_Prefix_Exact
Definition: like_support.c:69
static Const * make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
Datum icregexeqsel(PG_FUNCTION_ARGS)
Definition: like_support.c:797
#define FIXED_CHAR_SEL
Datum textlike_support(PG_FUNCTION_ARGS)
Definition: like_support.c:113
static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, Const **prefix_const, Selectivity *rest_selec)
Definition: like_support.c:987
Datum regexnejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:925
static bool byte_increment(unsigned char *ptr, int len)
static double patternsel_common(PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
Definition: like_support.c:481
static Selectivity regex_selectivity(const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
Datum icnlikesel(PG_FUNCTION_ARGS)
Definition: like_support.c:861
Datum textregexeq_support(PG_FUNCTION_ARGS)
Definition: like_support.c:129
Datum prefixsel(PG_FUNCTION_ARGS)
Definition: like_support.c:815
static double patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:870
Datum likesel(PG_FUNCTION_ARGS)
Definition: like_support.c:806
Datum regexeqjoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:880
Datum icregexeqjoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:889
List * lappend(List *list, void *datum)
Definition: list.c:339
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1435
bool get_collation_isdeterministic(Oid colloid)
Definition: lsyscache.c:1130
bool op_in_opfamily(Oid opno, Oid opfamily)
Definition: lsyscache.c:68
Oid get_negator(Oid opno)
Definition: lsyscache.c:1683
Expr * make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, Oid opcollid, Oid inputcollid)
Definition: makefuncs.c:701
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:350
unsigned int pg_wchar
Definition: mbprint.c:31
mbcharacter_incrementer pg_database_encoding_character_incrementer(void)
Definition: mbutils.c:1526
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:1011
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1086
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1549
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:989
void pfree(void *pointer)
Definition: mcxt.c:1616
void * palloc(Size size)
Definition: mcxt.c:1387
Datum nameout(PG_FUNCTION_ARGS)
Definition: name.c:71
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:69
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
double Selectivity
Definition: nodes.h:260
Oid oprid(Operator op)
Definition: parse_oper.c:239
#define NAMEDATALEN
const void size_t len
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
bool pg_iswcased(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1612
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1186
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)
Definition: pg_wchar.h:370
static bool DatumGetBool(Datum X)
Definition: postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:332
static char * DatumGetCString(Datum X)
Definition: postgres.h:345
uint64_t Datum
Definition: postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:322
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:360
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
char * s1
tree ctl root
Definition: radixtree.h:1857
char * regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, bool *exact)
Definition: regexp.c:2022
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:5507
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:368
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:805
double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
Definition: selfuncs.c:1114
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:896
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define DEFAULT_MATCH_SEL
Definition: selfuncs.h:46
void check_stack_depth(void)
Definition: stack_depth.c:95
Oid consttype
Definition: primnodes.h:329
Definition: fmgr.h:57
List * args
Definition: primnodes.h:800
Definition: pg_list.h:54
Definition: nodes.h:135
List * args
Definition: primnodes.h:868
HeapTuple statsTuple
Definition: selfuncs.h:89
Definition: c.h:706
#define wpos(wep)
Definition: tsrank.c:27
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA(const void *PTR)
Definition: varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition: varatt.h:432
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1308