PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
like_support.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * like_support.c
4 * Planner support functions for LIKE, regex, and related operators.
5 *
6 * These routines handle special optimization of operators that can be
7 * used with index scans even though they are not known to the executor's
8 * indexscan machinery. The key idea is that these operators allow us
9 * to derive approximate indexscan qual clauses, such that any tuples
10 * that pass the operator clause itself must also satisfy the simpler
11 * indexscan condition(s). Then we can use the indexscan machinery
12 * to avoid scanning as much of the table as we'd otherwise have to,
13 * while applying the original operator as a qpqual condition to ensure
14 * we deliver only the tuples we want. (In essence, we're using a regular
15 * index as if it were a lossy index.)
16 *
17 * An example of what we're doing is
18 * textfield LIKE 'abc%def'
19 * from which we can generate the indexscanable conditions
20 * textfield >= 'abc' AND textfield < 'abd'
21 * which allow efficient scanning of an index on textfield.
22 * (In reality, character set and collation issues make the transformation
23 * from LIKE to indexscan limits rather harder than one might think ...
24 * but that's the basic idea.)
25 *
26 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
28 *
29 *
30 * IDENTIFICATION
31 * src/backend/utils/adt/like_support.c
32 *
33 *-------------------------------------------------------------------------
34 */
35#include "postgres.h"
36
37#include <math.h>
38
39#include "access/htup_details.h"
41#include "catalog/pg_operator.h"
42#include "catalog/pg_opfamily.h"
44#include "catalog/pg_type.h"
45#include "mb/pg_wchar.h"
46#include "miscadmin.h"
47#include "nodes/makefuncs.h"
48#include "nodes/nodeFuncs.h"
49#include "nodes/supportnodes.h"
50#include "utils/builtins.h"
51#include "utils/datum.h"
52#include "utils/lsyscache.h"
53#include "utils/pg_locale.h"
54#include "utils/selfuncs.h"
55#include "utils/varlena.h"
56
57
58typedef enum
59{
66
67typedef enum
68{
71
72static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
73static List *match_pattern_prefix(Node *leftop,
74 Node *rightop,
75 Pattern_Type ptype,
76 Oid expr_coll,
77 Oid opfamily,
78 Oid indexcollation);
80 Oid oprid,
81 Oid opfuncid,
82 List *args,
83 int varRelid,
84 Oid collation,
85 Pattern_Type ptype,
86 bool negate);
88 Pattern_Type ptype,
89 Oid collation,
90 Const **prefix,
91 Selectivity *rest_selec);
93 VariableStatData *vardata,
94 Oid eqopr, Oid ltopr, Oid geopr,
95 Oid collation,
96 Const *prefixcon);
97static Selectivity like_selectivity(const char *patt, int pattlen,
98 bool case_insensitive);
99static Selectivity regex_selectivity(const char *patt, int pattlen,
100 bool case_insensitive,
101 int fixed_prefix_len);
102static int pattern_char_isalpha(char c, bool is_multibyte,
104static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
105 Oid collation);
106static Datum string_to_datum(const char *str, Oid datatype);
107static Const *string_to_const(const char *str, Oid datatype);
108static Const *string_to_bytea_const(const char *str, size_t str_len);
109
110
111/*
112 * Planner support functions for LIKE, regex, and related operators
113 */
114Datum
116{
117 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
118
120}
121
122Datum
124{
125 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
126
128}
129
130Datum
132{
133 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
134
136}
137
138Datum
140{
141 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
142
144}
145
146Datum
148{
149 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
150
152}
153
154/* Common code for the above */
155static Node *
157{
158 Node *ret = NULL;
159
160 if (IsA(rawreq, SupportRequestSelectivity))
161 {
162 /*
163 * Make a selectivity estimate for a function call, just as we'd do if
164 * the call was via the corresponding operator.
165 */
168
169 if (req->is_join)
170 {
171 /*
172 * For the moment we just punt. If patternjoinsel is ever
173 * improved to do better, this should be made to call it.
174 */
176 }
177 else
178 {
179 /* Share code with operator restriction selectivity functions */
182 req->funcid,
183 req->args,
184 req->varRelid,
185 req->inputcollid,
186 ptype,
187 false);
188 }
189 req->selectivity = s1;
190 ret = (Node *) req;
191 }
192 else if (IsA(rawreq, SupportRequestIndexCondition))
193 {
194 /* Try to convert operator/function call to index conditions */
196
197 /*
198 * Currently we have no "reverse" match operators with the pattern on
199 * the left, so we only need consider cases with the indexkey on the
200 * left.
201 */
202 if (req->indexarg != 0)
203 return NULL;
204
205 if (is_opclause(req->node))
206 {
207 OpExpr *clause = (OpExpr *) req->node;
208
209 Assert(list_length(clause->args) == 2);
210 ret = (Node *)
212 (Node *) lsecond(clause->args),
213 ptype,
214 clause->inputcollid,
215 req->opfamily,
216 req->indexcollation);
217 }
218 else if (is_funcclause(req->node)) /* be paranoid */
219 {
220 FuncExpr *clause = (FuncExpr *) req->node;
221
222 Assert(list_length(clause->args) == 2);
223 ret = (Node *)
225 (Node *) lsecond(clause->args),
226 ptype,
227 clause->inputcollid,
228 req->opfamily,
229 req->indexcollation);
230 }
231 }
232
233 return ret;
234}
235
236/*
237 * match_pattern_prefix
238 * Try to generate an indexqual for a LIKE or regex operator.
239 */
240static List *
242 Node *rightop,
243 Pattern_Type ptype,
244 Oid expr_coll,
245 Oid opfamily,
246 Oid indexcollation)
247{
248 List *result;
249 Const *patt;
250 Const *prefix;
251 Pattern_Prefix_Status pstatus;
252 Oid ldatatype;
253 Oid rdatatype;
254 Oid eqopr;
255 Oid ltopr;
256 Oid geopr;
257 Oid preopr = InvalidOid;
258 bool collation_aware;
259 Expr *expr;
260 FmgrInfo ltproc;
261 Const *greaterstr;
262
263 /*
264 * Can't do anything with a non-constant or NULL pattern argument.
265 *
266 * Note that since we restrict ourselves to cases with a hard constant on
267 * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
268 * about verifying that.
269 */
270 if (!IsA(rightop, Const) ||
271 ((Const *) rightop)->constisnull)
272 return NIL;
273 patt = (Const *) rightop;
274
275 /*
276 * Try to extract a fixed prefix from the pattern.
277 */
278 pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
279 &prefix, NULL);
280
281 /* fail if no fixed prefix */
282 if (pstatus == Pattern_Prefix_None)
283 return NIL;
284
285 /*
286 * Identify the operators we want to use, based on the type of the
287 * left-hand argument. Usually these are just the type's regular
288 * comparison operators, but if we are considering one of the semi-legacy
289 * "pattern" opclasses, use the "pattern" operators instead. Those are
290 * not collation-sensitive but always use C collation, as we want. The
291 * selected operators also determine the needed type of the prefix
292 * constant.
293 */
294 ldatatype = exprType(leftop);
295 switch (ldatatype)
296 {
297 case TEXTOID:
298 if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
299 {
300 eqopr = TextEqualOperator;
301 ltopr = TextPatternLessOperator;
302 geopr = TextPatternGreaterEqualOperator;
303 collation_aware = false;
304 }
305 else if (opfamily == TEXT_SPGIST_FAM_OID)
306 {
307 eqopr = TextEqualOperator;
308 ltopr = TextPatternLessOperator;
309 geopr = TextPatternGreaterEqualOperator;
310 /* This opfamily has direct support for prefixing */
311 preopr = TextPrefixOperator;
312 collation_aware = false;
313 }
314 else
315 {
316 eqopr = TextEqualOperator;
317 ltopr = TextLessOperator;
318 geopr = TextGreaterEqualOperator;
319 collation_aware = true;
320 }
321 rdatatype = TEXTOID;
322 break;
323 case NAMEOID:
324
325 /*
326 * Note that here, we need the RHS type to be text, so that the
327 * comparison value isn't improperly truncated to NAMEDATALEN.
328 */
329 eqopr = NameEqualTextOperator;
330 ltopr = NameLessTextOperator;
331 geopr = NameGreaterEqualTextOperator;
332 collation_aware = true;
333 rdatatype = TEXTOID;
334 break;
335 case BPCHAROID:
336 if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
337 {
338 eqopr = BpcharEqualOperator;
339 ltopr = BpcharPatternLessOperator;
340 geopr = BpcharPatternGreaterEqualOperator;
341 collation_aware = false;
342 }
343 else
344 {
345 eqopr = BpcharEqualOperator;
346 ltopr = BpcharLessOperator;
347 geopr = BpcharGreaterEqualOperator;
348 collation_aware = true;
349 }
350 rdatatype = BPCHAROID;
351 break;
352 case BYTEAOID:
353 eqopr = ByteaEqualOperator;
354 ltopr = ByteaLessOperator;
355 geopr = ByteaGreaterEqualOperator;
356 collation_aware = false;
357 rdatatype = BYTEAOID;
358 break;
359 default:
360 /* Can't get here unless we're attached to the wrong operator */
361 return NIL;
362 }
363
364 /*
365 * If necessary, coerce the prefix constant to the right type. The given
366 * prefix constant is either text or bytea type, therefore the only case
367 * where we need to do anything is when converting text to bpchar. Those
368 * two types are binary-compatible, so relabeling the Const node is
369 * sufficient.
370 */
371 if (prefix->consttype != rdatatype)
372 {
373 Assert(prefix->consttype == TEXTOID &&
374 rdatatype == BPCHAROID);
375 prefix->consttype = rdatatype;
376 }
377
378 /*
379 * If we found an exact-match pattern, generate an "=" indexqual.
380 *
381 * Here and below, check to see whether the desired operator is actually
382 * supported by the index opclass, and fail quietly if not. This allows
383 * us to not be concerned with specific opclasses (except for the legacy
384 * "pattern" cases); any index that correctly implements the operators
385 * will work.
386 */
387 if (pstatus == Pattern_Prefix_Exact)
388 {
389 if (!op_in_opfamily(eqopr, opfamily))
390 return NIL;
391 if (indexcollation != expr_coll)
392 return NIL;
393 expr = make_opclause(eqopr, BOOLOID, false,
394 (Expr *) leftop, (Expr *) prefix,
395 InvalidOid, indexcollation);
396 result = list_make1(expr);
397 return result;
398 }
399
400 /*
401 * Anything other than Pattern_Prefix_Exact is not supported if the
402 * expression collation is nondeterministic. The optimized equality or
403 * prefix tests use bytewise comparisons, which is not consistent with
404 * nondeterministic collations.
405 *
406 * expr_coll is not set for a non-collation-aware data type such as bytea.
407 */
408 if (expr_coll && !get_collation_isdeterministic(expr_coll))
409 return NIL;
410
411 /*
412 * Otherwise, we have a nonempty required prefix of the values. Some
413 * opclasses support prefix checks directly, otherwise we'll try to
414 * generate a range constraint.
415 */
416 if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
417 {
418 expr = make_opclause(preopr, BOOLOID, false,
419 (Expr *) leftop, (Expr *) prefix,
420 InvalidOid, indexcollation);
421 result = list_make1(expr);
422 return result;
423 }
424
425 /*
426 * Since we need a range constraint, it's only going to work reliably if
427 * the index is collation-insensitive or has "C" collation. Note that
428 * here we are looking at the index's collation, not the expression's
429 * collation -- this test is *not* dependent on the LIKE/regex operator's
430 * collation.
431 */
432 if (collation_aware &&
433 !pg_newlocale_from_collation(indexcollation)->collate_is_c)
434 return NIL;
435
436 /*
437 * We can always say "x >= prefix".
438 */
439 if (!op_in_opfamily(geopr, opfamily))
440 return NIL;
441 expr = make_opclause(geopr, BOOLOID, false,
442 (Expr *) leftop, (Expr *) prefix,
443 InvalidOid, indexcollation);
444 result = list_make1(expr);
445
446 /*-------
447 * If we can create a string larger than the prefix, we can say
448 * "x < greaterstr". NB: we rely on make_greater_string() to generate
449 * a guaranteed-greater string, not just a probably-greater string.
450 * In general this is only guaranteed in C locale, so we'd better be
451 * using a C-locale index collation.
452 *-------
453 */
454 if (!op_in_opfamily(ltopr, opfamily))
455 return result;
456 fmgr_info(get_opcode(ltopr), &ltproc);
457 greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
458 if (greaterstr)
459 {
460 expr = make_opclause(ltopr, BOOLOID, false,
461 (Expr *) leftop, (Expr *) greaterstr,
462 InvalidOid, indexcollation);
463 result = lappend(result, expr);
464 }
465
466 return result;
467}
468
469
470/*
471 * patternsel_common - generic code for pattern-match restriction selectivity.
472 *
473 * To support using this from either the operator or function paths, caller
474 * may pass either operator OID or underlying function OID; we look up the
475 * latter from the former if needed. (We could just have patternsel() call
476 * get_opcode(), but the work would be wasted if we don't have a need to
477 * compare a fixed prefix to the pg_statistic data.)
478 *
479 * Note that oprid and/or opfuncid should be for the positive-match operator
480 * even when negate is true.
481 */
482static double
484 Oid oprid,
485 Oid opfuncid,
486 List *args,
487 int varRelid,
488 Oid collation,
489 Pattern_Type ptype,
490 bool negate)
491{
492 VariableStatData vardata;
493 Node *other;
494 bool varonleft;
495 Datum constval;
496 Oid consttype;
497 Oid vartype;
498 Oid rdatatype;
499 Oid eqopr;
500 Oid ltopr;
501 Oid geopr;
502 Pattern_Prefix_Status pstatus;
503 Const *patt;
504 Const *prefix = NULL;
505 Selectivity rest_selec = 0;
506 double nullfrac = 0.0;
507 double result;
508
509 /*
510 * Initialize result to the appropriate default estimate depending on
511 * whether it's a match or not-match operator.
512 */
513 if (negate)
514 result = 1.0 - DEFAULT_MATCH_SEL;
515 else
516 result = DEFAULT_MATCH_SEL;
517
518 /*
519 * If expression is not variable op constant, then punt and return the
520 * default estimate.
521 */
522 if (!get_restriction_variable(root, args, varRelid,
523 &vardata, &other, &varonleft))
524 return result;
525 if (!varonleft || !IsA(other, Const))
526 {
527 ReleaseVariableStats(vardata);
528 return result;
529 }
530
531 /*
532 * If the constant is NULL, assume operator is strict and return zero, ie,
533 * operator will never return TRUE. (It's zero even for a negator op.)
534 */
535 if (((Const *) other)->constisnull)
536 {
537 ReleaseVariableStats(vardata);
538 return 0.0;
539 }
540 constval = ((Const *) other)->constvalue;
541 consttype = ((Const *) other)->consttype;
542
543 /*
544 * The right-hand const is type text or bytea for all supported operators.
545 * We do not expect to see binary-compatible types here, since
546 * const-folding should have relabeled the const to exactly match the
547 * operator's declared type.
548 */
549 if (consttype != TEXTOID && consttype != BYTEAOID)
550 {
551 ReleaseVariableStats(vardata);
552 return result;
553 }
554
555 /*
556 * Similarly, the exposed type of the left-hand side should be one of
557 * those we know. (Do not look at vardata.atttype, which might be
558 * something binary-compatible but different.) We can use it to identify
559 * the comparison operators and the required type of the comparison
560 * constant, much as in match_pattern_prefix().
561 */
562 vartype = vardata.vartype;
563
564 switch (vartype)
565 {
566 case TEXTOID:
567 eqopr = TextEqualOperator;
568 ltopr = TextLessOperator;
569 geopr = TextGreaterEqualOperator;
570 rdatatype = TEXTOID;
571 break;
572 case NAMEOID:
573
574 /*
575 * Note that here, we need the RHS type to be text, so that the
576 * comparison value isn't improperly truncated to NAMEDATALEN.
577 */
578 eqopr = NameEqualTextOperator;
579 ltopr = NameLessTextOperator;
580 geopr = NameGreaterEqualTextOperator;
581 rdatatype = TEXTOID;
582 break;
583 case BPCHAROID:
584 eqopr = BpcharEqualOperator;
585 ltopr = BpcharLessOperator;
586 geopr = BpcharGreaterEqualOperator;
587 rdatatype = BPCHAROID;
588 break;
589 case BYTEAOID:
590 eqopr = ByteaEqualOperator;
591 ltopr = ByteaLessOperator;
592 geopr = ByteaGreaterEqualOperator;
593 rdatatype = BYTEAOID;
594 break;
595 default:
596 /* Can't get here unless we're attached to the wrong operator */
597 ReleaseVariableStats(vardata);
598 return result;
599 }
600
601 /*
602 * Grab the nullfrac for use below.
603 */
604 if (HeapTupleIsValid(vardata.statsTuple))
605 {
606 Form_pg_statistic stats;
607
608 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
609 nullfrac = stats->stanullfrac;
610 }
611
612 /*
613 * Pull out any fixed prefix implied by the pattern, and estimate the
614 * fractional selectivity of the remainder of the pattern. Unlike many
615 * other selectivity estimators, we use the pattern operator's actual
616 * collation for this step. This is not because we expect the collation
617 * to make a big difference in the selectivity estimate (it seldom would),
618 * but because we want to be sure we cache compiled regexps under the
619 * right cache key, so that they can be re-used at runtime.
620 */
621 patt = (Const *) other;
622 pstatus = pattern_fixed_prefix(patt, ptype, collation,
623 &prefix, &rest_selec);
624
625 /*
626 * If necessary, coerce the prefix constant to the right type. The only
627 * case where we need to do anything is when converting text to bpchar.
628 * Those two types are binary-compatible, so relabeling the Const node is
629 * sufficient.
630 */
631 if (prefix && prefix->consttype != rdatatype)
632 {
633 Assert(prefix->consttype == TEXTOID &&
634 rdatatype == BPCHAROID);
635 prefix->consttype = rdatatype;
636 }
637
638 if (pstatus == Pattern_Prefix_Exact)
639 {
640 /*
641 * Pattern specifies an exact match, so estimate as for '='
642 */
643 result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
644 false, true, false);
645 }
646 else
647 {
648 /*
649 * Not exact-match pattern. If we have a sufficiently large
650 * histogram, estimate selectivity for the histogram part of the
651 * population by counting matches in the histogram. If not, estimate
652 * selectivity of the fixed prefix and remainder of pattern
653 * separately, then combine the two to get an estimate of the
654 * selectivity for the part of the column population represented by
655 * the histogram. (For small histograms, we combine these
656 * approaches.)
657 *
658 * We then add up data for any most-common-values values; these are
659 * not in the histogram population, and we can get exact answers for
660 * them by applying the pattern operator, so there's no reason to
661 * approximate. (If the MCVs cover a significant part of the total
662 * population, this gives us a big leg up in accuracy.)
663 */
664 Selectivity selec;
665 int hist_size;
666 FmgrInfo opproc;
667 double mcv_selec,
668 sumcommon;
669
670 /* Try to use the histogram entries to get selectivity */
671 if (!OidIsValid(opfuncid))
672 opfuncid = get_opcode(oprid);
673 fmgr_info(opfuncid, &opproc);
674
675 selec = histogram_selectivity(&vardata, &opproc, collation,
676 constval, true,
677 10, 1, &hist_size);
678
679 /* If not at least 100 entries, use the heuristic method */
680 if (hist_size < 100)
681 {
682 Selectivity heursel;
684
685 if (pstatus == Pattern_Prefix_Partial)
687 eqopr, ltopr, geopr,
688 collation,
689 prefix);
690 else
691 prefixsel = 1.0;
692 heursel = prefixsel * rest_selec;
693
694 if (selec < 0) /* fewer than 10 histogram entries? */
695 selec = heursel;
696 else
697 {
698 /*
699 * For histogram sizes from 10 to 100, we combine the
700 * histogram and heuristic selectivities, putting increasingly
701 * more trust in the histogram for larger sizes.
702 */
703 double hist_weight = hist_size / 100.0;
704
705 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
706 }
707 }
708
709 /* In any case, don't believe extremely small or large estimates. */
710 if (selec < 0.0001)
711 selec = 0.0001;
712 else if (selec > 0.9999)
713 selec = 0.9999;
714
715 /*
716 * If we have most-common-values info, add up the fractions of the MCV
717 * entries that satisfy MCV OP PATTERN. These fractions contribute
718 * directly to the result selectivity. Also add up the total fraction
719 * represented by MCV entries.
720 */
721 mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
722 constval, true,
723 &sumcommon);
724
725 /*
726 * Now merge the results from the MCV and histogram calculations,
727 * realizing that the histogram covers only the non-null values that
728 * are not listed in MCV.
729 */
730 selec *= 1.0 - nullfrac - sumcommon;
731 selec += mcv_selec;
732 result = selec;
733 }
734
735 /* now adjust if we wanted not-match rather than match */
736 if (negate)
737 result = 1.0 - result - nullfrac;
738
739 /* result should be in range, but make sure... */
740 CLAMP_PROBABILITY(result);
741
742 if (prefix)
743 {
744 pfree(DatumGetPointer(prefix->constvalue));
745 pfree(prefix);
746 }
747
748 ReleaseVariableStats(vardata);
749
750 return result;
751}
752
753/*
754 * Fix impedance mismatch between SQL-callable functions and patternsel_common
755 */
756static double
758{
760 Oid operator = PG_GETARG_OID(1);
762 int varRelid = PG_GETARG_INT32(3);
763 Oid collation = PG_GET_COLLATION();
764
765 /*
766 * If this is for a NOT LIKE or similar operator, get the corresponding
767 * positive-match operator and work with that.
768 */
769 if (negate)
770 {
771 operator = get_negator(operator);
772 if (!OidIsValid(operator))
773 elog(ERROR, "patternsel called for operator without a negator");
774 }
775
776 return patternsel_common(root,
777 operator,
779 args,
780 varRelid,
781 collation,
782 ptype,
783 negate);
784}
785
786/*
787 * regexeqsel - Selectivity of regular-expression pattern match.
788 */
789Datum
791{
793}
794
795/*
796 * icregexeqsel - Selectivity of case-insensitive regex match.
797 */
798Datum
800{
802}
803
804/*
805 * likesel - Selectivity of LIKE pattern match.
806 */
807Datum
809{
811}
812
813/*
814 * prefixsel - selectivity of prefix operator
815 */
816Datum
818{
820}
821
822/*
823 *
824 * iclikesel - Selectivity of ILIKE pattern match.
825 */
826Datum
828{
830}
831
832/*
833 * regexnesel - Selectivity of regular-expression pattern non-match.
834 */
835Datum
837{
839}
840
841/*
842 * icregexnesel - Selectivity of case-insensitive regex non-match.
843 */
844Datum
846{
848}
849
850/*
851 * nlikesel - Selectivity of LIKE pattern non-match.
852 */
853Datum
855{
857}
858
859/*
860 * icnlikesel - Selectivity of ILIKE pattern non-match.
861 */
862Datum
864{
866}
867
868/*
869 * patternjoinsel - Generic code for pattern-match join selectivity.
870 */
871static double
873{
874 /* For the moment we just punt. */
875 return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
876}
877
878/*
879 * regexeqjoinsel - Join selectivity of regular-expression pattern match.
880 */
881Datum
883{
885}
886
887/*
888 * icregexeqjoinsel - Join selectivity of case-insensitive regex match.
889 */
890Datum
892{
894}
895
896/*
897 * likejoinsel - Join selectivity of LIKE pattern match.
898 */
899Datum
901{
903}
904
905/*
906 * prefixjoinsel - Join selectivity of prefix operator
907 */
908Datum
910{
912}
913
914/*
915 * iclikejoinsel - Join selectivity of ILIKE pattern match.
916 */
917Datum
919{
921}
922
923/*
924 * regexnejoinsel - Join selectivity of regex non-match.
925 */
926Datum
928{
930}
931
932/*
933 * icregexnejoinsel - Join selectivity of case-insensitive regex non-match.
934 */
935Datum
937{
939}
940
941/*
942 * nlikejoinsel - Join selectivity of LIKE pattern non-match.
943 */
944Datum
946{
948}
949
950/*
951 * icnlikejoinsel - Join selectivity of ILIKE pattern non-match.
952 */
953Datum
955{
957}
958
959
960/*-------------------------------------------------------------------------
961 *
962 * Pattern analysis functions
963 *
964 * These routines support analysis of LIKE and regular-expression patterns
965 * by the planner/optimizer. It's important that they agree with the
966 * regular-expression code in backend/regex/ and the LIKE code in
967 * backend/utils/adt/like.c. Also, the computation of the fixed prefix
968 * must be conservative: if we report a string longer than the true fixed
969 * prefix, the query may produce actually wrong answers, rather than just
970 * getting a bad selectivity estimate!
971 *
972 *-------------------------------------------------------------------------
973 */
974
975/*
976 * Extract the fixed prefix, if any, for a pattern.
977 *
978 * *prefix is set to a palloc'd prefix string (in the form of a Const node),
979 * or to NULL if no fixed prefix exists for the pattern.
980 * If rest_selec is not NULL, *rest_selec is set to an estimate of the
981 * selectivity of the remainder of the pattern (without any fixed prefix).
982 * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
983 *
984 * The return value distinguishes no fixed prefix, a partial prefix,
985 * or an exact-match-only pattern.
986 */
987
989like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
990 Const **prefix_const, Selectivity *rest_selec)
991{
992 char *match;
993 char *patt;
994 int pattlen;
995 Oid typeid = patt_const->consttype;
996 int pos,
997 match_pos;
998 bool is_multibyte = (pg_database_encoding_max_length() > 1);
1000
1001 /* the right-hand const is type text or bytea */
1002 Assert(typeid == BYTEAOID || typeid == TEXTOID);
1003
1004 if (case_insensitive)
1005 {
1006 if (typeid == BYTEAOID)
1007 ereport(ERROR,
1008 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1009 errmsg("case insensitive matching not supported on type bytea")));
1010
1011 if (!OidIsValid(collation))
1012 {
1013 /*
1014 * This typically means that the parser could not resolve a
1015 * conflict of implicit collations, so report it that way.
1016 */
1017 ereport(ERROR,
1018 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1019 errmsg("could not determine which collation to use for ILIKE"),
1020 errhint("Use the COLLATE clause to set the collation explicitly.")));
1021 }
1022
1024 }
1025
1026 if (typeid != BYTEAOID)
1027 {
1028 patt = TextDatumGetCString(patt_const->constvalue);
1029 pattlen = strlen(patt);
1030 }
1031 else
1032 {
1033 bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
1034
1035 pattlen = VARSIZE_ANY_EXHDR(bstr);
1036 patt = (char *) palloc(pattlen);
1037 memcpy(patt, VARDATA_ANY(bstr), pattlen);
1038 Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
1039 }
1040
1041 match = palloc(pattlen + 1);
1042 match_pos = 0;
1043 for (pos = 0; pos < pattlen; pos++)
1044 {
1045 /* % and _ are wildcard characters in LIKE */
1046 if (patt[pos] == '%' ||
1047 patt[pos] == '_')
1048 break;
1049
1050 /* Backslash escapes the next character */
1051 if (patt[pos] == '\\')
1052 {
1053 pos++;
1054 if (pos >= pattlen)
1055 break;
1056 }
1057
1058 /* Stop if case-varying character (it's sort of a wildcard) */
1059 if (case_insensitive &&
1060 pattern_char_isalpha(patt[pos], is_multibyte, locale))
1061 break;
1062
1063 match[match_pos++] = patt[pos];
1064 }
1065
1066 match[match_pos] = '\0';
1067
1068 if (typeid != BYTEAOID)
1069 *prefix_const = string_to_const(match, typeid);
1070 else
1071 *prefix_const = string_to_bytea_const(match, match_pos);
1072
1073 if (rest_selec != NULL)
1074 *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
1075 case_insensitive);
1076
1077 pfree(patt);
1078 pfree(match);
1079
1080 /* in LIKE, an empty pattern is an exact match! */
1081 if (pos == pattlen)
1082 return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
1083
1084 if (match_pos > 0)
1086
1087 return Pattern_Prefix_None;
1088}
1089
1091regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
1092 Const **prefix_const, Selectivity *rest_selec)
1093{
1094 Oid typeid = patt_const->consttype;
1095 char *prefix;
1096 bool exact;
1097
1098 /*
1099 * Should be unnecessary, there are no bytea regex operators defined. As
1100 * such, it should be noted that the rest of this function has *not* been
1101 * made safe for binary (possibly NULL containing) strings.
1102 */
1103 if (typeid == BYTEAOID)
1104 ereport(ERROR,
1105 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1106 errmsg("regular-expression matching not supported on type bytea")));
1107
1108 /* Use the regexp machinery to extract the prefix, if any */
1109 prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
1110 case_insensitive, collation,
1111 &exact);
1112
1113 if (prefix == NULL)
1114 {
1115 *prefix_const = NULL;
1116
1117 if (rest_selec != NULL)
1118 {
1119 char *patt = TextDatumGetCString(patt_const->constvalue);
1120
1121 *rest_selec = regex_selectivity(patt, strlen(patt),
1122 case_insensitive,
1123 0);
1124 pfree(patt);
1125 }
1126
1127 return Pattern_Prefix_None;
1128 }
1129
1130 *prefix_const = string_to_const(prefix, typeid);
1131
1132 if (rest_selec != NULL)
1133 {
1134 if (exact)
1135 {
1136 /* Exact match, so there's no additional selectivity */
1137 *rest_selec = 1.0;
1138 }
1139 else
1140 {
1141 char *patt = TextDatumGetCString(patt_const->constvalue);
1142
1143 *rest_selec = regex_selectivity(patt, strlen(patt),
1144 case_insensitive,
1145 strlen(prefix));
1146 pfree(patt);
1147 }
1148 }
1149
1150 pfree(prefix);
1151
1152 if (exact)
1153 return Pattern_Prefix_Exact; /* pattern specifies exact match */
1154 else
1156}
1157
1160 Const **prefix, Selectivity *rest_selec)
1161{
1162 Pattern_Prefix_Status result;
1163
1164 switch (ptype)
1165 {
1166 case Pattern_Type_Like:
1167 result = like_fixed_prefix(patt, false, collation,
1168 prefix, rest_selec);
1169 break;
1171 result = like_fixed_prefix(patt, true, collation,
1172 prefix, rest_selec);
1173 break;
1174 case Pattern_Type_Regex:
1175 result = regex_fixed_prefix(patt, false, collation,
1176 prefix, rest_selec);
1177 break;
1179 result = regex_fixed_prefix(patt, true, collation,
1180 prefix, rest_selec);
1181 break;
1183 /* Prefix type work is trivial. */
1184 result = Pattern_Prefix_Partial;
1185 *prefix = makeConst(patt->consttype,
1186 patt->consttypmod,
1187 patt->constcollid,
1188 patt->constlen,
1189 datumCopy(patt->constvalue,
1190 patt->constbyval,
1191 patt->constlen),
1192 patt->constisnull,
1193 patt->constbyval);
1194 if (rest_selec != NULL)
1195 *rest_selec = 1.0; /* all */
1196 break;
1197 default:
1198 elog(ERROR, "unrecognized ptype: %d", (int) ptype);
1199 result = Pattern_Prefix_None; /* keep compiler quiet */
1200 break;
1201 }
1202 return result;
1203}
1204
1205/*
1206 * Estimate the selectivity of a fixed prefix for a pattern match.
1207 *
1208 * A fixed prefix "foo" is estimated as the selectivity of the expression
1209 * "variable >= 'foo' AND variable < 'fop'".
1210 *
1211 * The selectivity estimate is with respect to the portion of the column
1212 * population represented by the histogram --- the caller must fold this
1213 * together with info about MCVs and NULLs.
1214 *
1215 * We use the given comparison operators and collation to do the estimation.
1216 * The given variable and Const must be of the associated datatype(s).
1217 *
1218 * XXX Note: we make use of the upper bound to estimate operator selectivity
1219 * even if the locale is such that we cannot rely on the upper-bound string.
1220 * The selectivity only needs to be approximately right anyway, so it seems
1221 * more useful to use the upper-bound code than not.
1222 */
1223static Selectivity
1225 Oid eqopr, Oid ltopr, Oid geopr,
1226 Oid collation,
1227 Const *prefixcon)
1228{
1230 FmgrInfo opproc;
1231 Const *greaterstrcon;
1232 Selectivity eq_sel;
1233
1234 /* Estimate the selectivity of "x >= prefix" */
1235 fmgr_info(get_opcode(geopr), &opproc);
1236
1238 geopr, &opproc, true, true,
1239 collation,
1240 prefixcon->constvalue,
1241 prefixcon->consttype);
1242
1243 if (prefixsel < 0.0)
1244 {
1245 /* No histogram is present ... return a suitable default estimate */
1246 return DEFAULT_MATCH_SEL;
1247 }
1248
1249 /*
1250 * If we can create a string larger than the prefix, say "x < greaterstr".
1251 */
1252 fmgr_info(get_opcode(ltopr), &opproc);
1253 greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
1254 if (greaterstrcon)
1255 {
1256 Selectivity topsel;
1257
1258 topsel = ineq_histogram_selectivity(root, vardata,
1259 ltopr, &opproc, false, false,
1260 collation,
1261 greaterstrcon->constvalue,
1262 greaterstrcon->consttype);
1263
1264 /* ineq_histogram_selectivity worked before, it shouldn't fail now */
1265 Assert(topsel >= 0.0);
1266
1267 /*
1268 * Merge the two selectivities in the same way as for a range query
1269 * (see clauselist_selectivity()). Note that we don't need to worry
1270 * about double-exclusion of nulls, since ineq_histogram_selectivity
1271 * doesn't count those anyway.
1272 */
1273 prefixsel = topsel + prefixsel - 1.0;
1274 }
1275
1276 /*
1277 * If the prefix is long then the two bounding values might be too close
1278 * together for the histogram to distinguish them usefully, resulting in a
1279 * zero estimate (plus or minus roundoff error). To avoid returning a
1280 * ridiculously small estimate, compute the estimated selectivity for
1281 * "variable = 'foo'", and clamp to that. (Obviously, the resultant
1282 * estimate should be at least that.)
1283 *
1284 * We apply this even if we couldn't make a greater string. That case
1285 * suggests that the prefix is near the maximum possible, and thus
1286 * probably off the end of the histogram, and thus we probably got a very
1287 * small estimate from the >= condition; so we still need to clamp.
1288 */
1289 eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
1290 false, true, false);
1291
1292 prefixsel = Max(prefixsel, eq_sel);
1293
1294 return prefixsel;
1295}
1296
1297
1298/*
1299 * Estimate the selectivity of a pattern of the specified type.
1300 * Note that any fixed prefix of the pattern will have been removed already,
1301 * so actually we may be looking at just a fragment of the pattern.
1302 *
1303 * For now, we use a very simplistic approach: fixed characters reduce the
1304 * selectivity a good deal, character ranges reduce it a little,
1305 * wildcards (such as % for LIKE or .* for regex) increase it.
1306 */
1307
1308#define FIXED_CHAR_SEL 0.20 /* about 1/5 */
1309#define CHAR_RANGE_SEL 0.25
1310#define ANY_CHAR_SEL 0.9 /* not 1, since it won't match end-of-string */
1311#define FULL_WILDCARD_SEL 5.0
1312#define PARTIAL_WILDCARD_SEL 2.0
1313
1314static Selectivity
1315like_selectivity(const char *patt, int pattlen, bool case_insensitive)
1316{
1317 Selectivity sel = 1.0;
1318 int pos;
1319
1320 /* Skip any leading wildcard; it's already factored into initial sel */
1321 for (pos = 0; pos < pattlen; pos++)
1322 {
1323 if (patt[pos] != '%' && patt[pos] != '_')
1324 break;
1325 }
1326
1327 for (; pos < pattlen; pos++)
1328 {
1329 /* % and _ are wildcard characters in LIKE */
1330 if (patt[pos] == '%')
1331 sel *= FULL_WILDCARD_SEL;
1332 else if (patt[pos] == '_')
1333 sel *= ANY_CHAR_SEL;
1334 else if (patt[pos] == '\\')
1335 {
1336 /* Backslash quotes the next character */
1337 pos++;
1338 if (pos >= pattlen)
1339 break;
1340 sel *= FIXED_CHAR_SEL;
1341 }
1342 else
1343 sel *= FIXED_CHAR_SEL;
1344 }
1345 /* Could get sel > 1 if multiple wildcards */
1346 if (sel > 1.0)
1347 sel = 1.0;
1348 return sel;
1349}
1350
1351static Selectivity
1352regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
1353{
1354 Selectivity sel = 1.0;
1355 int paren_depth = 0;
1356 int paren_pos = 0; /* dummy init to keep compiler quiet */
1357 int pos;
1358
1359 /* since this function recurses, it could be driven to stack overflow */
1361
1362 for (pos = 0; pos < pattlen; pos++)
1363 {
1364 if (patt[pos] == '(')
1365 {
1366 if (paren_depth == 0)
1367 paren_pos = pos; /* remember start of parenthesized item */
1368 paren_depth++;
1369 }
1370 else if (patt[pos] == ')' && paren_depth > 0)
1371 {
1372 paren_depth--;
1373 if (paren_depth == 0)
1374 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
1375 pos - (paren_pos + 1),
1376 case_insensitive);
1377 }
1378 else if (patt[pos] == '|' && paren_depth == 0)
1379 {
1380 /*
1381 * If unquoted | is present at paren level 0 in pattern, we have
1382 * multiple alternatives; sum their probabilities.
1383 */
1384 sel += regex_selectivity_sub(patt + (pos + 1),
1385 pattlen - (pos + 1),
1386 case_insensitive);
1387 break; /* rest of pattern is now processed */
1388 }
1389 else if (patt[pos] == '[')
1390 {
1391 bool negclass = false;
1392
1393 if (patt[++pos] == '^')
1394 {
1395 negclass = true;
1396 pos++;
1397 }
1398 if (patt[pos] == ']') /* ']' at start of class is not special */
1399 pos++;
1400 while (pos < pattlen && patt[pos] != ']')
1401 pos++;
1402 if (paren_depth == 0)
1403 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
1404 }
1405 else if (patt[pos] == '.')
1406 {
1407 if (paren_depth == 0)
1408 sel *= ANY_CHAR_SEL;
1409 }
1410 else if (patt[pos] == '*' ||
1411 patt[pos] == '?' ||
1412 patt[pos] == '+')
1413 {
1414 /* Ought to be smarter about quantifiers... */
1415 if (paren_depth == 0)
1416 sel *= PARTIAL_WILDCARD_SEL;
1417 }
1418 else if (patt[pos] == '{')
1419 {
1420 while (pos < pattlen && patt[pos] != '}')
1421 pos++;
1422 if (paren_depth == 0)
1423 sel *= PARTIAL_WILDCARD_SEL;
1424 }
1425 else if (patt[pos] == '\\')
1426 {
1427 /* backslash quotes the next character */
1428 pos++;
1429 if (pos >= pattlen)
1430 break;
1431 if (paren_depth == 0)
1432 sel *= FIXED_CHAR_SEL;
1433 }
1434 else
1435 {
1436 if (paren_depth == 0)
1437 sel *= FIXED_CHAR_SEL;
1438 }
1439 }
1440 /* Could get sel > 1 if multiple wildcards */
1441 if (sel > 1.0)
1442 sel = 1.0;
1443 return sel;
1444}
1445
1446static Selectivity
1447regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
1448 int fixed_prefix_len)
1449{
1450 Selectivity sel;
1451
1452 /* If patt doesn't end with $, consider it to have a trailing wildcard */
1453 if (pattlen > 0 && patt[pattlen - 1] == '$' &&
1454 (pattlen == 1 || patt[pattlen - 2] != '\\'))
1455 {
1456 /* has trailing $ */
1457 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
1458 }
1459 else
1460 {
1461 /* no trailing $ */
1462 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
1463 sel *= FULL_WILDCARD_SEL;
1464 }
1465
1466 /*
1467 * If there's a fixed prefix, discount its selectivity. We have to be
1468 * careful here since a very long prefix could result in pow's result
1469 * underflowing to zero (in which case "sel" probably has as well).
1470 */
1471 if (fixed_prefix_len > 0)
1472 {
1473 double prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
1474
1475 if (prefixsel > 0.0)
1476 sel /= prefixsel;
1477 }
1478
1479 /* Make sure result stays in range */
1480 CLAMP_PROBABILITY(sel);
1481 return sel;
1482}
1483
1484/*
1485 * Check whether char is a letter (and, hence, subject to case-folding)
1486 *
1487 * In multibyte character sets or with ICU, we can't use isalpha, and it does
1488 * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
1489 * Instead, just assume any non-ASCII char is potentially case-varying, and
1490 * hard-wire knowledge of which ASCII chars are letters.
1491 */
1492static int
1493pattern_char_isalpha(char c, bool is_multibyte,
1495{
1496 if (locale->ctype_is_c)
1497 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1498 else if (is_multibyte && IS_HIGHBIT_SET(c))
1499 return true;
1500 else if (locale->provider != COLLPROVIDER_LIBC)
1501 return IS_HIGHBIT_SET(c) ||
1502 (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1503 else
1504 return isalpha_l((unsigned char) c, locale->info.lt);
1505}
1506
1507
1508/*
1509 * For bytea, the increment function need only increment the current byte
1510 * (there are no multibyte characters to worry about).
1511 */
1512static bool
1513byte_increment(unsigned char *ptr, int len)
1514{
1515 if (*ptr >= 255)
1516 return false;
1517 (*ptr)++;
1518 return true;
1519}
1520
1521/*
1522 * Try to generate a string greater than the given string or any
1523 * string it is a prefix of. If successful, return a palloc'd string
1524 * in the form of a Const node; else return NULL.
1525 *
1526 * The caller must provide the appropriate "less than" comparison function
1527 * for testing the strings, along with the collation to use.
1528 *
1529 * The key requirement here is that given a prefix string, say "foo",
1530 * we must be able to generate another string "fop" that is greater than
1531 * all strings "foobar" starting with "foo". We can test that we have
1532 * generated a string greater than the prefix string, but in non-C collations
1533 * that is not a bulletproof guarantee that an extension of the string might
1534 * not sort after it; an example is that "foo " is less than "foo!", but it
1535 * is not clear that a "dictionary" sort ordering will consider "foo!" less
1536 * than "foo bar". CAUTION: Therefore, this function should be used only for
1537 * estimation purposes when working in a non-C collation.
1538 *
1539 * To try to catch most cases where an extended string might otherwise sort
1540 * before the result value, we determine which of the strings "Z", "z", "y",
1541 * and "9" is seen as largest by the collation, and append that to the given
1542 * prefix before trying to find a string that compares as larger.
1543 *
1544 * To search for a greater string, we repeatedly "increment" the rightmost
1545 * character, using an encoding-specific character incrementer function.
1546 * When it's no longer possible to increment the last character, we truncate
1547 * off that character and start incrementing the next-to-rightmost.
1548 * For example, if "z" were the last character in the sort order, then we
1549 * could produce "foo" as a string greater than "fonz".
1550 *
1551 * This could be rather slow in the worst case, but in most cases we
1552 * won't have to try more than one or two strings before succeeding.
1553 *
1554 * Note that it's important for the character incrementer not to be too anal
1555 * about producing every possible character code, since in some cases the only
1556 * way to get a larger string is to increment a previous character position.
1557 * So we don't want to spend too much time trying every possible character
1558 * code at the last position. A good rule of thumb is to be sure that we
1559 * don't try more than 256*K values for a K-byte character (and definitely
1560 * not 256^K, which is what an exhaustive search would approach).
1561 */
1562static Const *
1563make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
1564{
1565 Oid datatype = str_const->consttype;
1566 char *workstr;
1567 int len;
1568 Datum cmpstr;
1569 char *cmptxt = NULL;
1571
1572 /*
1573 * Get a modifiable copy of the prefix string in C-string format, and set
1574 * up the string we will compare to as a Datum. In C locale this can just
1575 * be the given prefix string, otherwise we need to add a suffix. Type
1576 * BYTEA sorts bytewise so it never needs a suffix either.
1577 */
1578 if (datatype == BYTEAOID)
1579 {
1580 bytea *bstr = DatumGetByteaPP(str_const->constvalue);
1581
1582 len = VARSIZE_ANY_EXHDR(bstr);
1583 workstr = (char *) palloc(len);
1584 memcpy(workstr, VARDATA_ANY(bstr), len);
1585 Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
1586 cmpstr = str_const->constvalue;
1587 }
1588 else
1589 {
1590 if (datatype == NAMEOID)
1592 str_const->constvalue));
1593 else
1594 workstr = TextDatumGetCString(str_const->constvalue);
1595 len = strlen(workstr);
1596 if (len == 0 || pg_newlocale_from_collation(collation)->collate_is_c)
1597 cmpstr = str_const->constvalue;
1598 else
1599 {
1600 /* If first time through, determine the suffix to use */
1601 static char suffixchar = 0;
1602 static Oid suffixcollation = 0;
1603
1604 if (!suffixchar || suffixcollation != collation)
1605 {
1606 char *best;
1607
1608 best = "Z";
1609 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
1610 best = "z";
1611 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
1612 best = "y";
1613 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
1614 best = "9";
1615 suffixchar = *best;
1616 suffixcollation = collation;
1617 }
1618
1619 /* And build the string to compare to */
1620 if (datatype == NAMEOID)
1621 {
1622 cmptxt = palloc(len + 2);
1623 memcpy(cmptxt, workstr, len);
1624 cmptxt[len] = suffixchar;
1625 cmptxt[len + 1] = '\0';
1626 cmpstr = PointerGetDatum(cmptxt);
1627 }
1628 else
1629 {
1630 cmptxt = palloc(VARHDRSZ + len + 1);
1631 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
1632 memcpy(VARDATA(cmptxt), workstr, len);
1633 *(VARDATA(cmptxt) + len) = suffixchar;
1634 cmpstr = PointerGetDatum(cmptxt);
1635 }
1636 }
1637 }
1638
1639 /* Select appropriate character-incrementer function */
1640 if (datatype == BYTEAOID)
1641 charinc = byte_increment;
1642 else
1644
1645 /* And search ... */
1646 while (len > 0)
1647 {
1648 int charlen;
1649 unsigned char *lastchar;
1650
1651 /* Identify the last character --- for bytea, just the last byte */
1652 if (datatype == BYTEAOID)
1653 charlen = 1;
1654 else
1655 charlen = len - pg_mbcliplen(workstr, len, len - 1);
1656 lastchar = (unsigned char *) (workstr + len - charlen);
1657
1658 /*
1659 * Try to generate a larger string by incrementing the last character
1660 * (for BYTEA, we treat each byte as a character).
1661 *
1662 * Note: the incrementer function is expected to return true if it's
1663 * generated a valid-per-the-encoding new character, otherwise false.
1664 * The contents of the character on false return are unspecified.
1665 */
1666 while (charinc(lastchar, charlen))
1667 {
1668 Const *workstr_const;
1669
1670 if (datatype == BYTEAOID)
1671 workstr_const = string_to_bytea_const(workstr, len);
1672 else
1673 workstr_const = string_to_const(workstr, datatype);
1674
1676 collation,
1677 cmpstr,
1678 workstr_const->constvalue)))
1679 {
1680 /* Successfully made a string larger than cmpstr */
1681 if (cmptxt)
1682 pfree(cmptxt);
1683 pfree(workstr);
1684 return workstr_const;
1685 }
1686
1687 /* No good, release unusable value and try again */
1688 pfree(DatumGetPointer(workstr_const->constvalue));
1689 pfree(workstr_const);
1690 }
1691
1692 /*
1693 * No luck here, so truncate off the last character and try to
1694 * increment the next one.
1695 */
1696 len -= charlen;
1697 workstr[len] = '\0';
1698 }
1699
1700 /* Failed... */
1701 if (cmptxt)
1702 pfree(cmptxt);
1703 pfree(workstr);
1704
1705 return NULL;
1706}
1707
1708/*
1709 * Generate a Datum of the appropriate type from a C string.
1710 * Note that all of the supported types are pass-by-ref, so the
1711 * returned value should be pfree'd if no longer needed.
1712 */
1713static Datum
1714string_to_datum(const char *str, Oid datatype)
1715{
1716 Assert(str != NULL);
1717
1718 /*
1719 * We cheat a little by assuming that CStringGetTextDatum() will do for
1720 * bpchar and varchar constants too...
1721 */
1722 if (datatype == NAMEOID)
1724 else if (datatype == BYTEAOID)
1726 else
1727 return CStringGetTextDatum(str);
1728}
1729
1730/*
1731 * Generate a Const node of the appropriate type from a C string.
1732 */
1733static Const *
1734string_to_const(const char *str, Oid datatype)
1735{
1736 Datum conval = string_to_datum(str, datatype);
1737 Oid collation;
1738 int constlen;
1739
1740 /*
1741 * We only need to support a few datatypes here, so hard-wire properties
1742 * instead of incurring the expense of catalog lookups.
1743 */
1744 switch (datatype)
1745 {
1746 case TEXTOID:
1747 case VARCHAROID:
1748 case BPCHAROID:
1749 collation = DEFAULT_COLLATION_OID;
1750 constlen = -1;
1751 break;
1752
1753 case NAMEOID:
1754 collation = C_COLLATION_OID;
1755 constlen = NAMEDATALEN;
1756 break;
1757
1758 case BYTEAOID:
1759 collation = InvalidOid;
1760 constlen = -1;
1761 break;
1762
1763 default:
1764 elog(ERROR, "unexpected datatype in string_to_const: %u",
1765 datatype);
1766 return NULL;
1767 }
1768
1769 return makeConst(datatype, -1, collation, constlen,
1770 conval, false, false);
1771}
1772
1773/*
1774 * Generate a Const node of bytea type from a binary C string and a length.
1775 */
1776static Const *
1777string_to_bytea_const(const char *str, size_t str_len)
1778{
1779 bytea *bstr = palloc(VARHDRSZ + str_len);
1780 Datum conval;
1781
1782 memcpy(VARDATA(bstr), str, str_len);
1783 SET_VARSIZE(bstr, VARHDRSZ + str_len);
1784 conval = PointerGetDatum(bstr);
1785
1786 return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
1787}
#define CStringGetTextDatum(s)
Definition: builtins.h:97
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1109
#define Max(x, y)
Definition: c.h:952
char * Pointer
Definition: c.h:476
#define VARHDRSZ
Definition: c.h:646
#define Assert(condition)
Definition: c.h:812
#define OidIsValid(objectId)
Definition: c.h:729
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:641
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
const char * str
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
static char * locale
Definition: initdb.c:140
Datum icregexnesel(PG_FUNCTION_ARGS)
Definition: like_support.c:845
Datum regexnesel(PG_FUNCTION_ARGS)
Definition: like_support.c:836
static Node * like_regex_support(Node *rawreq, Pattern_Type ptype)
Definition: like_support.c:156
Datum iclikesel(PG_FUNCTION_ARGS)
Definition: like_support.c:827
Datum texticregexeq_support(PG_FUNCTION_ARGS)
Definition: like_support.c:139
static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
#define FULL_WILDCARD_SEL
Datum iclikejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:918
Datum prefixjoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:909
#define ANY_CHAR_SEL
static double patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:757
Datum regexeqsel(PG_FUNCTION_ARGS)
Definition: like_support.c:790
static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
Datum likejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:900
static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive)
Datum icregexnejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:936
static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
Definition: like_support.c:989
static List * match_pattern_prefix(Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
Definition: like_support.c:241
Datum nlikejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:945
static Datum string_to_datum(const char *str, Oid datatype)
Datum icnlikejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:954
static Selectivity regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
Datum texticlike_support(PG_FUNCTION_ARGS)
Definition: like_support.c:123
Datum nlikesel(PG_FUNCTION_ARGS)
Definition: like_support.c:854
static int pattern_char_isalpha(char c, bool is_multibyte, pg_locale_t locale)
static Const * string_to_const(const char *str, Oid datatype)
#define PARTIAL_WILDCARD_SEL
Datum text_starts_with_support(PG_FUNCTION_ARGS)
Definition: like_support.c:147
#define CHAR_RANGE_SEL
static Const * string_to_bytea_const(const char *str, size_t str_len)
static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
Pattern_Type
Definition: like_support.c:59
@ Pattern_Type_Prefix
Definition: like_support.c:64
@ Pattern_Type_Regex_IC
Definition: like_support.c:63
@ Pattern_Type_Like
Definition: like_support.c:60
@ Pattern_Type_Regex
Definition: like_support.c:62
@ Pattern_Type_Like_IC
Definition: like_support.c:61
Pattern_Prefix_Status
Definition: like_support.c:68
@ Pattern_Prefix_Partial
Definition: like_support.c:69
@ Pattern_Prefix_None
Definition: like_support.c:69
@ Pattern_Prefix_Exact
Definition: like_support.c:69
static Const * make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
Datum icregexeqsel(PG_FUNCTION_ARGS)
Definition: like_support.c:799
#define FIXED_CHAR_SEL
Datum textlike_support(PG_FUNCTION_ARGS)
Definition: like_support.c:115
Datum regexnejoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:927
static bool byte_increment(unsigned char *ptr, int len)
static double patternsel_common(PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
Definition: like_support.c:483
static Selectivity regex_selectivity(const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
Datum icnlikesel(PG_FUNCTION_ARGS)
Definition: like_support.c:863
Datum textregexeq_support(PG_FUNCTION_ARGS)
Definition: like_support.c:131
Datum prefixsel(PG_FUNCTION_ARGS)
Definition: like_support.c:817
static double patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:872
Datum likesel(PG_FUNCTION_ARGS)
Definition: like_support.c:808
Datum regexeqjoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:882
Datum icregexeqjoinsel(PG_FUNCTION_ARGS)
Definition: like_support.c:891
List * lappend(List *list, void *datum)
Definition: list.c:339
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1285
bool get_collation_isdeterministic(Oid colloid)
Definition: lsyscache.c:1054
bool op_in_opfamily(Oid opno, Oid opfamily)
Definition: lsyscache.c:66
Oid get_negator(Oid opno)
Definition: lsyscache.c:1533
Expr * make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, Oid opcollid, Oid inputcollid)
Definition: makefuncs.c:651
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:301
mbcharacter_incrementer pg_database_encoding_character_incrementer(void)
Definition: mbutils.c:1523
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1083
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
Datum nameout(PG_FUNCTION_ARGS)
Definition: name.c:71
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:69
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
Oid oprid(Operator op)
Definition: parse_oper.c:238
#define NAMEDATALEN
const void size_t len
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1341
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)
Definition: pg_wchar.h:370
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:350
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
char * c
char * s1
tree ctl root
Definition: radixtree.h:1888
char * regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, bool *exact)
Definition: regexp.c:1953
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4894
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:295
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:732
double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
Definition: selfuncs.c:1041
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:823
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define DEFAULT_MATCH_SEL
Definition: selfuncs.h:46
void check_stack_depth(void)
Definition: stack_depth.c:95
Oid consttype
Definition: primnodes.h:312
Definition: fmgr.h:57
List * args
Definition: primnodes.h:768
Definition: pg_list.h:54
Definition: nodes.h:129
List * args
Definition: primnodes.h:836
struct PlannerInfo * root
Definition: supportnodes.h:96
HeapTuple statsTuple
Definition: selfuncs.h:89
Definition: c.h:641
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1538
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:290
#define isalpha_l
Definition: win32_port.h:449