PostgreSQL Source Code git master
like_support.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
#include "utils/varlena.h"
Include dependency graph for like_support.c:

Go to the source code of this file.

Macros

#define FIXED_CHAR_SEL   0.20 /* about 1/5 */
 
#define CHAR_RANGE_SEL   0.25
 
#define ANY_CHAR_SEL   0.9 /* not 1, since it won't match end-of-string */
 
#define FULL_WILDCARD_SEL   5.0
 
#define PARTIAL_WILDCARD_SEL   2.0
 

Enumerations

enum  Pattern_Type {
  Pattern_Type_Like , Pattern_Type_Like_IC , Pattern_Type_Regex , Pattern_Type_Regex_IC ,
  Pattern_Type_Prefix
}
 
enum  Pattern_Prefix_Status { Pattern_Prefix_None , Pattern_Prefix_Partial , Pattern_Prefix_Exact }
 

Functions

static Nodelike_regex_support (Node *rawreq, Pattern_Type ptype)
 
static Listmatch_pattern_prefix (Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
 
static double patternsel_common (PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
 
static Pattern_Prefix_Status pattern_fixed_prefix (Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
 
static Selectivity prefix_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
 
static Selectivity like_selectivity (const char *patt, int pattlen, bool case_insensitive)
 
static Selectivity regex_selectivity (const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
 
static Constmake_greater_string (const Const *str_const, FmgrInfo *ltproc, Oid collation)
 
static Datum string_to_datum (const char *str, Oid datatype)
 
static Conststring_to_const (const char *str, Oid datatype)
 
static Conststring_to_bytea_const (const char *str, size_t str_len)
 
Datum textlike_support (PG_FUNCTION_ARGS)
 
Datum texticlike_support (PG_FUNCTION_ARGS)
 
Datum textregexeq_support (PG_FUNCTION_ARGS)
 
Datum texticregexeq_support (PG_FUNCTION_ARGS)
 
Datum text_starts_with_support (PG_FUNCTION_ARGS)
 
static double patternsel (PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 
Datum regexeqsel (PG_FUNCTION_ARGS)
 
Datum icregexeqsel (PG_FUNCTION_ARGS)
 
Datum likesel (PG_FUNCTION_ARGS)
 
Datum prefixsel (PG_FUNCTION_ARGS)
 
Datum iclikesel (PG_FUNCTION_ARGS)
 
Datum regexnesel (PG_FUNCTION_ARGS)
 
Datum icregexnesel (PG_FUNCTION_ARGS)
 
Datum nlikesel (PG_FUNCTION_ARGS)
 
Datum icnlikesel (PG_FUNCTION_ARGS)
 
static double patternjoinsel (PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 
Datum regexeqjoinsel (PG_FUNCTION_ARGS)
 
Datum icregexeqjoinsel (PG_FUNCTION_ARGS)
 
Datum likejoinsel (PG_FUNCTION_ARGS)
 
Datum prefixjoinsel (PG_FUNCTION_ARGS)
 
Datum iclikejoinsel (PG_FUNCTION_ARGS)
 
Datum regexnejoinsel (PG_FUNCTION_ARGS)
 
Datum icregexnejoinsel (PG_FUNCTION_ARGS)
 
Datum nlikejoinsel (PG_FUNCTION_ARGS)
 
Datum icnlikejoinsel (PG_FUNCTION_ARGS)
 
static Pattern_Prefix_Status like_fixed_prefix (Const *patt_const, Const **prefix_const, Selectivity *rest_selec)
 
static Pattern_Prefix_Status like_fixed_prefix_ci (Const *patt_const, Oid collation, Const **prefix_const, Selectivity *rest_selec)
 
static Pattern_Prefix_Status regex_fixed_prefix (Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
 
static Selectivity regex_selectivity_sub (const char *patt, int pattlen, bool case_insensitive)
 
static bool byte_increment (unsigned char *ptr, int len)
 

Macro Definition Documentation

◆ ANY_CHAR_SEL

#define ANY_CHAR_SEL   0.9 /* not 1, since it won't match end-of-string */

Definition at line 1383 of file like_support.c.

◆ CHAR_RANGE_SEL

#define CHAR_RANGE_SEL   0.25

Definition at line 1382 of file like_support.c.

◆ FIXED_CHAR_SEL

#define FIXED_CHAR_SEL   0.20 /* about 1/5 */

Definition at line 1381 of file like_support.c.

◆ FULL_WILDCARD_SEL

#define FULL_WILDCARD_SEL   5.0

Definition at line 1384 of file like_support.c.

◆ PARTIAL_WILDCARD_SEL

#define PARTIAL_WILDCARD_SEL   2.0

Definition at line 1385 of file like_support.c.

Enumeration Type Documentation

◆ Pattern_Prefix_Status

Enumerator
Pattern_Prefix_None 
Pattern_Prefix_Partial 
Pattern_Prefix_Exact 

Definition at line 67 of file like_support.c.

68{
Pattern_Prefix_Status
Definition: like_support.c:68
@ Pattern_Prefix_Partial
Definition: like_support.c:69
@ Pattern_Prefix_None
Definition: like_support.c:69
@ Pattern_Prefix_Exact
Definition: like_support.c:69

◆ Pattern_Type

Enumerator
Pattern_Type_Like 
Pattern_Type_Like_IC 
Pattern_Type_Regex 
Pattern_Type_Regex_IC 
Pattern_Type_Prefix 

Definition at line 58 of file like_support.c.

59{
Pattern_Type
Definition: like_support.c:59
@ Pattern_Type_Prefix
Definition: like_support.c:64
@ Pattern_Type_Regex_IC
Definition: like_support.c:63
@ Pattern_Type_Like
Definition: like_support.c:60
@ Pattern_Type_Regex
Definition: like_support.c:62
@ Pattern_Type_Like_IC
Definition: like_support.c:61

Function Documentation

◆ byte_increment()

static bool byte_increment ( unsigned char *  ptr,
int  len 
)
static

Definition at line 1563 of file like_support.c.

1564{
1565 if (*ptr >= 255)
1566 return false;
1567 (*ptr)++;
1568 return true;
1569}

Referenced by make_greater_string().

◆ iclikejoinsel()

Datum iclikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 916 of file like_support.c.

917{
919}
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:369
static double patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:870

References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ iclikesel()

Datum iclikesel ( PG_FUNCTION_ARGS  )

Definition at line 825 of file like_support.c.

826{
828}
static double patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:755

References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icnlikejoinsel()

Datum icnlikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 952 of file like_support.c.

953{
955}

References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icnlikesel()

Datum icnlikesel ( PG_FUNCTION_ARGS  )

Definition at line 861 of file like_support.c.

862{
864}

References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icregexeqjoinsel()

Datum icregexeqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 889 of file like_support.c.

890{
892}

References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icregexeqsel()

Datum icregexeqsel ( PG_FUNCTION_ARGS  )

Definition at line 797 of file like_support.c.

798{
800}

References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icregexnejoinsel()

Datum icregexnejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 934 of file like_support.c.

935{
937}

References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icregexnesel()

Datum icregexnesel ( PG_FUNCTION_ARGS  )

Definition at line 843 of file like_support.c.

844{
846}

References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ like_fixed_prefix()

static Pattern_Prefix_Status like_fixed_prefix ( Const patt_const,
Const **  prefix_const,
Selectivity rest_selec 
)
static

Definition at line 987 of file like_support.c.

989{
990 char *match;
991 char *patt;
992 int pattlen;
993 Oid typeid = patt_const->consttype;
994 int pos,
995 match_pos;
996
997 /* the right-hand const is type text or bytea */
998 Assert(typeid == BYTEAOID || typeid == TEXTOID);
999
1000 if (typeid != BYTEAOID)
1001 {
1002 patt = TextDatumGetCString(patt_const->constvalue);
1003 pattlen = strlen(patt);
1004 }
1005 else
1006 {
1007 bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
1008
1009 pattlen = VARSIZE_ANY_EXHDR(bstr);
1010 patt = (char *) palloc(pattlen);
1011 memcpy(patt, VARDATA_ANY(bstr), pattlen);
1012 Assert(bstr == DatumGetPointer(patt_const->constvalue));
1013 }
1014
1015 match = palloc(pattlen + 1);
1016 match_pos = 0;
1017 for (pos = 0; pos < pattlen; pos++)
1018 {
1019 /* % and _ are wildcard characters in LIKE */
1020 if (patt[pos] == '%' ||
1021 patt[pos] == '_')
1022 break;
1023
1024 /* Backslash escapes the next character */
1025 if (patt[pos] == '\\')
1026 {
1027 pos++;
1028 if (pos >= pattlen)
1029 break;
1030 }
1031
1032 match[match_pos++] = patt[pos];
1033 }
1034
1035 match[match_pos] = '\0';
1036
1037 if (typeid != BYTEAOID)
1038 *prefix_const = string_to_const(match, typeid);
1039 else
1040 *prefix_const = string_to_bytea_const(match, match_pos);
1041
1042 if (rest_selec != NULL)
1043 *rest_selec = like_selectivity(&patt[pos], pattlen - pos, false);
1044
1045 pfree(patt);
1046 pfree(match);
1047
1048 /* in LIKE, an empty pattern is an exact match! */
1049 if (pos == pattlen)
1050 return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
1051
1052 if (match_pos > 0)
1054
1055 return Pattern_Prefix_None;
1056}
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define DatumGetByteaPP(X)
Definition: fmgr.h:292
Assert(PointerIsAligned(start, uint64))
static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive)
static Const * string_to_const(const char *str, Oid datatype)
static Const * string_to_bytea_const(const char *str, size_t str_len)
void pfree(void *pointer)
Definition: mcxt.c:1616
void * palloc(Size size)
Definition: mcxt.c:1387
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:342
unsigned int Oid
Definition: postgres_ext.h:32
Oid consttype
Definition: primnodes.h:329
Definition: c.h:712
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486

References Assert(), Const::consttype, DatumGetByteaPP, DatumGetPointer(), like_selectivity(), palloc(), Pattern_Prefix_Exact, Pattern_Prefix_None, Pattern_Prefix_Partial, pfree(), string_to_bytea_const(), string_to_const(), TextDatumGetCString, VARDATA_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by pattern_fixed_prefix().

◆ like_fixed_prefix_ci()

static Pattern_Prefix_Status like_fixed_prefix_ci ( Const patt_const,
Oid  collation,
Const **  prefix_const,
Selectivity rest_selec 
)
static

Definition at line 1063 of file like_support.c.

1065{
1066 text *val = DatumGetTextPP(patt_const->constvalue);
1067 Oid typeid = patt_const->consttype;
1068 int nbytes = VARSIZE_ANY_EXHDR(val);
1069 int wpos;
1070 pg_wchar *wpatt;
1071 int wpattlen;
1072 pg_wchar *wmatch;
1073 int wmatch_pos = 0;
1074 char *match;
1075 int match_mblen;
1076 pg_locale_t locale = 0;
1077
1078 /* the right-hand const is type text or bytea */
1079 Assert(typeid == BYTEAOID || typeid == TEXTOID);
1080
1081 if (typeid == BYTEAOID)
1082 ereport(ERROR,
1083 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1084 errmsg("case insensitive matching not supported on type bytea")));
1085
1086 if (!OidIsValid(collation))
1087 {
1088 /*
1089 * This typically means that the parser could not resolve a conflict
1090 * of implicit collations, so report it that way.
1091 */
1092 ereport(ERROR,
1093 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1094 errmsg("could not determine which collation to use for ILIKE"),
1095 errhint("Use the COLLATE clause to set the collation explicitly.")));
1096 }
1097
1099
1100 wpatt = palloc((nbytes + 1) * sizeof(pg_wchar));
1101 wpattlen = pg_mb2wchar_with_len(VARDATA_ANY(val), wpatt, nbytes);
1102
1103 wmatch = palloc((nbytes + 1) * sizeof(pg_wchar));
1104 for (wpos = 0; wpos < wpattlen; wpos++)
1105 {
1106 /* % and _ are wildcard characters in LIKE */
1107 if (wpatt[wpos] == '%' ||
1108 wpatt[wpos] == '_')
1109 break;
1110
1111 /* Backslash escapes the next character */
1112 if (wpatt[wpos] == '\\')
1113 {
1114 wpos++;
1115 if (wpos >= wpattlen)
1116 break;
1117 }
1118
1119 /*
1120 * For ILIKE, stop if it's a case-varying character (it's sort of a
1121 * wildcard).
1122 */
1123 if (pg_iswcased(wpatt[wpos], locale))
1124 break;
1125
1126 wmatch[wmatch_pos++] = wpatt[wpos];
1127 }
1128
1129 wmatch[wmatch_pos] = '\0';
1130
1131 match = palloc(pg_database_encoding_max_length() * wmatch_pos + 1);
1132 match_mblen = pg_wchar2mb_with_len(wmatch, match, wmatch_pos);
1133 match[match_mblen] = '\0';
1134 pfree(wmatch);
1135
1136 *prefix_const = string_to_const(match, TEXTOID);
1137 pfree(match);
1138
1139 if (rest_selec != NULL)
1140 {
1141 int wrestlen = wpattlen - wmatch_pos;
1142 char *rest;
1143 int rest_mblen;
1144
1145 rest = palloc(pg_database_encoding_max_length() * wrestlen + 1);
1146 rest_mblen = pg_wchar2mb_with_len(&wpatt[wmatch_pos], rest, wrestlen);
1147
1148 *rest_selec = like_selectivity(rest, rest_mblen, true);
1149 pfree(rest);
1150 }
1151
1152 pfree(wpatt);
1153
1154 /* in LIKE, an empty pattern is an exact match! */
1155 if (wpos == wpattlen)
1156 return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
1157
1158 if (wmatch_pos > 0)
1160
1161 return Pattern_Prefix_None;
1162}
#define OidIsValid(objectId)
Definition: c.h:794
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:150
#define DatumGetTextPP(X)
Definition: fmgr.h:293
long val
Definition: informix.c:689
static char * locale
Definition: initdb.c:140
unsigned int pg_wchar
Definition: mbprint.c:31
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:1011
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1549
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:989
bool pg_iswcased(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1615
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1189
#define wpos(wep)
Definition: tsrank.c:27

References Assert(), Const::consttype, DatumGetTextPP, ereport, errcode(), errhint(), errmsg(), ERROR, like_selectivity(), locale, OidIsValid, palloc(), Pattern_Prefix_Exact, Pattern_Prefix_None, Pattern_Prefix_Partial, pfree(), pg_database_encoding_max_length(), pg_iswcased(), pg_mb2wchar_with_len(), pg_newlocale_from_collation(), pg_wchar2mb_with_len(), string_to_const(), val, VARDATA_ANY(), VARSIZE_ANY_EXHDR(), and wpos.

Referenced by pattern_fixed_prefix().

◆ like_regex_support()

static Node * like_regex_support ( Node rawreq,
Pattern_Type  ptype 
)
static

Definition at line 154 of file like_support.c.

155{
156 Node *ret = NULL;
157
158 if (IsA(rawreq, SupportRequestSelectivity))
159 {
160 /*
161 * Make a selectivity estimate for a function call, just as we'd do if
162 * the call was via the corresponding operator.
163 */
166
167 if (req->is_join)
168 {
169 /*
170 * For the moment we just punt. If patternjoinsel is ever
171 * improved to do better, this should be made to call it.
172 */
174 }
175 else
176 {
177 /* Share code with operator restriction selectivity functions */
180 req->funcid,
181 req->args,
182 req->varRelid,
183 req->inputcollid,
184 ptype,
185 false);
186 }
187 req->selectivity = s1;
188 ret = (Node *) req;
189 }
190 else if (IsA(rawreq, SupportRequestIndexCondition))
191 {
192 /* Try to convert operator/function call to index conditions */
194
195 /*
196 * Currently we have no "reverse" match operators with the pattern on
197 * the left, so we only need consider cases with the indexkey on the
198 * left.
199 */
200 if (req->indexarg != 0)
201 return NULL;
202
203 if (is_opclause(req->node))
204 {
205 OpExpr *clause = (OpExpr *) req->node;
206
207 Assert(list_length(clause->args) == 2);
208 ret = (Node *)
210 (Node *) lsecond(clause->args),
211 ptype,
212 clause->inputcollid,
213 req->opfamily,
214 req->indexcollation);
215 }
216 else if (is_funcclause(req->node)) /* be paranoid */
217 {
218 FuncExpr *clause = (FuncExpr *) req->node;
219
220 Assert(list_length(clause->args) == 2);
221 ret = (Node *)
223 (Node *) lsecond(clause->args),
224 ptype,
225 clause->inputcollid,
226 req->opfamily,
227 req->indexcollation);
228 }
229 }
230
231 return ret;
232}
static List * match_pattern_prefix(Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
Definition: like_support.c:239
static double patternsel_common(PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
Definition: like_support.c:481
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:69
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
double Selectivity
Definition: nodes.h:260
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
#define InvalidOid
Definition: postgres_ext.h:37
char * s1
#define DEFAULT_MATCH_SEL
Definition: selfuncs.h:46
List * args
Definition: primnodes.h:800
Definition: nodes.h:135
List * args
Definition: primnodes.h:868

References FuncExpr::args, OpExpr::args, SupportRequestSelectivity::args, Assert(), DEFAULT_MATCH_SEL, SupportRequestSelectivity::funcid, SupportRequestIndexCondition::indexarg, SupportRequestIndexCondition::indexcollation, SupportRequestSelectivity::inputcollid, InvalidOid, is_funcclause(), SupportRequestSelectivity::is_join, is_opclause(), IsA, linitial, list_length(), lsecond, match_pattern_prefix(), SupportRequestIndexCondition::node, SupportRequestIndexCondition::opfamily, patternsel_common(), SupportRequestSelectivity::root, s1, SupportRequestSelectivity::selectivity, and SupportRequestSelectivity::varRelid.

Referenced by text_starts_with_support(), texticlike_support(), texticregexeq_support(), textlike_support(), and textregexeq_support().

◆ like_selectivity()

static Selectivity like_selectivity ( const char *  patt,
int  pattlen,
bool  case_insensitive 
)
static

Definition at line 1388 of file like_support.c.

1389{
1390 Selectivity sel = 1.0;
1391 int pos;
1392
1393 /* Skip any leading wildcard; it's already factored into initial sel */
1394 for (pos = 0; pos < pattlen; pos++)
1395 {
1396 if (patt[pos] != '%' && patt[pos] != '_')
1397 break;
1398 }
1399
1400 for (; pos < pattlen; pos++)
1401 {
1402 /* % and _ are wildcard characters in LIKE */
1403 if (patt[pos] == '%')
1404 sel *= FULL_WILDCARD_SEL;
1405 else if (patt[pos] == '_')
1406 sel *= ANY_CHAR_SEL;
1407 else if (patt[pos] == '\\')
1408 {
1409 /* Backslash quotes the next character */
1410 pos++;
1411 if (pos >= pattlen)
1412 break;
1413 sel *= FIXED_CHAR_SEL;
1414 }
1415 else
1416 sel *= FIXED_CHAR_SEL;
1417 }
1418 /* Could get sel > 1 if multiple wildcards */
1419 if (sel > 1.0)
1420 sel = 1.0;
1421 return sel;
1422}
#define FULL_WILDCARD_SEL
#define ANY_CHAR_SEL
#define FIXED_CHAR_SEL

References ANY_CHAR_SEL, FIXED_CHAR_SEL, and FULL_WILDCARD_SEL.

Referenced by like_fixed_prefix(), and like_fixed_prefix_ci().

◆ likejoinsel()

Datum likejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 898 of file like_support.c.

899{
901}

References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ likesel()

Datum likesel ( PG_FUNCTION_ARGS  )

Definition at line 806 of file like_support.c.

807{
809}

References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.

◆ make_greater_string()

static Const * make_greater_string ( const Const str_const,
FmgrInfo ltproc,
Oid  collation 
)
static

Definition at line 1613 of file like_support.c.

1614{
1615 Oid datatype = str_const->consttype;
1616 char *workstr;
1617 int len;
1618 Datum cmpstr;
1619 char *cmptxt = NULL;
1621
1622 /*
1623 * Get a modifiable copy of the prefix string in C-string format, and set
1624 * up the string we will compare to as a Datum. In C locale this can just
1625 * be the given prefix string, otherwise we need to add a suffix. Type
1626 * BYTEA sorts bytewise so it never needs a suffix either.
1627 */
1628 if (datatype == BYTEAOID)
1629 {
1630 bytea *bstr = DatumGetByteaPP(str_const->constvalue);
1631
1632 len = VARSIZE_ANY_EXHDR(bstr);
1633 workstr = (char *) palloc(len);
1634 memcpy(workstr, VARDATA_ANY(bstr), len);
1635 Assert(bstr == DatumGetPointer(str_const->constvalue));
1636 cmpstr = str_const->constvalue;
1637 }
1638 else
1639 {
1640 if (datatype == NAMEOID)
1642 str_const->constvalue));
1643 else
1644 workstr = TextDatumGetCString(str_const->constvalue);
1645 len = strlen(workstr);
1646 if (len == 0 || pg_newlocale_from_collation(collation)->collate_is_c)
1647 cmpstr = str_const->constvalue;
1648 else
1649 {
1650 /* If first time through, determine the suffix to use */
1651 static char suffixchar = 0;
1652 static Oid suffixcollation = 0;
1653
1654 if (!suffixchar || suffixcollation != collation)
1655 {
1656 char *best;
1657
1658 best = "Z";
1659 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
1660 best = "z";
1661 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
1662 best = "y";
1663 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
1664 best = "9";
1665 suffixchar = *best;
1666 suffixcollation = collation;
1667 }
1668
1669 /* And build the string to compare to */
1670 if (datatype == NAMEOID)
1671 {
1672 cmptxt = palloc(len + 2);
1673 memcpy(cmptxt, workstr, len);
1674 cmptxt[len] = suffixchar;
1675 cmptxt[len + 1] = '\0';
1676 cmpstr = PointerGetDatum(cmptxt);
1677 }
1678 else
1679 {
1680 cmptxt = palloc(VARHDRSZ + len + 1);
1681 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
1682 memcpy(VARDATA(cmptxt), workstr, len);
1683 *(VARDATA(cmptxt) + len) = suffixchar;
1684 cmpstr = PointerGetDatum(cmptxt);
1685 }
1686 }
1687 }
1688
1689 /* Select appropriate character-incrementer function */
1690 if (datatype == BYTEAOID)
1691 charinc = byte_increment;
1692 else
1694
1695 /* And search ... */
1696 while (len > 0)
1697 {
1698 int charlen;
1699 unsigned char *lastchar;
1700
1701 /* Identify the last character --- for bytea, just the last byte */
1702 if (datatype == BYTEAOID)
1703 charlen = 1;
1704 else
1705 charlen = len - pg_mbcliplen(workstr, len, len - 1);
1706 lastchar = (unsigned char *) (workstr + len - charlen);
1707
1708 /*
1709 * Try to generate a larger string by incrementing the last character
1710 * (for BYTEA, we treat each byte as a character).
1711 *
1712 * Note: the incrementer function is expected to return true if it's
1713 * generated a valid-per-the-encoding new character, otherwise false.
1714 * The contents of the character on false return are unspecified.
1715 */
1716 while (charinc(lastchar, charlen))
1717 {
1718 Const *workstr_const;
1719
1720 if (datatype == BYTEAOID)
1721 workstr_const = string_to_bytea_const(workstr, len);
1722 else
1723 workstr_const = string_to_const(workstr, datatype);
1724
1726 collation,
1727 cmpstr,
1728 workstr_const->constvalue)))
1729 {
1730 /* Successfully made a string larger than cmpstr */
1731 if (cmptxt)
1732 pfree(cmptxt);
1733 pfree(workstr);
1734 return workstr_const;
1735 }
1736
1737 /* No good, release unusable value and try again */
1738 pfree(DatumGetPointer(workstr_const->constvalue));
1739 pfree(workstr_const);
1740 }
1741
1742 /*
1743 * No luck here, so truncate off the last character and try to
1744 * increment the next one.
1745 */
1746 len -= charlen;
1747 workstr[len] = '\0';
1748 }
1749
1750 /* Failed... */
1751 if (cmptxt)
1752 pfree(cmptxt);
1753 pfree(workstr);
1754
1755 return NULL;
1756}
#define VARHDRSZ
Definition: c.h:717
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:684
static bool byte_increment(unsigned char *ptr, int len)
mbcharacter_incrementer pg_database_encoding_character_incrementer(void)
Definition: mbutils.c:1526
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1086
Datum nameout(PG_FUNCTION_ARGS)
Definition: name.c:71
const void size_t len
bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)
Definition: pg_wchar.h:370
static bool DatumGetBool(Datum X)
Definition: postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:352
static char * DatumGetCString(Datum X)
Definition: postgres.h:365
uint64_t Datum
Definition: postgres.h:70
static char * VARDATA(const void *PTR)
Definition: varatt.h:305
static void SET_VARSIZE(void *PTR, Size len)
Definition: varatt.h:432
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1308

References Assert(), byte_increment(), Const::consttype, DatumGetBool(), DatumGetByteaPP, DatumGetCString(), DatumGetPointer(), DirectFunctionCall1, FunctionCall2Coll(), len, nameout(), palloc(), pfree(), pg_database_encoding_character_incrementer(), pg_mbcliplen(), pg_newlocale_from_collation(), PointerGetDatum(), SET_VARSIZE(), string_to_bytea_const(), string_to_const(), TextDatumGetCString, VARDATA(), VARDATA_ANY(), VARHDRSZ, VARSIZE_ANY_EXHDR(), and varstr_cmp().

Referenced by match_pattern_prefix(), and prefix_selectivity().

◆ match_pattern_prefix()

static List * match_pattern_prefix ( Node leftop,
Node rightop,
Pattern_Type  ptype,
Oid  expr_coll,
Oid  opfamily,
Oid  indexcollation 
)
static

Definition at line 239 of file like_support.c.

245{
246 List *result;
247 Const *patt;
248 Const *prefix;
249 Pattern_Prefix_Status pstatus;
250 Oid ldatatype;
251 Oid rdatatype;
252 Oid eqopr;
253 Oid ltopr;
254 Oid geopr;
255 Oid preopr = InvalidOid;
256 bool collation_aware;
257 Expr *expr;
258 FmgrInfo ltproc;
259 Const *greaterstr;
260
261 /*
262 * Can't do anything with a non-constant or NULL pattern argument.
263 *
264 * Note that since we restrict ourselves to cases with a hard constant on
265 * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
266 * about verifying that.
267 */
268 if (!IsA(rightop, Const) ||
269 ((Const *) rightop)->constisnull)
270 return NIL;
271 patt = (Const *) rightop;
272
273 /*
274 * Try to extract a fixed prefix from the pattern.
275 */
276 pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
277 &prefix, NULL);
278
279 /* fail if no fixed prefix */
280 if (pstatus == Pattern_Prefix_None)
281 return NIL;
282
283 /*
284 * Identify the operators we want to use, based on the type of the
285 * left-hand argument. Usually these are just the type's regular
286 * comparison operators, but if we are considering one of the semi-legacy
287 * "pattern" opclasses, use the "pattern" operators instead. Those are
288 * not collation-sensitive but always use C collation, as we want. The
289 * selected operators also determine the needed type of the prefix
290 * constant.
291 */
292 ldatatype = exprType(leftop);
293 switch (ldatatype)
294 {
295 case TEXTOID:
296 if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
297 {
298 eqopr = TextEqualOperator;
299 ltopr = TextPatternLessOperator;
300 geopr = TextPatternGreaterEqualOperator;
301 collation_aware = false;
302 }
303 else if (opfamily == TEXT_SPGIST_FAM_OID)
304 {
305 eqopr = TextEqualOperator;
306 ltopr = TextPatternLessOperator;
307 geopr = TextPatternGreaterEqualOperator;
308 /* This opfamily has direct support for prefixing */
309 preopr = TextPrefixOperator;
310 collation_aware = false;
311 }
312 else
313 {
314 eqopr = TextEqualOperator;
315 ltopr = TextLessOperator;
316 geopr = TextGreaterEqualOperator;
317 collation_aware = true;
318 }
319 rdatatype = TEXTOID;
320 break;
321 case NAMEOID:
322
323 /*
324 * Note that here, we need the RHS type to be text, so that the
325 * comparison value isn't improperly truncated to NAMEDATALEN.
326 */
327 eqopr = NameEqualTextOperator;
328 ltopr = NameLessTextOperator;
329 geopr = NameGreaterEqualTextOperator;
330 collation_aware = true;
331 rdatatype = TEXTOID;
332 break;
333 case BPCHAROID:
334 if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
335 {
336 eqopr = BpcharEqualOperator;
337 ltopr = BpcharPatternLessOperator;
338 geopr = BpcharPatternGreaterEqualOperator;
339 collation_aware = false;
340 }
341 else
342 {
343 eqopr = BpcharEqualOperator;
344 ltopr = BpcharLessOperator;
345 geopr = BpcharGreaterEqualOperator;
346 collation_aware = true;
347 }
348 rdatatype = BPCHAROID;
349 break;
350 case BYTEAOID:
351 eqopr = ByteaEqualOperator;
352 ltopr = ByteaLessOperator;
353 geopr = ByteaGreaterEqualOperator;
354 collation_aware = false;
355 rdatatype = BYTEAOID;
356 break;
357 default:
358 /* Can't get here unless we're attached to the wrong operator */
359 return NIL;
360 }
361
362 /*
363 * If necessary, coerce the prefix constant to the right type. The given
364 * prefix constant is either text or bytea type, therefore the only case
365 * where we need to do anything is when converting text to bpchar. Those
366 * two types are binary-compatible, so relabeling the Const node is
367 * sufficient.
368 */
369 if (prefix->consttype != rdatatype)
370 {
371 Assert(prefix->consttype == TEXTOID &&
372 rdatatype == BPCHAROID);
373 prefix->consttype = rdatatype;
374 }
375
376 /*
377 * If we found an exact-match pattern, generate an "=" indexqual.
378 *
379 * Here and below, check to see whether the desired operator is actually
380 * supported by the index opclass, and fail quietly if not. This allows
381 * us to not be concerned with specific opclasses (except for the legacy
382 * "pattern" cases); any index that correctly implements the operators
383 * will work.
384 */
385 if (pstatus == Pattern_Prefix_Exact)
386 {
387 if (!op_in_opfamily(eqopr, opfamily))
388 return NIL;
389 if (indexcollation != expr_coll)
390 return NIL;
391 expr = make_opclause(eqopr, BOOLOID, false,
392 (Expr *) leftop, (Expr *) prefix,
393 InvalidOid, indexcollation);
394 result = list_make1(expr);
395 return result;
396 }
397
398 /*
399 * Anything other than Pattern_Prefix_Exact is not supported if the
400 * expression collation is nondeterministic. The optimized equality or
401 * prefix tests use bytewise comparisons, which is not consistent with
402 * nondeterministic collations.
403 *
404 * expr_coll is not set for a non-collation-aware data type such as bytea.
405 */
406 if (expr_coll && !get_collation_isdeterministic(expr_coll))
407 return NIL;
408
409 /*
410 * Otherwise, we have a nonempty required prefix of the values. Some
411 * opclasses support prefix checks directly, otherwise we'll try to
412 * generate a range constraint.
413 */
414 if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
415 {
416 expr = make_opclause(preopr, BOOLOID, false,
417 (Expr *) leftop, (Expr *) prefix,
418 InvalidOid, indexcollation);
419 result = list_make1(expr);
420 return result;
421 }
422
423 /*
424 * Since we need a range constraint, it's only going to work reliably if
425 * the index is collation-insensitive or has "C" collation. Note that
426 * here we are looking at the index's collation, not the expression's
427 * collation -- this test is *not* dependent on the LIKE/regex operator's
428 * collation.
429 */
430 if (collation_aware &&
431 !pg_newlocale_from_collation(indexcollation)->collate_is_c)
432 return NIL;
433
434 /*
435 * We can always say "x >= prefix".
436 */
437 if (!op_in_opfamily(geopr, opfamily))
438 return NIL;
439 expr = make_opclause(geopr, BOOLOID, false,
440 (Expr *) leftop, (Expr *) prefix,
441 InvalidOid, indexcollation);
442 result = list_make1(expr);
443
444 /*-------
445 * If we can create a string larger than the prefix, we can say
446 * "x < greaterstr". NB: we rely on make_greater_string() to generate
447 * a guaranteed-greater string, not just a probably-greater string.
448 * In general this is only guaranteed in C locale, so we'd better be
449 * using a C-locale index collation.
450 *-------
451 */
452 if (!op_in_opfamily(ltopr, opfamily))
453 return result;
454 fmgr_info(get_opcode(ltopr), &ltproc);
455 greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
456 if (greaterstr)
457 {
458 expr = make_opclause(ltopr, BOOLOID, false,
459 (Expr *) leftop, (Expr *) greaterstr,
460 InvalidOid, indexcollation);
461 result = lappend(result, expr);
462 }
463
464 return result;
465}
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:128
static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
static Const * make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
List * lappend(List *list, void *datum)
Definition: list.c:339
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1435
bool get_collation_isdeterministic(Oid colloid)
Definition: lsyscache.c:1130
bool op_in_opfamily(Oid opno, Oid opfamily)
Definition: lsyscache.c:68
Expr * make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, Oid opcollid, Oid inputcollid)
Definition: makefuncs.c:701
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
Definition: fmgr.h:57
Definition: pg_list.h:54

References Assert(), Const::consttype, exprType(), fmgr_info(), get_collation_isdeterministic(), get_opcode(), InvalidOid, IsA, lappend(), list_make1, make_greater_string(), make_opclause(), NIL, OidIsValid, op_in_opfamily(), pattern_fixed_prefix(), Pattern_Prefix_Exact, Pattern_Prefix_None, and pg_newlocale_from_collation().

Referenced by like_regex_support().

◆ nlikejoinsel()

Datum nlikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 943 of file like_support.c.

944{
946}

References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ nlikesel()

Datum nlikesel ( PG_FUNCTION_ARGS  )

Definition at line 852 of file like_support.c.

853{
855}

References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.

◆ pattern_fixed_prefix()

static Pattern_Prefix_Status pattern_fixed_prefix ( Const patt,
Pattern_Type  ptype,
Oid  collation,
Const **  prefix,
Selectivity rest_selec 
)
static

Definition at line 1233 of file like_support.c.

1235{
1236 Pattern_Prefix_Status result;
1237
1238 switch (ptype)
1239 {
1240 case Pattern_Type_Like:
1241 result = like_fixed_prefix(patt, prefix, rest_selec);
1242 break;
1244 result = like_fixed_prefix_ci(patt, collation, prefix,
1245 rest_selec);
1246 break;
1247 case Pattern_Type_Regex:
1248 result = regex_fixed_prefix(patt, false, collation,
1249 prefix, rest_selec);
1250 break;
1252 result = regex_fixed_prefix(patt, true, collation,
1253 prefix, rest_selec);
1254 break;
1256 /* Prefix type work is trivial. */
1257 result = Pattern_Prefix_Partial;
1258 *prefix = makeConst(patt->consttype,
1259 patt->consttypmod,
1260 patt->constcollid,
1261 patt->constlen,
1262 datumCopy(patt->constvalue,
1263 patt->constbyval,
1264 patt->constlen),
1265 patt->constisnull,
1266 patt->constbyval);
1267 if (rest_selec != NULL)
1268 *rest_selec = 1.0; /* all */
1269 break;
1270 default:
1271 elog(ERROR, "unrecognized ptype: %d", (int) ptype);
1272 result = Pattern_Prefix_None; /* keep compiler quiet */
1273 break;
1274 }
1275 return result;
1276}
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
#define elog(elevel,...)
Definition: elog.h:226
static Pattern_Prefix_Status like_fixed_prefix_ci(Const *patt_const, Oid collation, Const **prefix_const, Selectivity *rest_selec)
static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, Const **prefix_const, Selectivity *rest_selec)
Definition: like_support.c:987
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:350

References Const::consttype, datumCopy(), elog, ERROR, like_fixed_prefix(), like_fixed_prefix_ci(), makeConst(), Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Type_Like, Pattern_Type_Like_IC, Pattern_Type_Prefix, Pattern_Type_Regex, Pattern_Type_Regex_IC, and regex_fixed_prefix().

Referenced by match_pattern_prefix(), and patternsel_common().

◆ patternjoinsel()

static double patternjoinsel ( PG_FUNCTION_ARGS  ,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 870 of file like_support.c.

871{
872 /* For the moment we just punt. */
873 return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
874}

References DEFAULT_MATCH_SEL.

Referenced by iclikejoinsel(), icnlikejoinsel(), icregexeqjoinsel(), icregexnejoinsel(), likejoinsel(), nlikejoinsel(), prefixjoinsel(), regexeqjoinsel(), and regexnejoinsel().

◆ patternsel()

static double patternsel ( PG_FUNCTION_ARGS  ,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 755 of file like_support.c.

756{
758 Oid operator = PG_GETARG_OID(1);
760 int varRelid = PG_GETARG_INT32(3);
761 Oid collation = PG_GET_COLLATION();
762
763 /*
764 * If this is for a NOT LIKE or similar operator, get the corresponding
765 * positive-match operator and work with that.
766 */
767 if (negate)
768 {
769 operator = get_negator(operator);
770 if (!OidIsValid(operator))
771 elog(ERROR, "patternsel called for operator without a negator");
772 }
773
774 return patternsel_common(root,
775 operator,
777 args,
778 varRelid,
779 collation,
780 ptype,
781 negate);
782}
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:277
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GET_COLLATION()
Definition: fmgr.h:198
Oid get_negator(Oid opno)
Definition: lsyscache.c:1683
tree ctl root
Definition: radixtree.h:1857

References generate_unaccent_rules::args, elog, ERROR, get_negator(), InvalidOid, OidIsValid, patternsel_common(), PG_GET_COLLATION, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, and root.

Referenced by iclikesel(), icnlikesel(), icregexeqsel(), icregexnesel(), likesel(), nlikesel(), prefixsel(), regexeqsel(), and regexnesel().

◆ patternsel_common()

static double patternsel_common ( PlannerInfo root,
Oid  oprid,
Oid  opfuncid,
List args,
int  varRelid,
Oid  collation,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 481 of file like_support.c.

489{
490 VariableStatData vardata;
491 Node *other;
492 bool varonleft;
493 Datum constval;
494 Oid consttype;
495 Oid vartype;
496 Oid rdatatype;
497 Oid eqopr;
498 Oid ltopr;
499 Oid geopr;
500 Pattern_Prefix_Status pstatus;
501 Const *patt;
502 Const *prefix = NULL;
503 Selectivity rest_selec = 0;
504 double nullfrac = 0.0;
505 double result;
506
507 /*
508 * Initialize result to the appropriate default estimate depending on
509 * whether it's a match or not-match operator.
510 */
511 if (negate)
512 result = 1.0 - DEFAULT_MATCH_SEL;
513 else
514 result = DEFAULT_MATCH_SEL;
515
516 /*
517 * If expression is not variable op constant, then punt and return the
518 * default estimate.
519 */
520 if (!get_restriction_variable(root, args, varRelid,
521 &vardata, &other, &varonleft))
522 return result;
523 if (!varonleft || !IsA(other, Const))
524 {
525 ReleaseVariableStats(vardata);
526 return result;
527 }
528
529 /*
530 * If the constant is NULL, assume operator is strict and return zero, ie,
531 * operator will never return TRUE. (It's zero even for a negator op.)
532 */
533 if (((Const *) other)->constisnull)
534 {
535 ReleaseVariableStats(vardata);
536 return 0.0;
537 }
538 constval = ((Const *) other)->constvalue;
539 consttype = ((Const *) other)->consttype;
540
541 /*
542 * The right-hand const is type text or bytea for all supported operators.
543 * We do not expect to see binary-compatible types here, since
544 * const-folding should have relabeled the const to exactly match the
545 * operator's declared type.
546 */
547 if (consttype != TEXTOID && consttype != BYTEAOID)
548 {
549 ReleaseVariableStats(vardata);
550 return result;
551 }
552
553 /*
554 * Similarly, the exposed type of the left-hand side should be one of
555 * those we know. (Do not look at vardata.atttype, which might be
556 * something binary-compatible but different.) We can use it to identify
557 * the comparison operators and the required type of the comparison
558 * constant, much as in match_pattern_prefix().
559 */
560 vartype = vardata.vartype;
561
562 switch (vartype)
563 {
564 case TEXTOID:
565 eqopr = TextEqualOperator;
566 ltopr = TextLessOperator;
567 geopr = TextGreaterEqualOperator;
568 rdatatype = TEXTOID;
569 break;
570 case NAMEOID:
571
572 /*
573 * Note that here, we need the RHS type to be text, so that the
574 * comparison value isn't improperly truncated to NAMEDATALEN.
575 */
576 eqopr = NameEqualTextOperator;
577 ltopr = NameLessTextOperator;
578 geopr = NameGreaterEqualTextOperator;
579 rdatatype = TEXTOID;
580 break;
581 case BPCHAROID:
582 eqopr = BpcharEqualOperator;
583 ltopr = BpcharLessOperator;
584 geopr = BpcharGreaterEqualOperator;
585 rdatatype = BPCHAROID;
586 break;
587 case BYTEAOID:
588 eqopr = ByteaEqualOperator;
589 ltopr = ByteaLessOperator;
590 geopr = ByteaGreaterEqualOperator;
591 rdatatype = BYTEAOID;
592 break;
593 default:
594 /* Can't get here unless we're attached to the wrong operator */
595 ReleaseVariableStats(vardata);
596 return result;
597 }
598
599 /*
600 * Grab the nullfrac for use below.
601 */
602 if (HeapTupleIsValid(vardata.statsTuple))
603 {
604 Form_pg_statistic stats;
605
606 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
607 nullfrac = stats->stanullfrac;
608 }
609
610 /*
611 * Pull out any fixed prefix implied by the pattern, and estimate the
612 * fractional selectivity of the remainder of the pattern. Unlike many
613 * other selectivity estimators, we use the pattern operator's actual
614 * collation for this step. This is not because we expect the collation
615 * to make a big difference in the selectivity estimate (it seldom would),
616 * but because we want to be sure we cache compiled regexps under the
617 * right cache key, so that they can be re-used at runtime.
618 */
619 patt = (Const *) other;
620 pstatus = pattern_fixed_prefix(patt, ptype, collation,
621 &prefix, &rest_selec);
622
623 /*
624 * If necessary, coerce the prefix constant to the right type. The only
625 * case where we need to do anything is when converting text to bpchar.
626 * Those two types are binary-compatible, so relabeling the Const node is
627 * sufficient.
628 */
629 if (prefix && prefix->consttype != rdatatype)
630 {
631 Assert(prefix->consttype == TEXTOID &&
632 rdatatype == BPCHAROID);
633 prefix->consttype = rdatatype;
634 }
635
636 if (pstatus == Pattern_Prefix_Exact)
637 {
638 /*
639 * Pattern specifies an exact match, so estimate as for '='
640 */
641 result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
642 false, true, false);
643 }
644 else
645 {
646 /*
647 * Not exact-match pattern. If we have a sufficiently large
648 * histogram, estimate selectivity for the histogram part of the
649 * population by counting matches in the histogram. If not, estimate
650 * selectivity of the fixed prefix and remainder of pattern
651 * separately, then combine the two to get an estimate of the
652 * selectivity for the part of the column population represented by
653 * the histogram. (For small histograms, we combine these
654 * approaches.)
655 *
656 * We then add up data for any most-common-values values; these are
657 * not in the histogram population, and we can get exact answers for
658 * them by applying the pattern operator, so there's no reason to
659 * approximate. (If the MCVs cover a significant part of the total
660 * population, this gives us a big leg up in accuracy.)
661 */
662 Selectivity selec;
663 int hist_size;
664 FmgrInfo opproc;
665 double mcv_selec,
666 sumcommon;
667
668 /* Try to use the histogram entries to get selectivity */
669 if (!OidIsValid(opfuncid))
670 opfuncid = get_opcode(oprid);
671 fmgr_info(opfuncid, &opproc);
672
673 selec = histogram_selectivity(&vardata, &opproc, collation,
674 constval, true,
675 10, 1, &hist_size);
676
677 /* If not at least 100 entries, use the heuristic method */
678 if (hist_size < 100)
679 {
680 Selectivity heursel;
682
683 if (pstatus == Pattern_Prefix_Partial)
685 eqopr, ltopr, geopr,
686 collation,
687 prefix);
688 else
689 prefixsel = 1.0;
690 heursel = prefixsel * rest_selec;
691
692 if (selec < 0) /* fewer than 10 histogram entries? */
693 selec = heursel;
694 else
695 {
696 /*
697 * For histogram sizes from 10 to 100, we combine the
698 * histogram and heuristic selectivities, putting increasingly
699 * more trust in the histogram for larger sizes.
700 */
701 double hist_weight = hist_size / 100.0;
702
703 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
704 }
705 }
706
707 /* In any case, don't believe extremely small or large estimates. */
708 if (selec < 0.0001)
709 selec = 0.0001;
710 else if (selec > 0.9999)
711 selec = 0.9999;
712
713 /*
714 * If we have most-common-values info, add up the fractions of the MCV
715 * entries that satisfy MCV OP PATTERN. These fractions contribute
716 * directly to the result selectivity. Also add up the total fraction
717 * represented by MCV entries.
718 */
719 mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
720 constval, true,
721 &sumcommon);
722
723 /*
724 * Now merge the results from the MCV and histogram calculations,
725 * realizing that the histogram covers only the non-null values that
726 * are not listed in MCV.
727 */
728 selec *= 1.0 - nullfrac - sumcommon;
729 selec += mcv_selec;
730 result = selec;
731 }
732
733 /* now adjust if we wanted not-match rather than match */
734 if (negate)
735 result = 1.0 - result - nullfrac;
736
737 /* result should be in range, but make sure... */
738 CLAMP_PROBABILITY(result);
739
740 if (prefix)
741 {
742 pfree(DatumGetPointer(prefix->constvalue));
743 pfree(prefix);
744 }
745
746 ReleaseVariableStats(vardata);
747
748 return result;
749}
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
Datum prefixsel(PG_FUNCTION_ARGS)
Definition: like_support.c:815
Oid oprid(Operator op)
Definition: parse_oper.c:239
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:5507
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:368
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:805
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:896
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
HeapTuple statsTuple
Definition: selfuncs.h:89

References generate_unaccent_rules::args, Assert(), CLAMP_PROBABILITY, Const::consttype, DatumGetPointer(), DEFAULT_MATCH_SEL, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT(), HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), OidIsValid, oprid(), pattern_fixed_prefix(), Pattern_Prefix_Exact, Pattern_Prefix_Partial, pfree(), prefix_selectivity(), prefixsel(), ReleaseVariableStats, root, VariableStatData::statsTuple, var_eq_const(), and VariableStatData::vartype.

Referenced by like_regex_support(), and patternsel().

◆ prefix_selectivity()

static Selectivity prefix_selectivity ( PlannerInfo root,
VariableStatData vardata,
Oid  eqopr,
Oid  ltopr,
Oid  geopr,
Oid  collation,
Const prefixcon 
)
static

Definition at line 1297 of file like_support.c.

1301{
1303 FmgrInfo opproc;
1304 Const *greaterstrcon;
1305 Selectivity eq_sel;
1306
1307 /* Estimate the selectivity of "x >= prefix" */
1308 fmgr_info(get_opcode(geopr), &opproc);
1309
1311 geopr, &opproc, true, true,
1312 collation,
1313 prefixcon->constvalue,
1314 prefixcon->consttype);
1315
1316 if (prefixsel < 0.0)
1317 {
1318 /* No histogram is present ... return a suitable default estimate */
1319 return DEFAULT_MATCH_SEL;
1320 }
1321
1322 /*
1323 * If we can create a string larger than the prefix, say "x < greaterstr".
1324 */
1325 fmgr_info(get_opcode(ltopr), &opproc);
1326 greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
1327 if (greaterstrcon)
1328 {
1329 Selectivity topsel;
1330
1331 topsel = ineq_histogram_selectivity(root, vardata,
1332 ltopr, &opproc, false, false,
1333 collation,
1334 greaterstrcon->constvalue,
1335 greaterstrcon->consttype);
1336
1337 /* ineq_histogram_selectivity worked before, it shouldn't fail now */
1338 Assert(topsel >= 0.0);
1339
1340 /*
1341 * Merge the two selectivities in the same way as for a range query
1342 * (see clauselist_selectivity()). Note that we don't need to worry
1343 * about double-exclusion of nulls, since ineq_histogram_selectivity
1344 * doesn't count those anyway.
1345 */
1346 prefixsel = topsel + prefixsel - 1.0;
1347 }
1348
1349 /*
1350 * If the prefix is long then the two bounding values might be too close
1351 * together for the histogram to distinguish them usefully, resulting in a
1352 * zero estimate (plus or minus roundoff error). To avoid returning a
1353 * ridiculously small estimate, compute the estimated selectivity for
1354 * "variable = 'foo'", and clamp to that. (Obviously, the resultant
1355 * estimate should be at least that.)
1356 *
1357 * We apply this even if we couldn't make a greater string. That case
1358 * suggests that the prefix is near the maximum possible, and thus
1359 * probably off the end of the histogram, and thus we probably got a very
1360 * small estimate from the >= condition; so we still need to clamp.
1361 */
1362 eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
1363 false, true, false);
1364
1365 prefixsel = Max(prefixsel, eq_sel);
1366
1367 return prefixsel;
1368}
#define Max(x, y)
Definition: c.h:997
double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
Definition: selfuncs.c:1114

References Assert(), Const::consttype, DEFAULT_MATCH_SEL, fmgr_info(), get_opcode(), ineq_histogram_selectivity(), make_greater_string(), Max, prefixsel(), root, and var_eq_const().

Referenced by patternsel_common().

◆ prefixjoinsel()

Datum prefixjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 907 of file like_support.c.

908{
910}

References Pattern_Type_Prefix, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ prefixsel()

Datum prefixsel ( PG_FUNCTION_ARGS  )

◆ regex_fixed_prefix()

static Pattern_Prefix_Status regex_fixed_prefix ( Const patt_const,
bool  case_insensitive,
Oid  collation,
Const **  prefix_const,
Selectivity rest_selec 
)
static

Definition at line 1165 of file like_support.c.

1167{
1168 Oid typeid = patt_const->consttype;
1169 char *prefix;
1170 bool exact;
1171
1172 /*
1173 * Should be unnecessary, there are no bytea regex operators defined. As
1174 * such, it should be noted that the rest of this function has *not* been
1175 * made safe for binary (possibly NULL containing) strings.
1176 */
1177 if (typeid == BYTEAOID)
1178 ereport(ERROR,
1179 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1180 errmsg("regular-expression matching not supported on type bytea")));
1181
1182 /* Use the regexp machinery to extract the prefix, if any */
1183 prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
1184 case_insensitive, collation,
1185 &exact);
1186
1187 if (prefix == NULL)
1188 {
1189 *prefix_const = NULL;
1190
1191 if (rest_selec != NULL)
1192 {
1193 char *patt = TextDatumGetCString(patt_const->constvalue);
1194
1195 *rest_selec = regex_selectivity(patt, strlen(patt),
1196 case_insensitive,
1197 0);
1198 pfree(patt);
1199 }
1200
1201 return Pattern_Prefix_None;
1202 }
1203
1204 *prefix_const = string_to_const(prefix, typeid);
1205
1206 if (rest_selec != NULL)
1207 {
1208 if (exact)
1209 {
1210 /* Exact match, so there's no additional selectivity */
1211 *rest_selec = 1.0;
1212 }
1213 else
1214 {
1215 char *patt = TextDatumGetCString(patt_const->constvalue);
1216
1217 *rest_selec = regex_selectivity(patt, strlen(patt),
1218 case_insensitive,
1219 strlen(prefix));
1220 pfree(patt);
1221 }
1222 }
1223
1224 pfree(prefix);
1225
1226 if (exact)
1227 return Pattern_Prefix_Exact; /* pattern specifies exact match */
1228 else
1230}
static Selectivity regex_selectivity(const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
char * regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, bool *exact)
Definition: regexp.c:2022

References Const::consttype, DatumGetTextPP, ereport, errcode(), errmsg(), ERROR, Pattern_Prefix_Exact, Pattern_Prefix_None, Pattern_Prefix_Partial, pfree(), regex_selectivity(), regexp_fixed_prefix(), string_to_const(), and TextDatumGetCString.

Referenced by pattern_fixed_prefix().

◆ regex_selectivity()

static Selectivity regex_selectivity ( const char *  patt,
int  pattlen,
bool  case_insensitive,
int  fixed_prefix_len 
)
static

Definition at line 1520 of file like_support.c.

1522{
1523 Selectivity sel;
1524
1525 /* If patt doesn't end with $, consider it to have a trailing wildcard */
1526 if (pattlen > 0 && patt[pattlen - 1] == '$' &&
1527 (pattlen == 1 || patt[pattlen - 2] != '\\'))
1528 {
1529 /* has trailing $ */
1530 sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
1531 }
1532 else
1533 {
1534 /* no trailing $ */
1535 sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
1536 sel *= FULL_WILDCARD_SEL;
1537 }
1538
1539 /*
1540 * If there's a fixed prefix, discount its selectivity. We have to be
1541 * careful here since a very long prefix could result in pow's result
1542 * underflowing to zero (in which case "sel" probably has as well).
1543 */
1544 if (fixed_prefix_len > 0)
1545 {
1546 double prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
1547
1548 if (prefixsel > 0.0)
1549 sel /= prefixsel;
1550 }
1551
1552 /* Make sure result stays in range */
1553 CLAMP_PROBABILITY(sel);
1554 return sel;
1555}
static Selectivity regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)

References CLAMP_PROBABILITY, FIXED_CHAR_SEL, FULL_WILDCARD_SEL, prefixsel(), and regex_selectivity_sub().

Referenced by regex_fixed_prefix().

◆ regex_selectivity_sub()

static Selectivity regex_selectivity_sub ( const char *  patt,
int  pattlen,
bool  case_insensitive 
)
static

Definition at line 1425 of file like_support.c.

1426{
1427 Selectivity sel = 1.0;
1428 int paren_depth = 0;
1429 int paren_pos = 0; /* dummy init to keep compiler quiet */
1430 int pos;
1431
1432 /* since this function recurses, it could be driven to stack overflow */
1434
1435 for (pos = 0; pos < pattlen; pos++)
1436 {
1437 if (patt[pos] == '(')
1438 {
1439 if (paren_depth == 0)
1440 paren_pos = pos; /* remember start of parenthesized item */
1441 paren_depth++;
1442 }
1443 else if (patt[pos] == ')' && paren_depth > 0)
1444 {
1445 paren_depth--;
1446 if (paren_depth == 0)
1447 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
1448 pos - (paren_pos + 1),
1449 case_insensitive);
1450 }
1451 else if (patt[pos] == '|' && paren_depth == 0)
1452 {
1453 /*
1454 * If unquoted | is present at paren level 0 in pattern, we have
1455 * multiple alternatives; sum their probabilities.
1456 */
1457 sel += regex_selectivity_sub(patt + (pos + 1),
1458 pattlen - (pos + 1),
1459 case_insensitive);
1460 break; /* rest of pattern is now processed */
1461 }
1462 else if (patt[pos] == '[')
1463 {
1464 bool negclass = false;
1465
1466 if (patt[++pos] == '^')
1467 {
1468 negclass = true;
1469 pos++;
1470 }
1471 if (patt[pos] == ']') /* ']' at start of class is not special */
1472 pos++;
1473 while (pos < pattlen && patt[pos] != ']')
1474 pos++;
1475 if (paren_depth == 0)
1476 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
1477 }
1478 else if (patt[pos] == '.')
1479 {
1480 if (paren_depth == 0)
1481 sel *= ANY_CHAR_SEL;
1482 }
1483 else if (patt[pos] == '*' ||
1484 patt[pos] == '?' ||
1485 patt[pos] == '+')
1486 {
1487 /* Ought to be smarter about quantifiers... */
1488 if (paren_depth == 0)
1489 sel *= PARTIAL_WILDCARD_SEL;
1490 }
1491 else if (patt[pos] == '{')
1492 {
1493 while (pos < pattlen && patt[pos] != '}')
1494 pos++;
1495 if (paren_depth == 0)
1496 sel *= PARTIAL_WILDCARD_SEL;
1497 }
1498 else if (patt[pos] == '\\')
1499 {
1500 /* backslash quotes the next character */
1501 pos++;
1502 if (pos >= pattlen)
1503 break;
1504 if (paren_depth == 0)
1505 sel *= FIXED_CHAR_SEL;
1506 }
1507 else
1508 {
1509 if (paren_depth == 0)
1510 sel *= FIXED_CHAR_SEL;
1511 }
1512 }
1513 /* Could get sel > 1 if multiple wildcards */
1514 if (sel > 1.0)
1515 sel = 1.0;
1516 return sel;
1517}
#define PARTIAL_WILDCARD_SEL
#define CHAR_RANGE_SEL
void check_stack_depth(void)
Definition: stack_depth.c:95

References ANY_CHAR_SEL, CHAR_RANGE_SEL, check_stack_depth(), FIXED_CHAR_SEL, PARTIAL_WILDCARD_SEL, and regex_selectivity_sub().

Referenced by regex_selectivity(), and regex_selectivity_sub().

◆ regexeqjoinsel()

Datum regexeqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 880 of file like_support.c.

881{
883}

References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ regexeqsel()

Datum regexeqsel ( PG_FUNCTION_ARGS  )

Definition at line 788 of file like_support.c.

789{
791}

References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.

◆ regexnejoinsel()

Datum regexnejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 925 of file like_support.c.

926{
928}

References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ regexnesel()

Datum regexnesel ( PG_FUNCTION_ARGS  )

Definition at line 834 of file like_support.c.

835{
837}

References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.

◆ string_to_bytea_const()

static Const * string_to_bytea_const ( const char *  str,
size_t  str_len 
)
static

Definition at line 1827 of file like_support.c.

1828{
1829 bytea *bstr = palloc(VARHDRSZ + str_len);
1830 Datum conval;
1831
1832 memcpy(VARDATA(bstr), str, str_len);
1833 SET_VARSIZE(bstr, VARHDRSZ + str_len);
1834 conval = PointerGetDatum(bstr);
1835
1836 return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
1837}
const char * str

References InvalidOid, makeConst(), palloc(), PointerGetDatum(), SET_VARSIZE(), str, VARDATA(), and VARHDRSZ.

Referenced by like_fixed_prefix(), and make_greater_string().

◆ string_to_const()

static Const * string_to_const ( const char *  str,
Oid  datatype 
)
static

Definition at line 1784 of file like_support.c.

1785{
1786 Datum conval = string_to_datum(str, datatype);
1787 Oid collation;
1788 int constlen;
1789
1790 /*
1791 * We only need to support a few datatypes here, so hard-wire properties
1792 * instead of incurring the expense of catalog lookups.
1793 */
1794 switch (datatype)
1795 {
1796 case TEXTOID:
1797 case VARCHAROID:
1798 case BPCHAROID:
1799 collation = DEFAULT_COLLATION_OID;
1800 constlen = -1;
1801 break;
1802
1803 case NAMEOID:
1804 collation = C_COLLATION_OID;
1805 constlen = NAMEDATALEN;
1806 break;
1807
1808 case BYTEAOID:
1809 collation = InvalidOid;
1810 constlen = -1;
1811 break;
1812
1813 default:
1814 elog(ERROR, "unexpected datatype in string_to_const: %u",
1815 datatype);
1816 return NULL;
1817 }
1818
1819 return makeConst(datatype, -1, collation, constlen,
1820 conval, false, false);
1821}
static Datum string_to_datum(const char *str, Oid datatype)
#define NAMEDATALEN

References elog, ERROR, InvalidOid, makeConst(), NAMEDATALEN, str, and string_to_datum().

Referenced by like_fixed_prefix(), like_fixed_prefix_ci(), make_greater_string(), and regex_fixed_prefix().

◆ string_to_datum()

static Datum string_to_datum ( const char *  str,
Oid  datatype 
)
static

Definition at line 1764 of file like_support.c.

1765{
1766 Assert(str != NULL);
1767
1768 /*
1769 * We cheat a little by assuming that CStringGetTextDatum() will do for
1770 * bpchar and varchar constants too...
1771 */
1772 if (datatype == NAMEOID)
1774 else if (datatype == BYTEAOID)
1776 else
1777 return CStringGetTextDatum(str);
1778}
#define CStringGetTextDatum(s)
Definition: builtins.h:97
Datum byteain(PG_FUNCTION_ARGS)
Definition: bytea.c:201
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:380

References Assert(), byteain(), CStringGetDatum(), CStringGetTextDatum, DirectFunctionCall1, namein(), and str.

Referenced by string_to_const().

◆ text_starts_with_support()

Datum text_starts_with_support ( PG_FUNCTION_ARGS  )

Definition at line 145 of file like_support.c.

146{
147 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
148
150}
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:363
static Node * like_regex_support(Node *rawreq, Pattern_Type ptype)
Definition: like_support.c:154

References like_regex_support(), Pattern_Type_Prefix, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ texticlike_support()

Datum texticlike_support ( PG_FUNCTION_ARGS  )

◆ texticregexeq_support()

Datum texticregexeq_support ( PG_FUNCTION_ARGS  )

◆ textlike_support()

Datum textlike_support ( PG_FUNCTION_ARGS  )

Definition at line 113 of file like_support.c.

114{
115 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
116
118}

References like_regex_support(), Pattern_Type_Like, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ textregexeq_support()

Datum textregexeq_support ( PG_FUNCTION_ARGS  )