PostgreSQL Source Code  git master
like_support.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
#include "utils/varlena.h"
Include dependency graph for like_support.c:

Go to the source code of this file.

Macros

#define FIXED_CHAR_SEL   0.20 /* about 1/5 */
 
#define CHAR_RANGE_SEL   0.25
 
#define ANY_CHAR_SEL   0.9 /* not 1, since it won't match end-of-string */
 
#define FULL_WILDCARD_SEL   5.0
 
#define PARTIAL_WILDCARD_SEL   2.0
 

Enumerations

enum  Pattern_Type {
  Pattern_Type_Like , Pattern_Type_Like_IC , Pattern_Type_Regex , Pattern_Type_Regex_IC ,
  Pattern_Type_Prefix
}
 
enum  Pattern_Prefix_Status { Pattern_Prefix_None , Pattern_Prefix_Partial , Pattern_Prefix_Exact }
 

Functions

static Nodelike_regex_support (Node *rawreq, Pattern_Type ptype)
 
static Listmatch_pattern_prefix (Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
 
static double patternsel_common (PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
 
static Pattern_Prefix_Status pattern_fixed_prefix (Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
 
static Selectivity prefix_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
 
static Selectivity like_selectivity (const char *patt, int pattlen, bool case_insensitive)
 
static Selectivity regex_selectivity (const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
 
static int pattern_char_isalpha (char c, bool is_multibyte, pg_locale_t locale)
 
static Constmake_greater_string (const Const *str_const, FmgrInfo *ltproc, Oid collation)
 
static Datum string_to_datum (const char *str, Oid datatype)
 
static Conststring_to_const (const char *str, Oid datatype)
 
static Conststring_to_bytea_const (const char *str, size_t str_len)
 
Datum textlike_support (PG_FUNCTION_ARGS)
 
Datum texticlike_support (PG_FUNCTION_ARGS)
 
Datum textregexeq_support (PG_FUNCTION_ARGS)
 
Datum texticregexeq_support (PG_FUNCTION_ARGS)
 
Datum text_starts_with_support (PG_FUNCTION_ARGS)
 
static double patternsel (PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 
Datum regexeqsel (PG_FUNCTION_ARGS)
 
Datum icregexeqsel (PG_FUNCTION_ARGS)
 
Datum likesel (PG_FUNCTION_ARGS)
 
Datum prefixsel (PG_FUNCTION_ARGS)
 
Datum iclikesel (PG_FUNCTION_ARGS)
 
Datum regexnesel (PG_FUNCTION_ARGS)
 
Datum icregexnesel (PG_FUNCTION_ARGS)
 
Datum nlikesel (PG_FUNCTION_ARGS)
 
Datum icnlikesel (PG_FUNCTION_ARGS)
 
static double patternjoinsel (PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 
Datum regexeqjoinsel (PG_FUNCTION_ARGS)
 
Datum icregexeqjoinsel (PG_FUNCTION_ARGS)
 
Datum likejoinsel (PG_FUNCTION_ARGS)
 
Datum prefixjoinsel (PG_FUNCTION_ARGS)
 
Datum iclikejoinsel (PG_FUNCTION_ARGS)
 
Datum regexnejoinsel (PG_FUNCTION_ARGS)
 
Datum icregexnejoinsel (PG_FUNCTION_ARGS)
 
Datum nlikejoinsel (PG_FUNCTION_ARGS)
 
Datum icnlikejoinsel (PG_FUNCTION_ARGS)
 
static Pattern_Prefix_Status like_fixed_prefix (Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
 
static Pattern_Prefix_Status regex_fixed_prefix (Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
 
static Selectivity regex_selectivity_sub (const char *patt, int pattlen, bool case_insensitive)
 
static bool byte_increment (unsigned char *ptr, int len)
 

Macro Definition Documentation

◆ ANY_CHAR_SEL

#define ANY_CHAR_SEL   0.9 /* not 1, since it won't match end-of-string */

Definition at line 1310 of file like_support.c.

◆ CHAR_RANGE_SEL

#define CHAR_RANGE_SEL   0.25

Definition at line 1309 of file like_support.c.

◆ FIXED_CHAR_SEL

#define FIXED_CHAR_SEL   0.20 /* about 1/5 */

Definition at line 1308 of file like_support.c.

◆ FULL_WILDCARD_SEL

#define FULL_WILDCARD_SEL   5.0

Definition at line 1311 of file like_support.c.

◆ PARTIAL_WILDCARD_SEL

#define PARTIAL_WILDCARD_SEL   2.0

Definition at line 1312 of file like_support.c.

Enumeration Type Documentation

◆ Pattern_Prefix_Status

Enumerator
Pattern_Prefix_None 
Pattern_Prefix_Partial 
Pattern_Prefix_Exact 

Definition at line 67 of file like_support.c.

68 {
Pattern_Prefix_Status
Definition: like_support.c:68
@ Pattern_Prefix_Partial
Definition: like_support.c:69
@ Pattern_Prefix_None
Definition: like_support.c:69
@ Pattern_Prefix_Exact
Definition: like_support.c:69

◆ Pattern_Type

Enumerator
Pattern_Type_Like 
Pattern_Type_Like_IC 
Pattern_Type_Regex 
Pattern_Type_Regex_IC 
Pattern_Type_Prefix 

Definition at line 58 of file like_support.c.

59 {
65 } Pattern_Type;
Pattern_Type
Definition: like_support.c:59
@ Pattern_Type_Prefix
Definition: like_support.c:64
@ Pattern_Type_Regex_IC
Definition: like_support.c:63
@ Pattern_Type_Like
Definition: like_support.c:60
@ Pattern_Type_Regex
Definition: like_support.c:62
@ Pattern_Type_Like_IC
Definition: like_support.c:61

Function Documentation

◆ byte_increment()

static bool byte_increment ( unsigned char *  ptr,
int  len 
)
static

Definition at line 1513 of file like_support.c.

1514 {
1515  if (*ptr >= 255)
1516  return false;
1517  (*ptr)++;
1518  return true;
1519 }

Referenced by make_greater_string().

◆ iclikejoinsel()

Datum iclikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 918 of file like_support.c.

919 {
921 }
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
static double patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:872

References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ iclikesel()

Datum iclikesel ( PG_FUNCTION_ARGS  )

Definition at line 827 of file like_support.c.

828 {
830 }
static double patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:757

References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icnlikejoinsel()

Datum icnlikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 954 of file like_support.c.

955 {
957 }

References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icnlikesel()

Datum icnlikesel ( PG_FUNCTION_ARGS  )

Definition at line 863 of file like_support.c.

864 {
866 }

References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icregexeqjoinsel()

Datum icregexeqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 891 of file like_support.c.

892 {
894 }

References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icregexeqsel()

Datum icregexeqsel ( PG_FUNCTION_ARGS  )

Definition at line 799 of file like_support.c.

800 {
802 }

References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icregexnejoinsel()

Datum icregexnejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 936 of file like_support.c.

937 {
939 }

References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icregexnesel()

Datum icregexnesel ( PG_FUNCTION_ARGS  )

Definition at line 845 of file like_support.c.

846 {
848 }

References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ like_fixed_prefix()

static Pattern_Prefix_Status like_fixed_prefix ( Const patt_const,
bool  case_insensitive,
Oid  collation,
Const **  prefix_const,
Selectivity rest_selec 
)
static

Definition at line 989 of file like_support.c.

991 {
992  char *match;
993  char *patt;
994  int pattlen;
995  Oid typeid = patt_const->consttype;
996  int pos,
997  match_pos;
998  bool is_multibyte = (pg_database_encoding_max_length() > 1);
999  pg_locale_t locale = 0;
1000 
1001  /* the right-hand const is type text or bytea */
1002  Assert(typeid == BYTEAOID || typeid == TEXTOID);
1003 
1004  if (case_insensitive)
1005  {
1006  if (typeid == BYTEAOID)
1007  ereport(ERROR,
1008  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1009  errmsg("case insensitive matching not supported on type bytea")));
1010 
1011  if (!OidIsValid(collation))
1012  {
1013  /*
1014  * This typically means that the parser could not resolve a
1015  * conflict of implicit collations, so report it that way.
1016  */
1017  ereport(ERROR,
1018  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1019  errmsg("could not determine which collation to use for ILIKE"),
1020  errhint("Use the COLLATE clause to set the collation explicitly.")));
1021  }
1022 
1023  locale = pg_newlocale_from_collation(collation);
1024  }
1025 
1026  if (typeid != BYTEAOID)
1027  {
1028  patt = TextDatumGetCString(patt_const->constvalue);
1029  pattlen = strlen(patt);
1030  }
1031  else
1032  {
1033  bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
1034 
1035  pattlen = VARSIZE_ANY_EXHDR(bstr);
1036  patt = (char *) palloc(pattlen);
1037  memcpy(patt, VARDATA_ANY(bstr), pattlen);
1038  Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
1039  }
1040 
1041  match = palloc(pattlen + 1);
1042  match_pos = 0;
1043  for (pos = 0; pos < pattlen; pos++)
1044  {
1045  /* % and _ are wildcard characters in LIKE */
1046  if (patt[pos] == '%' ||
1047  patt[pos] == '_')
1048  break;
1049 
1050  /* Backslash escapes the next character */
1051  if (patt[pos] == '\\')
1052  {
1053  pos++;
1054  if (pos >= pattlen)
1055  break;
1056  }
1057 
1058  /* Stop if case-varying character (it's sort of a wildcard) */
1059  if (case_insensitive &&
1060  pattern_char_isalpha(patt[pos], is_multibyte, locale))
1061  break;
1062 
1063  match[match_pos++] = patt[pos];
1064  }
1065 
1066  match[match_pos] = '\0';
1067 
1068  if (typeid != BYTEAOID)
1069  *prefix_const = string_to_const(match, typeid);
1070  else
1071  *prefix_const = string_to_bytea_const(match, match_pos);
1072 
1073  if (rest_selec != NULL)
1074  *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
1075  case_insensitive);
1076 
1077  pfree(patt);
1078  pfree(match);
1079 
1080  /* in LIKE, an empty pattern is an exact match! */
1081  if (pos == pattlen)
1082  return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
1083 
1084  if (match_pos > 0)
1085  return Pattern_Prefix_Partial;
1086 
1087  return Pattern_Prefix_None;
1088 }
#define TextDatumGetCString(d)
Definition: builtins.h:98
char * Pointer
Definition: c.h:476
#define Assert(condition)
Definition: c.h:812
#define OidIsValid(objectId)
Definition: c.h:729
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
static char * locale
Definition: initdb.c:140
static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive)
static int pattern_char_isalpha(char c, bool is_multibyte, pg_locale_t locale)
static Const * string_to_const(const char *str, Oid datatype)
static Const * string_to_bytea_const(const char *str, size_t str_len)
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1320
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
unsigned int Oid
Definition: postgres_ext.h:31
Oid consttype
Definition: primnodes.h:312
Definition: c.h:641
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References Assert, Const::consttype, DatumGetByteaPP, DatumGetPointer(), ereport, errcode(), errhint(), errmsg(), ERROR, like_selectivity(), locale, OidIsValid, palloc(), pattern_char_isalpha(), Pattern_Prefix_Exact, Pattern_Prefix_None, Pattern_Prefix_Partial, pfree(), pg_database_encoding_max_length(), pg_newlocale_from_collation(), string_to_bytea_const(), string_to_const(), TextDatumGetCString, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pattern_fixed_prefix().

◆ like_regex_support()

static Node * like_regex_support ( Node rawreq,
Pattern_Type  ptype 
)
static

Definition at line 156 of file like_support.c.

157 {
158  Node *ret = NULL;
159 
160  if (IsA(rawreq, SupportRequestSelectivity))
161  {
162  /*
163  * Make a selectivity estimate for a function call, just as we'd do if
164  * the call was via the corresponding operator.
165  */
167  Selectivity s1;
168 
169  if (req->is_join)
170  {
171  /*
172  * For the moment we just punt. If patternjoinsel is ever
173  * improved to do better, this should be made to call it.
174  */
176  }
177  else
178  {
179  /* Share code with operator restriction selectivity functions */
180  s1 = patternsel_common(req->root,
181  InvalidOid,
182  req->funcid,
183  req->args,
184  req->varRelid,
185  req->inputcollid,
186  ptype,
187  false);
188  }
189  req->selectivity = s1;
190  ret = (Node *) req;
191  }
192  else if (IsA(rawreq, SupportRequestIndexCondition))
193  {
194  /* Try to convert operator/function call to index conditions */
196 
197  /*
198  * Currently we have no "reverse" match operators with the pattern on
199  * the left, so we only need consider cases with the indexkey on the
200  * left.
201  */
202  if (req->indexarg != 0)
203  return NULL;
204 
205  if (is_opclause(req->node))
206  {
207  OpExpr *clause = (OpExpr *) req->node;
208 
209  Assert(list_length(clause->args) == 2);
210  ret = (Node *)
211  match_pattern_prefix((Node *) linitial(clause->args),
212  (Node *) lsecond(clause->args),
213  ptype,
214  clause->inputcollid,
215  req->opfamily,
216  req->indexcollation);
217  }
218  else if (is_funcclause(req->node)) /* be paranoid */
219  {
220  FuncExpr *clause = (FuncExpr *) req->node;
221 
222  Assert(list_length(clause->args) == 2);
223  ret = (Node *)
224  match_pattern_prefix((Node *) linitial(clause->args),
225  (Node *) lsecond(clause->args),
226  ptype,
227  clause->inputcollid,
228  req->opfamily,
229  req->indexcollation);
230  }
231  }
232 
233  return ret;
234 }
static List * match_pattern_prefix(Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
Definition: like_support.c:241
static double patternsel_common(PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
Definition: like_support.c:483
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:69
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
#define InvalidOid
Definition: postgres_ext.h:36
char * s1
#define DEFAULT_MATCH_SEL
Definition: selfuncs.h:46
List * args
Definition: primnodes.h:768
Definition: nodes.h:129
List * args
Definition: primnodes.h:836
struct PlannerInfo * root
Definition: supportnodes.h:96

References FuncExpr::args, OpExpr::args, SupportRequestSelectivity::args, Assert, DEFAULT_MATCH_SEL, SupportRequestSelectivity::funcid, SupportRequestIndexCondition::indexarg, SupportRequestIndexCondition::indexcollation, SupportRequestSelectivity::inputcollid, InvalidOid, is_funcclause(), SupportRequestSelectivity::is_join, is_opclause(), IsA, linitial, list_length(), lsecond, match_pattern_prefix(), SupportRequestIndexCondition::node, SupportRequestIndexCondition::opfamily, patternsel_common(), SupportRequestSelectivity::root, s1, SupportRequestSelectivity::selectivity, and SupportRequestSelectivity::varRelid.

Referenced by text_starts_with_support(), texticlike_support(), texticregexeq_support(), textlike_support(), and textregexeq_support().

◆ like_selectivity()

static Selectivity like_selectivity ( const char *  patt,
int  pattlen,
bool  case_insensitive 
)
static

Definition at line 1315 of file like_support.c.

1316 {
1317  Selectivity sel = 1.0;
1318  int pos;
1319 
1320  /* Skip any leading wildcard; it's already factored into initial sel */
1321  for (pos = 0; pos < pattlen; pos++)
1322  {
1323  if (patt[pos] != '%' && patt[pos] != '_')
1324  break;
1325  }
1326 
1327  for (; pos < pattlen; pos++)
1328  {
1329  /* % and _ are wildcard characters in LIKE */
1330  if (patt[pos] == '%')
1331  sel *= FULL_WILDCARD_SEL;
1332  else if (patt[pos] == '_')
1333  sel *= ANY_CHAR_SEL;
1334  else if (patt[pos] == '\\')
1335  {
1336  /* Backslash quotes the next character */
1337  pos++;
1338  if (pos >= pattlen)
1339  break;
1340  sel *= FIXED_CHAR_SEL;
1341  }
1342  else
1343  sel *= FIXED_CHAR_SEL;
1344  }
1345  /* Could get sel > 1 if multiple wildcards */
1346  if (sel > 1.0)
1347  sel = 1.0;
1348  return sel;
1349 }
#define FULL_WILDCARD_SEL
#define ANY_CHAR_SEL
#define FIXED_CHAR_SEL

References ANY_CHAR_SEL, FIXED_CHAR_SEL, and FULL_WILDCARD_SEL.

Referenced by like_fixed_prefix().

◆ likejoinsel()

Datum likejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 900 of file like_support.c.

901 {
903 }

References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ likesel()

Datum likesel ( PG_FUNCTION_ARGS  )

Definition at line 808 of file like_support.c.

809 {
811 }

References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.

◆ make_greater_string()

static Const * make_greater_string ( const Const str_const,
FmgrInfo ltproc,
Oid  collation 
)
static

Definition at line 1563 of file like_support.c.

1564 {
1565  Oid datatype = str_const->consttype;
1566  char *workstr;
1567  int len;
1568  Datum cmpstr;
1569  char *cmptxt = NULL;
1570  mbcharacter_incrementer charinc;
1571 
1572  /*
1573  * Get a modifiable copy of the prefix string in C-string format, and set
1574  * up the string we will compare to as a Datum. In C locale this can just
1575  * be the given prefix string, otherwise we need to add a suffix. Type
1576  * BYTEA sorts bytewise so it never needs a suffix either.
1577  */
1578  if (datatype == BYTEAOID)
1579  {
1580  bytea *bstr = DatumGetByteaPP(str_const->constvalue);
1581 
1582  len = VARSIZE_ANY_EXHDR(bstr);
1583  workstr = (char *) palloc(len);
1584  memcpy(workstr, VARDATA_ANY(bstr), len);
1585  Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
1586  cmpstr = str_const->constvalue;
1587  }
1588  else
1589  {
1590  if (datatype == NAMEOID)
1592  str_const->constvalue));
1593  else
1594  workstr = TextDatumGetCString(str_const->constvalue);
1595  len = strlen(workstr);
1596  if (len == 0 || pg_newlocale_from_collation(collation)->collate_is_c)
1597  cmpstr = str_const->constvalue;
1598  else
1599  {
1600  /* If first time through, determine the suffix to use */
1601  static char suffixchar = 0;
1602  static Oid suffixcollation = 0;
1603 
1604  if (!suffixchar || suffixcollation != collation)
1605  {
1606  char *best;
1607 
1608  best = "Z";
1609  if (varstr_cmp(best, 1, "z", 1, collation) < 0)
1610  best = "z";
1611  if (varstr_cmp(best, 1, "y", 1, collation) < 0)
1612  best = "y";
1613  if (varstr_cmp(best, 1, "9", 1, collation) < 0)
1614  best = "9";
1615  suffixchar = *best;
1616  suffixcollation = collation;
1617  }
1618 
1619  /* And build the string to compare to */
1620  if (datatype == NAMEOID)
1621  {
1622  cmptxt = palloc(len + 2);
1623  memcpy(cmptxt, workstr, len);
1624  cmptxt[len] = suffixchar;
1625  cmptxt[len + 1] = '\0';
1626  cmpstr = PointerGetDatum(cmptxt);
1627  }
1628  else
1629  {
1630  cmptxt = palloc(VARHDRSZ + len + 1);
1631  SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
1632  memcpy(VARDATA(cmptxt), workstr, len);
1633  *(VARDATA(cmptxt) + len) = suffixchar;
1634  cmpstr = PointerGetDatum(cmptxt);
1635  }
1636  }
1637  }
1638 
1639  /* Select appropriate character-incrementer function */
1640  if (datatype == BYTEAOID)
1641  charinc = byte_increment;
1642  else
1644 
1645  /* And search ... */
1646  while (len > 0)
1647  {
1648  int charlen;
1649  unsigned char *lastchar;
1650 
1651  /* Identify the last character --- for bytea, just the last byte */
1652  if (datatype == BYTEAOID)
1653  charlen = 1;
1654  else
1655  charlen = len - pg_mbcliplen(workstr, len, len - 1);
1656  lastchar = (unsigned char *) (workstr + len - charlen);
1657 
1658  /*
1659  * Try to generate a larger string by incrementing the last character
1660  * (for BYTEA, we treat each byte as a character).
1661  *
1662  * Note: the incrementer function is expected to return true if it's
1663  * generated a valid-per-the-encoding new character, otherwise false.
1664  * The contents of the character on false return are unspecified.
1665  */
1666  while (charinc(lastchar, charlen))
1667  {
1668  Const *workstr_const;
1669 
1670  if (datatype == BYTEAOID)
1671  workstr_const = string_to_bytea_const(workstr, len);
1672  else
1673  workstr_const = string_to_const(workstr, datatype);
1674 
1675  if (DatumGetBool(FunctionCall2Coll(ltproc,
1676  collation,
1677  cmpstr,
1678  workstr_const->constvalue)))
1679  {
1680  /* Successfully made a string larger than cmpstr */
1681  if (cmptxt)
1682  pfree(cmptxt);
1683  pfree(workstr);
1684  return workstr_const;
1685  }
1686 
1687  /* No good, release unusable value and try again */
1688  pfree(DatumGetPointer(workstr_const->constvalue));
1689  pfree(workstr_const);
1690  }
1691 
1692  /*
1693  * No luck here, so truncate off the last character and try to
1694  * increment the next one.
1695  */
1696  len -= charlen;
1697  workstr[len] = '\0';
1698  }
1699 
1700  /* Failed... */
1701  if (cmptxt)
1702  pfree(cmptxt);
1703  pfree(workstr);
1704 
1705  return NULL;
1706 }
#define VARHDRSZ
Definition: c.h:646
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:641
static bool byte_increment(unsigned char *ptr, int len)
mbcharacter_incrementer pg_database_encoding_character_incrementer(void)
Definition: mbutils.c:1523
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1083
Datum nameout(PG_FUNCTION_ARGS)
Definition: name.c:71
const void size_t len
bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)
Definition: pg_wchar.h:370
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
uintptr_t Datum
Definition: postgres.h:64
#define VARDATA(PTR)
Definition: varatt.h:278
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1538

References Assert, byte_increment(), Const::consttype, DatumGetBool(), DatumGetByteaPP, DatumGetCString(), DatumGetPointer(), DirectFunctionCall1, FunctionCall2Coll(), len, nameout(), palloc(), pfree(), pg_database_encoding_character_incrementer(), pg_mbcliplen(), pg_newlocale_from_collation(), PointerGetDatum(), SET_VARSIZE, string_to_bytea_const(), string_to_const(), TextDatumGetCString, VARDATA, VARDATA_ANY, VARHDRSZ, VARSIZE_ANY_EXHDR, and varstr_cmp().

Referenced by match_pattern_prefix(), and prefix_selectivity().

◆ match_pattern_prefix()

static List * match_pattern_prefix ( Node leftop,
Node rightop,
Pattern_Type  ptype,
Oid  expr_coll,
Oid  opfamily,
Oid  indexcollation 
)
static

Definition at line 241 of file like_support.c.

247 {
248  List *result;
249  Const *patt;
250  Const *prefix;
251  Pattern_Prefix_Status pstatus;
252  Oid ldatatype;
253  Oid rdatatype;
254  Oid eqopr;
255  Oid ltopr;
256  Oid geopr;
257  Oid preopr = InvalidOid;
258  bool collation_aware;
259  Expr *expr;
260  FmgrInfo ltproc;
261  Const *greaterstr;
262 
263  /*
264  * Can't do anything with a non-constant or NULL pattern argument.
265  *
266  * Note that since we restrict ourselves to cases with a hard constant on
267  * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
268  * about verifying that.
269  */
270  if (!IsA(rightop, Const) ||
271  ((Const *) rightop)->constisnull)
272  return NIL;
273  patt = (Const *) rightop;
274 
275  /*
276  * Try to extract a fixed prefix from the pattern.
277  */
278  pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
279  &prefix, NULL);
280 
281  /* fail if no fixed prefix */
282  if (pstatus == Pattern_Prefix_None)
283  return NIL;
284 
285  /*
286  * Identify the operators we want to use, based on the type of the
287  * left-hand argument. Usually these are just the type's regular
288  * comparison operators, but if we are considering one of the semi-legacy
289  * "pattern" opclasses, use the "pattern" operators instead. Those are
290  * not collation-sensitive but always use C collation, as we want. The
291  * selected operators also determine the needed type of the prefix
292  * constant.
293  */
294  ldatatype = exprType(leftop);
295  switch (ldatatype)
296  {
297  case TEXTOID:
298  if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
299  {
300  eqopr = TextEqualOperator;
301  ltopr = TextPatternLessOperator;
302  geopr = TextPatternGreaterEqualOperator;
303  collation_aware = false;
304  }
305  else if (opfamily == TEXT_SPGIST_FAM_OID)
306  {
307  eqopr = TextEqualOperator;
308  ltopr = TextPatternLessOperator;
309  geopr = TextPatternGreaterEqualOperator;
310  /* This opfamily has direct support for prefixing */
311  preopr = TextPrefixOperator;
312  collation_aware = false;
313  }
314  else
315  {
316  eqopr = TextEqualOperator;
317  ltopr = TextLessOperator;
318  geopr = TextGreaterEqualOperator;
319  collation_aware = true;
320  }
321  rdatatype = TEXTOID;
322  break;
323  case NAMEOID:
324 
325  /*
326  * Note that here, we need the RHS type to be text, so that the
327  * comparison value isn't improperly truncated to NAMEDATALEN.
328  */
329  eqopr = NameEqualTextOperator;
330  ltopr = NameLessTextOperator;
331  geopr = NameGreaterEqualTextOperator;
332  collation_aware = true;
333  rdatatype = TEXTOID;
334  break;
335  case BPCHAROID:
336  if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
337  {
338  eqopr = BpcharEqualOperator;
339  ltopr = BpcharPatternLessOperator;
340  geopr = BpcharPatternGreaterEqualOperator;
341  collation_aware = false;
342  }
343  else
344  {
345  eqopr = BpcharEqualOperator;
346  ltopr = BpcharLessOperator;
347  geopr = BpcharGreaterEqualOperator;
348  collation_aware = true;
349  }
350  rdatatype = BPCHAROID;
351  break;
352  case BYTEAOID:
353  eqopr = ByteaEqualOperator;
354  ltopr = ByteaLessOperator;
355  geopr = ByteaGreaterEqualOperator;
356  collation_aware = false;
357  rdatatype = BYTEAOID;
358  break;
359  default:
360  /* Can't get here unless we're attached to the wrong operator */
361  return NIL;
362  }
363 
364  /*
365  * If necessary, coerce the prefix constant to the right type. The given
366  * prefix constant is either text or bytea type, therefore the only case
367  * where we need to do anything is when converting text to bpchar. Those
368  * two types are binary-compatible, so relabeling the Const node is
369  * sufficient.
370  */
371  if (prefix->consttype != rdatatype)
372  {
373  Assert(prefix->consttype == TEXTOID &&
374  rdatatype == BPCHAROID);
375  prefix->consttype = rdatatype;
376  }
377 
378  /*
379  * If we found an exact-match pattern, generate an "=" indexqual.
380  *
381  * Here and below, check to see whether the desired operator is actually
382  * supported by the index opclass, and fail quietly if not. This allows
383  * us to not be concerned with specific opclasses (except for the legacy
384  * "pattern" cases); any index that correctly implements the operators
385  * will work.
386  */
387  if (pstatus == Pattern_Prefix_Exact)
388  {
389  if (!op_in_opfamily(eqopr, opfamily))
390  return NIL;
391  if (indexcollation != expr_coll)
392  return NIL;
393  expr = make_opclause(eqopr, BOOLOID, false,
394  (Expr *) leftop, (Expr *) prefix,
395  InvalidOid, indexcollation);
396  result = list_make1(expr);
397  return result;
398  }
399 
400  /*
401  * Anything other than Pattern_Prefix_Exact is not supported if the
402  * expression collation is nondeterministic. The optimized equality or
403  * prefix tests use bytewise comparisons, which is not consistent with
404  * nondeterministic collations.
405  *
406  * expr_coll is not set for a non-collation-aware data type such as bytea.
407  */
408  if (expr_coll && !get_collation_isdeterministic(expr_coll))
409  return NIL;
410 
411  /*
412  * Otherwise, we have a nonempty required prefix of the values. Some
413  * opclasses support prefix checks directly, otherwise we'll try to
414  * generate a range constraint.
415  */
416  if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
417  {
418  expr = make_opclause(preopr, BOOLOID, false,
419  (Expr *) leftop, (Expr *) prefix,
420  InvalidOid, indexcollation);
421  result = list_make1(expr);
422  return result;
423  }
424 
425  /*
426  * Since we need a range constraint, it's only going to work reliably if
427  * the index is collation-insensitive or has "C" collation. Note that
428  * here we are looking at the index's collation, not the expression's
429  * collation -- this test is *not* dependent on the LIKE/regex operator's
430  * collation.
431  */
432  if (collation_aware &&
433  !pg_newlocale_from_collation(indexcollation)->collate_is_c)
434  return NIL;
435 
436  /*
437  * We can always say "x >= prefix".
438  */
439  if (!op_in_opfamily(geopr, opfamily))
440  return NIL;
441  expr = make_opclause(geopr, BOOLOID, false,
442  (Expr *) leftop, (Expr *) prefix,
443  InvalidOid, indexcollation);
444  result = list_make1(expr);
445 
446  /*-------
447  * If we can create a string larger than the prefix, we can say
448  * "x < greaterstr". NB: we rely on make_greater_string() to generate
449  * a guaranteed-greater string, not just a probably-greater string.
450  * In general this is only guaranteed in C locale, so we'd better be
451  * using a C-locale index collation.
452  *-------
453  */
454  if (!op_in_opfamily(ltopr, opfamily))
455  return result;
456  fmgr_info(get_opcode(ltopr), &ltproc);
457  greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
458  if (greaterstr)
459  {
460  expr = make_opclause(ltopr, BOOLOID, false,
461  (Expr *) leftop, (Expr *) greaterstr,
462  InvalidOid, indexcollation);
463  result = lappend(result, expr);
464  }
465 
466  return result;
467 }
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
static Const * make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
List * lappend(List *list, void *datum)
Definition: list.c:339
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1285
bool get_collation_isdeterministic(Oid colloid)
Definition: lsyscache.c:1054
bool op_in_opfamily(Oid opno, Oid opfamily)
Definition: lsyscache.c:66
Expr * make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, Oid opcollid, Oid inputcollid)
Definition: makefuncs.c:651
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
Definition: fmgr.h:57
Definition: pg_list.h:54

References Assert, Const::consttype, exprType(), fmgr_info(), get_collation_isdeterministic(), get_opcode(), InvalidOid, IsA, lappend(), list_make1, make_greater_string(), make_opclause(), NIL, OidIsValid, op_in_opfamily(), pattern_fixed_prefix(), Pattern_Prefix_Exact, Pattern_Prefix_None, and pg_newlocale_from_collation().

Referenced by like_regex_support().

◆ nlikejoinsel()

Datum nlikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 945 of file like_support.c.

946 {
948 }

References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ nlikesel()

Datum nlikesel ( PG_FUNCTION_ARGS  )

Definition at line 854 of file like_support.c.

855 {
857 }

References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.

◆ pattern_char_isalpha()

static int pattern_char_isalpha ( char  c,
bool  is_multibyte,
pg_locale_t  locale 
)
static

Definition at line 1493 of file like_support.c.

1495 {
1496  if (locale->ctype_is_c)
1497  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1498  else if (is_multibyte && IS_HIGHBIT_SET(c))
1499  return true;
1500  else if (locale->provider != COLLPROVIDER_LIBC)
1501  return IS_HIGHBIT_SET(c) ||
1502  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1503  else
1504  return isalpha_l((unsigned char) c, locale->info.lt);
1505 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1109
char * c
#define isalpha_l
Definition: win32_port.h:449

References IS_HIGHBIT_SET, isalpha_l, and locale.

Referenced by like_fixed_prefix().

◆ pattern_fixed_prefix()

static Pattern_Prefix_Status pattern_fixed_prefix ( Const patt,
Pattern_Type  ptype,
Oid  collation,
Const **  prefix,
Selectivity rest_selec 
)
static

Definition at line 1159 of file like_support.c.

1161 {
1162  Pattern_Prefix_Status result;
1163 
1164  switch (ptype)
1165  {
1166  case Pattern_Type_Like:
1167  result = like_fixed_prefix(patt, false, collation,
1168  prefix, rest_selec);
1169  break;
1170  case Pattern_Type_Like_IC:
1171  result = like_fixed_prefix(patt, true, collation,
1172  prefix, rest_selec);
1173  break;
1174  case Pattern_Type_Regex:
1175  result = regex_fixed_prefix(patt, false, collation,
1176  prefix, rest_selec);
1177  break;
1178  case Pattern_Type_Regex_IC:
1179  result = regex_fixed_prefix(patt, true, collation,
1180  prefix, rest_selec);
1181  break;
1182  case Pattern_Type_Prefix:
1183  /* Prefix type work is trivial. */
1184  result = Pattern_Prefix_Partial;
1185  *prefix = makeConst(patt->consttype,
1186  patt->consttypmod,
1187  patt->constcollid,
1188  patt->constlen,
1189  datumCopy(patt->constvalue,
1190  patt->constbyval,
1191  patt->constlen),
1192  patt->constisnull,
1193  patt->constbyval);
1194  if (rest_selec != NULL)
1195  *rest_selec = 1.0; /* all */
1196  break;
1197  default:
1198  elog(ERROR, "unrecognized ptype: %d", (int) ptype);
1199  result = Pattern_Prefix_None; /* keep compiler quiet */
1200  break;
1201  }
1202  return result;
1203 }
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
#define elog(elevel,...)
Definition: elog.h:225
static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
Definition: like_support.c:989
static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:301

References Const::consttype, datumCopy(), elog, ERROR, like_fixed_prefix(), makeConst(), Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Type_Like, Pattern_Type_Like_IC, Pattern_Type_Prefix, Pattern_Type_Regex, Pattern_Type_Regex_IC, and regex_fixed_prefix().

Referenced by match_pattern_prefix(), and patternsel_common().

◆ patternjoinsel()

static double patternjoinsel ( PG_FUNCTION_ARGS  ,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 872 of file like_support.c.

873 {
874  /* For the moment we just punt. */
875  return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
876 }

References DEFAULT_MATCH_SEL.

Referenced by iclikejoinsel(), icnlikejoinsel(), icregexeqjoinsel(), icregexnejoinsel(), likejoinsel(), nlikejoinsel(), prefixjoinsel(), regexeqjoinsel(), and regexnejoinsel().

◆ patternsel()

static double patternsel ( PG_FUNCTION_ARGS  ,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 757 of file like_support.c.

758 {
760  Oid operator = PG_GETARG_OID(1);
761  List *args = (List *) PG_GETARG_POINTER(2);
762  int varRelid = PG_GETARG_INT32(3);
763  Oid collation = PG_GET_COLLATION();
764 
765  /*
766  * If this is for a NOT LIKE or similar operator, get the corresponding
767  * positive-match operator and work with that.
768  */
769  if (negate)
770  {
771  operator = get_negator(operator);
772  if (!OidIsValid(operator))
773  elog(ERROR, "patternsel called for operator without a negator");
774  }
775 
776  return patternsel_common(root,
777  operator,
778  InvalidOid,
779  args,
780  varRelid,
781  collation,
782  ptype,
783  negate);
784 }
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GET_COLLATION()
Definition: fmgr.h:198
Oid get_negator(Oid opno)
Definition: lsyscache.c:1533
tree ctl root
Definition: radixtree.h:1888

References generate_unaccent_rules::args, elog, ERROR, get_negator(), InvalidOid, OidIsValid, patternsel_common(), PG_GET_COLLATION, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, and root.

Referenced by iclikesel(), icnlikesel(), icregexeqsel(), icregexnesel(), likesel(), nlikesel(), prefixsel(), regexeqsel(), and regexnesel().

◆ patternsel_common()

static double patternsel_common ( PlannerInfo root,
Oid  oprid,
Oid  opfuncid,
List args,
int  varRelid,
Oid  collation,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 483 of file like_support.c.

491 {
492  VariableStatData vardata;
493  Node *other;
494  bool varonleft;
495  Datum constval;
496  Oid consttype;
497  Oid vartype;
498  Oid rdatatype;
499  Oid eqopr;
500  Oid ltopr;
501  Oid geopr;
502  Pattern_Prefix_Status pstatus;
503  Const *patt;
504  Const *prefix = NULL;
505  Selectivity rest_selec = 0;
506  double nullfrac = 0.0;
507  double result;
508 
509  /*
510  * Initialize result to the appropriate default estimate depending on
511  * whether it's a match or not-match operator.
512  */
513  if (negate)
514  result = 1.0 - DEFAULT_MATCH_SEL;
515  else
516  result = DEFAULT_MATCH_SEL;
517 
518  /*
519  * If expression is not variable op constant, then punt and return the
520  * default estimate.
521  */
522  if (!get_restriction_variable(root, args, varRelid,
523  &vardata, &other, &varonleft))
524  return result;
525  if (!varonleft || !IsA(other, Const))
526  {
527  ReleaseVariableStats(vardata);
528  return result;
529  }
530 
531  /*
532  * If the constant is NULL, assume operator is strict and return zero, ie,
533  * operator will never return TRUE. (It's zero even for a negator op.)
534  */
535  if (((Const *) other)->constisnull)
536  {
537  ReleaseVariableStats(vardata);
538  return 0.0;
539  }
540  constval = ((Const *) other)->constvalue;
541  consttype = ((Const *) other)->consttype;
542 
543  /*
544  * The right-hand const is type text or bytea for all supported operators.
545  * We do not expect to see binary-compatible types here, since
546  * const-folding should have relabeled the const to exactly match the
547  * operator's declared type.
548  */
549  if (consttype != TEXTOID && consttype != BYTEAOID)
550  {
551  ReleaseVariableStats(vardata);
552  return result;
553  }
554 
555  /*
556  * Similarly, the exposed type of the left-hand side should be one of
557  * those we know. (Do not look at vardata.atttype, which might be
558  * something binary-compatible but different.) We can use it to identify
559  * the comparison operators and the required type of the comparison
560  * constant, much as in match_pattern_prefix().
561  */
562  vartype = vardata.vartype;
563 
564  switch (vartype)
565  {
566  case TEXTOID:
567  eqopr = TextEqualOperator;
568  ltopr = TextLessOperator;
569  geopr = TextGreaterEqualOperator;
570  rdatatype = TEXTOID;
571  break;
572  case NAMEOID:
573 
574  /*
575  * Note that here, we need the RHS type to be text, so that the
576  * comparison value isn't improperly truncated to NAMEDATALEN.
577  */
578  eqopr = NameEqualTextOperator;
579  ltopr = NameLessTextOperator;
580  geopr = NameGreaterEqualTextOperator;
581  rdatatype = TEXTOID;
582  break;
583  case BPCHAROID:
584  eqopr = BpcharEqualOperator;
585  ltopr = BpcharLessOperator;
586  geopr = BpcharGreaterEqualOperator;
587  rdatatype = BPCHAROID;
588  break;
589  case BYTEAOID:
590  eqopr = ByteaEqualOperator;
591  ltopr = ByteaLessOperator;
592  geopr = ByteaGreaterEqualOperator;
593  rdatatype = BYTEAOID;
594  break;
595  default:
596  /* Can't get here unless we're attached to the wrong operator */
597  ReleaseVariableStats(vardata);
598  return result;
599  }
600 
601  /*
602  * Grab the nullfrac for use below.
603  */
604  if (HeapTupleIsValid(vardata.statsTuple))
605  {
606  Form_pg_statistic stats;
607 
608  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
609  nullfrac = stats->stanullfrac;
610  }
611 
612  /*
613  * Pull out any fixed prefix implied by the pattern, and estimate the
614  * fractional selectivity of the remainder of the pattern. Unlike many
615  * other selectivity estimators, we use the pattern operator's actual
616  * collation for this step. This is not because we expect the collation
617  * to make a big difference in the selectivity estimate (it seldom would),
618  * but because we want to be sure we cache compiled regexps under the
619  * right cache key, so that they can be re-used at runtime.
620  */
621  patt = (Const *) other;
622  pstatus = pattern_fixed_prefix(patt, ptype, collation,
623  &prefix, &rest_selec);
624 
625  /*
626  * If necessary, coerce the prefix constant to the right type. The only
627  * case where we need to do anything is when converting text to bpchar.
628  * Those two types are binary-compatible, so relabeling the Const node is
629  * sufficient.
630  */
631  if (prefix && prefix->consttype != rdatatype)
632  {
633  Assert(prefix->consttype == TEXTOID &&
634  rdatatype == BPCHAROID);
635  prefix->consttype = rdatatype;
636  }
637 
638  if (pstatus == Pattern_Prefix_Exact)
639  {
640  /*
641  * Pattern specifies an exact match, so estimate as for '='
642  */
643  result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
644  false, true, false);
645  }
646  else
647  {
648  /*
649  * Not exact-match pattern. If we have a sufficiently large
650  * histogram, estimate selectivity for the histogram part of the
651  * population by counting matches in the histogram. If not, estimate
652  * selectivity of the fixed prefix and remainder of pattern
653  * separately, then combine the two to get an estimate of the
654  * selectivity for the part of the column population represented by
655  * the histogram. (For small histograms, we combine these
656  * approaches.)
657  *
658  * We then add up data for any most-common-values values; these are
659  * not in the histogram population, and we can get exact answers for
660  * them by applying the pattern operator, so there's no reason to
661  * approximate. (If the MCVs cover a significant part of the total
662  * population, this gives us a big leg up in accuracy.)
663  */
664  Selectivity selec;
665  int hist_size;
666  FmgrInfo opproc;
667  double mcv_selec,
668  sumcommon;
669 
670  /* Try to use the histogram entries to get selectivity */
671  if (!OidIsValid(opfuncid))
672  opfuncid = get_opcode(oprid);
673  fmgr_info(opfuncid, &opproc);
674 
675  selec = histogram_selectivity(&vardata, &opproc, collation,
676  constval, true,
677  10, 1, &hist_size);
678 
679  /* If not at least 100 entries, use the heuristic method */
680  if (hist_size < 100)
681  {
682  Selectivity heursel;
684 
685  if (pstatus == Pattern_Prefix_Partial)
686  prefixsel = prefix_selectivity(root, &vardata,
687  eqopr, ltopr, geopr,
688  collation,
689  prefix);
690  else
691  prefixsel = 1.0;
692  heursel = prefixsel * rest_selec;
693 
694  if (selec < 0) /* fewer than 10 histogram entries? */
695  selec = heursel;
696  else
697  {
698  /*
699  * For histogram sizes from 10 to 100, we combine the
700  * histogram and heuristic selectivities, putting increasingly
701  * more trust in the histogram for larger sizes.
702  */
703  double hist_weight = hist_size / 100.0;
704 
705  selec = selec * hist_weight + heursel * (1.0 - hist_weight);
706  }
707  }
708 
709  /* In any case, don't believe extremely small or large estimates. */
710  if (selec < 0.0001)
711  selec = 0.0001;
712  else if (selec > 0.9999)
713  selec = 0.9999;
714 
715  /*
716  * If we have most-common-values info, add up the fractions of the MCV
717  * entries that satisfy MCV OP PATTERN. These fractions contribute
718  * directly to the result selectivity. Also add up the total fraction
719  * represented by MCV entries.
720  */
721  mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
722  constval, true,
723  &sumcommon);
724 
725  /*
726  * Now merge the results from the MCV and histogram calculations,
727  * realizing that the histogram covers only the non-null values that
728  * are not listed in MCV.
729  */
730  selec *= 1.0 - nullfrac - sumcommon;
731  selec += mcv_selec;
732  result = selec;
733  }
734 
735  /* now adjust if we wanted not-match rather than match */
736  if (negate)
737  result = 1.0 - result - nullfrac;
738 
739  /* result should be in range, but make sure... */
740  CLAMP_PROBABILITY(result);
741 
742  if (prefix)
743  {
744  pfree(DatumGetPointer(prefix->constvalue));
745  pfree(prefix);
746  }
747 
748  ReleaseVariableStats(vardata);
749 
750  return result;
751 }
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
Datum prefixsel(PG_FUNCTION_ARGS)
Definition: like_support.c:817
Oid oprid(Operator op)
Definition: parse_oper.c:238
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4894
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:295
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:732
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:823
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
HeapTuple statsTuple
Definition: selfuncs.h:89

References generate_unaccent_rules::args, Assert, CLAMP_PROBABILITY, Const::consttype, DatumGetPointer(), DEFAULT_MATCH_SEL, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT, HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), OidIsValid, oprid(), pattern_fixed_prefix(), Pattern_Prefix_Exact, Pattern_Prefix_Partial, pfree(), prefix_selectivity(), prefixsel(), ReleaseVariableStats, root, VariableStatData::statsTuple, var_eq_const(), and VariableStatData::vartype.

Referenced by like_regex_support(), and patternsel().

◆ prefix_selectivity()

static Selectivity prefix_selectivity ( PlannerInfo root,
VariableStatData vardata,
Oid  eqopr,
Oid  ltopr,
Oid  geopr,
Oid  collation,
Const prefixcon 
)
static

Definition at line 1224 of file like_support.c.

1228 {
1230  FmgrInfo opproc;
1231  Const *greaterstrcon;
1232  Selectivity eq_sel;
1233 
1234  /* Estimate the selectivity of "x >= prefix" */
1235  fmgr_info(get_opcode(geopr), &opproc);
1236 
1238  geopr, &opproc, true, true,
1239  collation,
1240  prefixcon->constvalue,
1241  prefixcon->consttype);
1242 
1243  if (prefixsel < 0.0)
1244  {
1245  /* No histogram is present ... return a suitable default estimate */
1246  return DEFAULT_MATCH_SEL;
1247  }
1248 
1249  /*
1250  * If we can create a string larger than the prefix, say "x < greaterstr".
1251  */
1252  fmgr_info(get_opcode(ltopr), &opproc);
1253  greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
1254  if (greaterstrcon)
1255  {
1256  Selectivity topsel;
1257 
1258  topsel = ineq_histogram_selectivity(root, vardata,
1259  ltopr, &opproc, false, false,
1260  collation,
1261  greaterstrcon->constvalue,
1262  greaterstrcon->consttype);
1263 
1264  /* ineq_histogram_selectivity worked before, it shouldn't fail now */
1265  Assert(topsel >= 0.0);
1266 
1267  /*
1268  * Merge the two selectivities in the same way as for a range query
1269  * (see clauselist_selectivity()). Note that we don't need to worry
1270  * about double-exclusion of nulls, since ineq_histogram_selectivity
1271  * doesn't count those anyway.
1272  */
1273  prefixsel = topsel + prefixsel - 1.0;
1274  }
1275 
1276  /*
1277  * If the prefix is long then the two bounding values might be too close
1278  * together for the histogram to distinguish them usefully, resulting in a
1279  * zero estimate (plus or minus roundoff error). To avoid returning a
1280  * ridiculously small estimate, compute the estimated selectivity for
1281  * "variable = 'foo'", and clamp to that. (Obviously, the resultant
1282  * estimate should be at least that.)
1283  *
1284  * We apply this even if we couldn't make a greater string. That case
1285  * suggests that the prefix is near the maximum possible, and thus
1286  * probably off the end of the histogram, and thus we probably got a very
1287  * small estimate from the >= condition; so we still need to clamp.
1288  */
1289  eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
1290  false, true, false);
1291 
1292  prefixsel = Max(prefixsel, eq_sel);
1293 
1294  return prefixsel;
1295 }
#define Max(x, y)
Definition: c.h:952
double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
Definition: selfuncs.c:1041

References Assert, Const::consttype, DEFAULT_MATCH_SEL, fmgr_info(), get_opcode(), ineq_histogram_selectivity(), make_greater_string(), Max, prefixsel(), root, and var_eq_const().

Referenced by patternsel_common().

◆ prefixjoinsel()

Datum prefixjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 909 of file like_support.c.

910 {
912 }

References Pattern_Type_Prefix, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ prefixsel()

Datum prefixsel ( PG_FUNCTION_ARGS  )

Definition at line 817 of file like_support.c.

818 {
820 }

References Pattern_Type_Prefix, patternsel(), and PG_RETURN_FLOAT8.

Referenced by patternsel_common(), prefix_selectivity(), and regex_selectivity().

◆ regex_fixed_prefix()

static Pattern_Prefix_Status regex_fixed_prefix ( Const patt_const,
bool  case_insensitive,
Oid  collation,
Const **  prefix_const,
Selectivity rest_selec 
)
static

Definition at line 1091 of file like_support.c.

1093 {
1094  Oid typeid = patt_const->consttype;
1095  char *prefix;
1096  bool exact;
1097 
1098  /*
1099  * Should be unnecessary, there are no bytea regex operators defined. As
1100  * such, it should be noted that the rest of this function has *not* been
1101  * made safe for binary (possibly NULL containing) strings.
1102  */
1103  if (typeid == BYTEAOID)
1104  ereport(ERROR,
1105  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1106  errmsg("regular-expression matching not supported on type bytea")));
1107 
1108  /* Use the regexp machinery to extract the prefix, if any */
1109  prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
1110  case_insensitive, collation,
1111  &exact);
1112 
1113  if (prefix == NULL)
1114  {
1115  *prefix_const = NULL;
1116 
1117  if (rest_selec != NULL)
1118  {
1119  char *patt = TextDatumGetCString(patt_const->constvalue);
1120 
1121  *rest_selec = regex_selectivity(patt, strlen(patt),
1122  case_insensitive,
1123  0);
1124  pfree(patt);
1125  }
1126 
1127  return Pattern_Prefix_None;
1128  }
1129 
1130  *prefix_const = string_to_const(prefix, typeid);
1131 
1132  if (rest_selec != NULL)
1133  {
1134  if (exact)
1135  {
1136  /* Exact match, so there's no additional selectivity */
1137  *rest_selec = 1.0;
1138  }
1139  else
1140  {
1141  char *patt = TextDatumGetCString(patt_const->constvalue);
1142 
1143  *rest_selec = regex_selectivity(patt, strlen(patt),
1144  case_insensitive,
1145  strlen(prefix));
1146  pfree(patt);
1147  }
1148  }
1149 
1150  pfree(prefix);
1151 
1152  if (exact)
1153  return Pattern_Prefix_Exact; /* pattern specifies exact match */
1154  else
1155  return Pattern_Prefix_Partial;
1156 }
#define DatumGetTextPP(X)
Definition: fmgr.h:292
static Selectivity regex_selectivity(const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
char * regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, bool *exact)
Definition: regexp.c:1953

References Const::consttype, DatumGetTextPP, ereport, errcode(), errmsg(), ERROR, Pattern_Prefix_Exact, Pattern_Prefix_None, Pattern_Prefix_Partial, pfree(), regex_selectivity(), regexp_fixed_prefix(), string_to_const(), and TextDatumGetCString.

Referenced by pattern_fixed_prefix().

◆ regex_selectivity()

static Selectivity regex_selectivity ( const char *  patt,
int  pattlen,
bool  case_insensitive,
int  fixed_prefix_len 
)
static

Definition at line 1447 of file like_support.c.

1449 {
1450  Selectivity sel;
1451 
1452  /* If patt doesn't end with $, consider it to have a trailing wildcard */
1453  if (pattlen > 0 && patt[pattlen - 1] == '$' &&
1454  (pattlen == 1 || patt[pattlen - 2] != '\\'))
1455  {
1456  /* has trailing $ */
1457  sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
1458  }
1459  else
1460  {
1461  /* no trailing $ */
1462  sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
1463  sel *= FULL_WILDCARD_SEL;
1464  }
1465 
1466  /*
1467  * If there's a fixed prefix, discount its selectivity. We have to be
1468  * careful here since a very long prefix could result in pow's result
1469  * underflowing to zero (in which case "sel" probably has as well).
1470  */
1471  if (fixed_prefix_len > 0)
1472  {
1473  double prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
1474 
1475  if (prefixsel > 0.0)
1476  sel /= prefixsel;
1477  }
1478 
1479  /* Make sure result stays in range */
1480  CLAMP_PROBABILITY(sel);
1481  return sel;
1482 }
static Selectivity regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)

References CLAMP_PROBABILITY, FIXED_CHAR_SEL, FULL_WILDCARD_SEL, prefixsel(), and regex_selectivity_sub().

Referenced by regex_fixed_prefix().

◆ regex_selectivity_sub()

static Selectivity regex_selectivity_sub ( const char *  patt,
int  pattlen,
bool  case_insensitive 
)
static

Definition at line 1352 of file like_support.c.

1353 {
1354  Selectivity sel = 1.0;
1355  int paren_depth = 0;
1356  int paren_pos = 0; /* dummy init to keep compiler quiet */
1357  int pos;
1358 
1359  /* since this function recurses, it could be driven to stack overflow */
1361 
1362  for (pos = 0; pos < pattlen; pos++)
1363  {
1364  if (patt[pos] == '(')
1365  {
1366  if (paren_depth == 0)
1367  paren_pos = pos; /* remember start of parenthesized item */
1368  paren_depth++;
1369  }
1370  else if (patt[pos] == ')' && paren_depth > 0)
1371  {
1372  paren_depth--;
1373  if (paren_depth == 0)
1374  sel *= regex_selectivity_sub(patt + (paren_pos + 1),
1375  pos - (paren_pos + 1),
1376  case_insensitive);
1377  }
1378  else if (patt[pos] == '|' && paren_depth == 0)
1379  {
1380  /*
1381  * If unquoted | is present at paren level 0 in pattern, we have
1382  * multiple alternatives; sum their probabilities.
1383  */
1384  sel += regex_selectivity_sub(patt + (pos + 1),
1385  pattlen - (pos + 1),
1386  case_insensitive);
1387  break; /* rest of pattern is now processed */
1388  }
1389  else if (patt[pos] == '[')
1390  {
1391  bool negclass = false;
1392 
1393  if (patt[++pos] == '^')
1394  {
1395  negclass = true;
1396  pos++;
1397  }
1398  if (patt[pos] == ']') /* ']' at start of class is not special */
1399  pos++;
1400  while (pos < pattlen && patt[pos] != ']')
1401  pos++;
1402  if (paren_depth == 0)
1403  sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
1404  }
1405  else if (patt[pos] == '.')
1406  {
1407  if (paren_depth == 0)
1408  sel *= ANY_CHAR_SEL;
1409  }
1410  else if (patt[pos] == '*' ||
1411  patt[pos] == '?' ||
1412  patt[pos] == '+')
1413  {
1414  /* Ought to be smarter about quantifiers... */
1415  if (paren_depth == 0)
1416  sel *= PARTIAL_WILDCARD_SEL;
1417  }
1418  else if (patt[pos] == '{')
1419  {
1420  while (pos < pattlen && patt[pos] != '}')
1421  pos++;
1422  if (paren_depth == 0)
1423  sel *= PARTIAL_WILDCARD_SEL;
1424  }
1425  else if (patt[pos] == '\\')
1426  {
1427  /* backslash quotes the next character */
1428  pos++;
1429  if (pos >= pattlen)
1430  break;
1431  if (paren_depth == 0)
1432  sel *= FIXED_CHAR_SEL;
1433  }
1434  else
1435  {
1436  if (paren_depth == 0)
1437  sel *= FIXED_CHAR_SEL;
1438  }
1439  }
1440  /* Could get sel > 1 if multiple wildcards */
1441  if (sel > 1.0)
1442  sel = 1.0;
1443  return sel;
1444 }
#define PARTIAL_WILDCARD_SEL
#define CHAR_RANGE_SEL
void check_stack_depth(void)
Definition: postgres.c:3572

References ANY_CHAR_SEL, CHAR_RANGE_SEL, check_stack_depth(), FIXED_CHAR_SEL, and PARTIAL_WILDCARD_SEL.

Referenced by regex_selectivity().

◆ regexeqjoinsel()

Datum regexeqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 882 of file like_support.c.

883 {
885 }

References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ regexeqsel()

Datum regexeqsel ( PG_FUNCTION_ARGS  )

Definition at line 790 of file like_support.c.

791 {
793 }

References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.

◆ regexnejoinsel()

Datum regexnejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 927 of file like_support.c.

928 {
930 }

References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ regexnesel()

Datum regexnesel ( PG_FUNCTION_ARGS  )

Definition at line 836 of file like_support.c.

837 {
839 }

References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.

◆ string_to_bytea_const()

static Const * string_to_bytea_const ( const char *  str,
size_t  str_len 
)
static

Definition at line 1777 of file like_support.c.

1778 {
1779  bytea *bstr = palloc(VARHDRSZ + str_len);
1780  Datum conval;
1781 
1782  memcpy(VARDATA(bstr), str, str_len);
1783  SET_VARSIZE(bstr, VARHDRSZ + str_len);
1784  conval = PointerGetDatum(bstr);
1785 
1786  return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
1787 }
const char * str

References InvalidOid, makeConst(), palloc(), PointerGetDatum(), SET_VARSIZE, str, VARDATA, and VARHDRSZ.

Referenced by like_fixed_prefix(), and make_greater_string().

◆ string_to_const()

static Const * string_to_const ( const char *  str,
Oid  datatype 
)
static

Definition at line 1734 of file like_support.c.

1735 {
1736  Datum conval = string_to_datum(str, datatype);
1737  Oid collation;
1738  int constlen;
1739 
1740  /*
1741  * We only need to support a few datatypes here, so hard-wire properties
1742  * instead of incurring the expense of catalog lookups.
1743  */
1744  switch (datatype)
1745  {
1746  case TEXTOID:
1747  case VARCHAROID:
1748  case BPCHAROID:
1749  collation = DEFAULT_COLLATION_OID;
1750  constlen = -1;
1751  break;
1752 
1753  case NAMEOID:
1754  collation = C_COLLATION_OID;
1755  constlen = NAMEDATALEN;
1756  break;
1757 
1758  case BYTEAOID:
1759  collation = InvalidOid;
1760  constlen = -1;
1761  break;
1762 
1763  default:
1764  elog(ERROR, "unexpected datatype in string_to_const: %u",
1765  datatype);
1766  return NULL;
1767  }
1768 
1769  return makeConst(datatype, -1, collation, constlen,
1770  conval, false, false);
1771 }
static Datum string_to_datum(const char *str, Oid datatype)
#define NAMEDATALEN

References elog, ERROR, InvalidOid, makeConst(), NAMEDATALEN, str, and string_to_datum().

Referenced by like_fixed_prefix(), make_greater_string(), and regex_fixed_prefix().

◆ string_to_datum()

static Datum string_to_datum ( const char *  str,
Oid  datatype 
)
static

Definition at line 1714 of file like_support.c.

1715 {
1716  Assert(str != NULL);
1717 
1718  /*
1719  * We cheat a little by assuming that CStringGetTextDatum() will do for
1720  * bpchar and varchar constants too...
1721  */
1722  if (datatype == NAMEOID)
1724  else if (datatype == BYTEAOID)
1726  else
1727  return CStringGetTextDatum(str);
1728 }
#define CStringGetTextDatum(s)
Definition: builtins.h:97
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:350
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:290

References Assert, byteain(), CStringGetDatum(), CStringGetTextDatum, DirectFunctionCall1, namein(), and str.

Referenced by string_to_const().

◆ text_starts_with_support()

Datum text_starts_with_support ( PG_FUNCTION_ARGS  )

Definition at line 147 of file like_support.c.

148 {
149  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
150 
152 }
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
static Node * like_regex_support(Node *rawreq, Pattern_Type ptype)
Definition: like_support.c:156

References like_regex_support(), Pattern_Type_Prefix, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ texticlike_support()

Datum texticlike_support ( PG_FUNCTION_ARGS  )

Definition at line 123 of file like_support.c.

124 {
125  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
126 
128 }

References like_regex_support(), Pattern_Type_Like_IC, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ texticregexeq_support()

Datum texticregexeq_support ( PG_FUNCTION_ARGS  )

Definition at line 139 of file like_support.c.

140 {
141  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
142 
144 }

References like_regex_support(), Pattern_Type_Regex_IC, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ textlike_support()

Datum textlike_support ( PG_FUNCTION_ARGS  )

Definition at line 115 of file like_support.c.

116 {
117  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
118 
120 }

References like_regex_support(), Pattern_Type_Like, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ textregexeq_support()

Datum textregexeq_support ( PG_FUNCTION_ARGS  )

Definition at line 131 of file like_support.c.

132 {
133  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
134 
136 }

References like_regex_support(), Pattern_Type_Regex, PG_GETARG_POINTER, and PG_RETURN_POINTER.