PostgreSQL Source Code  git master
like_support.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
#include "utils/varlena.h"
Include dependency graph for like_support.c:

Go to the source code of this file.

Macros

#define FIXED_CHAR_SEL   0.20 /* about 1/5 */
 
#define CHAR_RANGE_SEL   0.25
 
#define ANY_CHAR_SEL   0.9 /* not 1, since it won't match end-of-string */
 
#define FULL_WILDCARD_SEL   5.0
 
#define PARTIAL_WILDCARD_SEL   2.0
 

Enumerations

enum  Pattern_Type {
  Pattern_Type_Like , Pattern_Type_Like_IC , Pattern_Type_Regex , Pattern_Type_Regex_IC ,
  Pattern_Type_Prefix
}
 
enum  Pattern_Prefix_Status { Pattern_Prefix_None , Pattern_Prefix_Partial , Pattern_Prefix_Exact }
 

Functions

static Node * like_regex_support (Node *rawreq, Pattern_Type ptype)
 
static List * match_pattern_prefix (Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
 
static double patternsel_common (PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
 
static Pattern_Prefix_Status pattern_fixed_prefix (Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
 
static Selectivity prefix_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
 
static Selectivity like_selectivity (const char *patt, int pattlen, bool case_insensitive)
 
static Selectivity regex_selectivity (const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
 
static int pattern_char_isalpha (char c, bool is_multibyte, pg_locale_t locale)
 
static Const * make_greater_string (const Const *str_const, FmgrInfo *ltproc, Oid collation)
 
static Datum string_to_datum (const char *str, Oid datatype)
 
static Const * string_to_const (const char *str, Oid datatype)
 
static Const * string_to_bytea_const (const char *str, size_t str_len)
 
Datum textlike_support (PG_FUNCTION_ARGS)
 
Datum texticlike_support (PG_FUNCTION_ARGS)
 
Datum textregexeq_support (PG_FUNCTION_ARGS)
 
Datum texticregexeq_support (PG_FUNCTION_ARGS)
 
Datum text_starts_with_support (PG_FUNCTION_ARGS)
 
static double patternsel (PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 
Datum regexeqsel (PG_FUNCTION_ARGS)
 
Datum icregexeqsel (PG_FUNCTION_ARGS)
 
Datum likesel (PG_FUNCTION_ARGS)
 
Datum prefixsel (PG_FUNCTION_ARGS)
 
Datum iclikesel (PG_FUNCTION_ARGS)
 
Datum regexnesel (PG_FUNCTION_ARGS)
 
Datum icregexnesel (PG_FUNCTION_ARGS)
 
Datum nlikesel (PG_FUNCTION_ARGS)
 
Datum icnlikesel (PG_FUNCTION_ARGS)
 
static double patternjoinsel (PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 
Datum regexeqjoinsel (PG_FUNCTION_ARGS)
 
Datum icregexeqjoinsel (PG_FUNCTION_ARGS)
 
Datum likejoinsel (PG_FUNCTION_ARGS)
 
Datum prefixjoinsel (PG_FUNCTION_ARGS)
 
Datum iclikejoinsel (PG_FUNCTION_ARGS)
 
Datum regexnejoinsel (PG_FUNCTION_ARGS)
 
Datum icregexnejoinsel (PG_FUNCTION_ARGS)
 
Datum nlikejoinsel (PG_FUNCTION_ARGS)
 
Datum icnlikejoinsel (PG_FUNCTION_ARGS)
 
static Pattern_Prefix_Status like_fixed_prefix (Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
 
static Pattern_Prefix_Status regex_fixed_prefix (Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
 
static Selectivity regex_selectivity_sub (const char *patt, int pattlen, bool case_insensitive)
 
static bool byte_increment (unsigned char *ptr, int len)
 

Macro Definition Documentation

◆ ANY_CHAR_SEL

#define ANY_CHAR_SEL   0.9 /* not 1, since it won't match end-of-string */

Definition at line 1313 of file like_support.c.

◆ CHAR_RANGE_SEL

#define CHAR_RANGE_SEL   0.25

Definition at line 1312 of file like_support.c.

◆ FIXED_CHAR_SEL

#define FIXED_CHAR_SEL   0.20 /* about 1/5 */

Definition at line 1311 of file like_support.c.

◆ FULL_WILDCARD_SEL

#define FULL_WILDCARD_SEL   5.0

Definition at line 1314 of file like_support.c.

◆ PARTIAL_WILDCARD_SEL

#define PARTIAL_WILDCARD_SEL   2.0

Definition at line 1315 of file like_support.c.

Enumeration Type Documentation

◆ Pattern_Prefix_Status

Enumerator
Pattern_Prefix_None 
Pattern_Prefix_Partial 
Pattern_Prefix_Exact 

Definition at line 67 of file like_support.c.

68 {
Pattern_Prefix_Status
Definition: like_support.c:68
@ Pattern_Prefix_Partial
Definition: like_support.c:69
@ Pattern_Prefix_None
Definition: like_support.c:69
@ Pattern_Prefix_Exact
Definition: like_support.c:69

◆ Pattern_Type

Enumerator
Pattern_Type_Like 
Pattern_Type_Like_IC 
Pattern_Type_Regex 
Pattern_Type_Regex_IC 
Pattern_Type_Prefix 

Definition at line 58 of file like_support.c.

59 {
65 } Pattern_Type;
Pattern_Type
Definition: like_support.c:59
@ Pattern_Type_Prefix
Definition: like_support.c:64
@ Pattern_Type_Regex_IC
Definition: like_support.c:63
@ Pattern_Type_Like
Definition: like_support.c:60
@ Pattern_Type_Regex
Definition: like_support.c:62
@ Pattern_Type_Like_IC
Definition: like_support.c:61

Function Documentation

◆ byte_increment()

static bool byte_increment ( unsigned char *  ptr,
int  len 
)
static

Definition at line 1518 of file like_support.c.

/*
 * Character incrementer used for bytea: step the single byte at *ptr to the
 * next higher value in place.  Returns true on success, false when the byte
 * cannot be raised any further.  The len argument is not consulted here.
 */
1519 {
/* A byte already at the maximum value 255 has no successor; leave it alone. */
1520  if (*ptr >= 255)
1521  return false;
/* Otherwise bump the byte in place and report success. */
1522  (*ptr)++;
1523  return true;
1524 }

Referenced by make_greater_string().

◆ iclikejoinsel()

Datum iclikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 921 of file like_support.c.

922 {
924 }
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
static double patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:875

References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ iclikesel()

Datum iclikesel ( PG_FUNCTION_ARGS  )

Definition at line 830 of file like_support.c.

831 {
833 }
static double patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Definition: like_support.c:760

References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icnlikejoinsel()

Datum icnlikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 957 of file like_support.c.

958 {
960 }

References Pattern_Type_Like_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icnlikesel()

Datum icnlikesel ( PG_FUNCTION_ARGS  )

Definition at line 866 of file like_support.c.

867 {
869 }

References Pattern_Type_Like_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icregexeqjoinsel()

Datum icregexeqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 894 of file like_support.c.

895 {
897 }

References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icregexeqsel()

Datum icregexeqsel ( PG_FUNCTION_ARGS  )

Definition at line 802 of file like_support.c.

803 {
805 }

References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ icregexnejoinsel()

Datum icregexnejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 939 of file like_support.c.

940 {
942 }

References Pattern_Type_Regex_IC, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ icregexnesel()

Datum icregexnesel ( PG_FUNCTION_ARGS  )

Definition at line 848 of file like_support.c.

849 {
851 }

References Pattern_Type_Regex_IC, patternsel(), and PG_RETURN_FLOAT8.

◆ like_fixed_prefix()

static Pattern_Prefix_Status like_fixed_prefix ( Const *  patt_const,
bool  case_insensitive,
Oid  collation,
Const **  prefix_const,
Selectivity *  rest_selec 
)
static

Definition at line 992 of file like_support.c.

/*
 * Extract the literal (wildcard-free) leading part of a LIKE pattern.
 *
 * patt_const       the pattern constant; must be of type text or bytea.
 * case_insensitive true for ILIKE-style matching; this is rejected for bytea
 *                  and requires a valid collation.
 * collation        collation used to look up the locale when
 *                  case_insensitive is set.
 * prefix_const     output: receives a newly built Const holding the prefix
 *                  (possibly empty), of the same type as the pattern.
 * rest_selec       optional output (may be NULL): receives like_selectivity()
 *                  of the part of the pattern that was not consumed.
 *
 * Returns Pattern_Prefix_Exact when the whole pattern was consumed,
 * Pattern_Prefix_Partial when a nonempty prefix was found before the scan
 * stopped, and Pattern_Prefix_None otherwise.
 */
994 {
995  char *match;
996  char *patt;
997  int pattlen;
998  Oid typeid = patt_const->consttype;
999  int pos,
1000  match_pos;
1001  bool is_multibyte = (pg_database_encoding_max_length() > 1);
1002  pg_locale_t locale = 0;
1003 
1004  /* the right-hand const is type text or bytea */
1005  Assert(typeid == BYTEAOID || typeid == TEXTOID);
1006 
/* The locale is only looked up (and only used below) for case-insensitive matching. */
1007  if (case_insensitive)
1008  {
1009  if (typeid == BYTEAOID)
1010  ereport(ERROR,
1011  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1012  errmsg("case insensitive matching not supported on type bytea")));
1013 
1014  if (!OidIsValid(collation))
1015  {
1016  /*
1017  * This typically means that the parser could not resolve a
1018  * conflict of implicit collations, so report it that way.
1019  */
1020  ereport(ERROR,
1021  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1022  errmsg("could not determine which collation to use for ILIKE"),
1023  errhint("Use the COLLATE clause to set the collation explicitly.")));
1024  }
1025 
1026  locale = pg_newlocale_from_collation(collation);
1027  }
1028 
/*
 * Get a private copy of the pattern bytes.  Text is converted to a C string;
 * bytea is copied as raw bytes with its length taken from the varlena header
 * (no terminator is added, so only pattlen bounds the scan).
 */
1029  if (typeid != BYTEAOID)
1030  {
1031  patt = TextDatumGetCString(patt_const->constvalue);
1032  pattlen = strlen(patt);
1033  }
1034  else
1035  {
1036  bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
1037 
1038  pattlen = VARSIZE_ANY_EXHDR(bstr);
1039  patt = (char *) palloc(pattlen);
1040  memcpy(patt, VARDATA_ANY(bstr), pattlen);
1041  Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
1042  }
1043 
/* The prefix can never be longer than the pattern; +1 leaves room for the terminator written below. */
1044  match = palloc(pattlen + 1);
1045  match_pos = 0;
1046  for (pos = 0; pos < pattlen; pos++)
1047  {
1048  /* % and _ are wildcard characters in LIKE */
1049  if (patt[pos] == '%' ||
1050  patt[pos] == '_')
1051  break;
1052 
1053  /* Backslash escapes the next character */
1054  if (patt[pos] == '\\')
1055  {
1056  pos++;
/* A trailing backslash with nothing after it ends the scan with pos == pattlen. */
1057  if (pos >= pattlen)
1058  break;
1059  }
1060 
1061  /* Stop if case-varying character (it's sort of a wildcard) */
1062  if (case_insensitive &&
1063  pattern_char_isalpha(patt[pos], is_multibyte, locale))
1064  break;
1065 
1066  match[match_pos++] = patt[pos];
1067  }
1068 
1069  match[match_pos] = '\0';
1070 
/* Text prefixes are built from the terminated string; bytea prefixes from an explicit byte count. */
1071  if (typeid != BYTEAOID)
1072  *prefix_const = string_to_const(match, typeid);
1073  else
1074  *prefix_const = string_to_bytea_const(match, match_pos);
1075 
/* The remainder estimate covers the unconsumed tail of the pattern, starting at pos. */
1076  if (rest_selec != NULL)
1077  *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
1078  case_insensitive);
1079 
/* The working buffers are released here; the checks below only use the integer positions. */
1080  pfree(patt);
1081  pfree(match);
1082 
1083  /* in LIKE, an empty pattern is an exact match! */
1084  if (pos == pattlen)
1085  return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
1086 
1087  if (match_pos > 0)
1088  return Pattern_Prefix_Partial;
1089 
1090  return Pattern_Prefix_None;
1091 }
#define TextDatumGetCString(d)
Definition: builtins.h:98
char * Pointer
Definition: c.h:483
#define Assert(condition)
Definition: c.h:858
#define OidIsValid(objectId)
Definition: c.h:775
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
static char * locale
Definition: initdb.c:140
static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive)
static int pattern_char_isalpha(char c, bool is_multibyte, pg_locale_t locale)
static Const * string_to_const(const char *str, Oid datatype)
static Const * string_to_bytea_const(const char *str, size_t str_len)
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1510
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
unsigned int Oid
Definition: postgres_ext.h:31
Oid consttype
Definition: primnodes.h:312
Definition: c.h:687
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References Assert, Const::consttype, DatumGetByteaPP, DatumGetPointer(), ereport, errcode(), errhint(), errmsg(), ERROR, like_selectivity(), locale, OidIsValid, palloc(), pattern_char_isalpha(), Pattern_Prefix_Exact, Pattern_Prefix_None, Pattern_Prefix_Partial, pfree(), pg_database_encoding_max_length(), pg_newlocale_from_collation(), string_to_bytea_const(), string_to_const(), TextDatumGetCString, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pattern_fixed_prefix().

◆ like_regex_support()

static Node * like_regex_support ( Node *  rawreq,
Pattern_Type  ptype 
)
static

Definition at line 156 of file like_support.c.

157 {
158  Node *ret = NULL;
159 
160  if (IsA(rawreq, SupportRequestSelectivity))
161  {
162  /*
163  * Make a selectivity estimate for a function call, just as we'd do if
164  * the call was via the corresponding operator.
165  */
167  Selectivity s1;
168 
169  if (req->is_join)
170  {
171  /*
172  * For the moment we just punt. If patternjoinsel is ever
173  * improved to do better, this should be made to call it.
174  */
176  }
177  else
178  {
179  /* Share code with operator restriction selectivity functions */
180  s1 = patternsel_common(req->root,
181  InvalidOid,
182  req->funcid,
183  req->args,
184  req->varRelid,
185  req->inputcollid,
186  ptype,
187  false);
188  }
189  req->selectivity = s1;
190  ret = (Node *) req;
191  }
192  else if (IsA(rawreq, SupportRequestIndexCondition))
193  {
194  /* Try to convert operator/function call to index conditions */
196 
197  /*
198  * Currently we have no "reverse" match operators with the pattern on
199  * the left, so we only need consider cases with the indexkey on the
200  * left.
201  */
202  if (req->indexarg != 0)
203  return NULL;
204 
205  if (is_opclause(req->node))
206  {
207  OpExpr *clause = (OpExpr *) req->node;
208 
209  Assert(list_length(clause->args) == 2);
210  ret = (Node *)
211  match_pattern_prefix((Node *) linitial(clause->args),
212  (Node *) lsecond(clause->args),
213  ptype,
214  clause->inputcollid,
215  req->opfamily,
216  req->indexcollation);
217  }
218  else if (is_funcclause(req->node)) /* be paranoid */
219  {
220  FuncExpr *clause = (FuncExpr *) req->node;
221 
222  Assert(list_length(clause->args) == 2);
223  ret = (Node *)
224  match_pattern_prefix((Node *) linitial(clause->args),
225  (Node *) lsecond(clause->args),
226  ptype,
227  clause->inputcollid,
228  req->opfamily,
229  req->indexcollation);
230  }
231  }
232 
233  return ret;
234 }
static List * match_pattern_prefix(Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
Definition: like_support.c:241
static double patternsel_common(PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
Definition: like_support.c:486
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:69
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
#define InvalidOid
Definition: postgres_ext.h:36
char * s1
#define DEFAULT_MATCH_SEL
Definition: selfuncs.h:46
List * args
Definition: primnodes.h:768
Definition: nodes.h:129
List * args
Definition: primnodes.h:836
struct PlannerInfo * root
Definition: supportnodes.h:96

References FuncExpr::args, OpExpr::args, SupportRequestSelectivity::args, Assert, DEFAULT_MATCH_SEL, SupportRequestSelectivity::funcid, SupportRequestIndexCondition::indexarg, SupportRequestIndexCondition::indexcollation, SupportRequestSelectivity::inputcollid, InvalidOid, is_funcclause(), SupportRequestSelectivity::is_join, is_opclause(), IsA, linitial, list_length(), lsecond, match_pattern_prefix(), SupportRequestIndexCondition::node, SupportRequestIndexCondition::opfamily, patternsel_common(), SupportRequestSelectivity::root, s1, SupportRequestSelectivity::selectivity, and SupportRequestSelectivity::varRelid.

Referenced by text_starts_with_support(), texticlike_support(), texticregexeq_support(), textlike_support(), and textregexeq_support().

◆ like_selectivity()

static Selectivity like_selectivity ( const char *  patt,
int  pattlen,
bool  case_insensitive 
)
static

Definition at line 1318 of file like_support.c.

/*
 * Estimate the selectivity of a LIKE pattern fragment as a product of
 * per-character factors, capped at 1.0.
 *
 * patt / pattlen   the pattern bytes to examine and their count; the scan is
 *                  bounded by pattlen only.
 * case_insensitive accepted for the caller's convenience; this body does not
 *                  consult it.
 */
1319 {
1320  Selectivity sel = 1.0;
1321  int pos;
1322 
1323  /* Skip any leading wildcard; it's already factored into initial sel */
1324  for (pos = 0; pos < pattlen; pos++)
1325  {
1326  if (patt[pos] != '%' && patt[pos] != '_')
1327  break;
1328  }
1329 
/* Continue from the first non-wildcard byte, multiplying in one factor per pattern element. */
1330  for (; pos < pattlen; pos++)
1331  {
1332  /* % and _ are wildcard characters in LIKE */
1333  if (patt[pos] == '%')
1334  sel *= FULL_WILDCARD_SEL;
1335  else if (patt[pos] == '_')
1336  sel *= ANY_CHAR_SEL;
1337  else if (patt[pos] == '\\')
1338  {
1339  /* Backslash quotes the next character */
1340  pos++;
/* A trailing backslash with nothing after it contributes no factor. */
1341  if (pos >= pattlen)
1342  break;
1343  sel *= FIXED_CHAR_SEL;
1344  }
1345  else
1346  sel *= FIXED_CHAR_SEL;
1347  }
1348  /* Could get sel > 1 if multiple wildcards */
1349  if (sel > 1.0)
1350  sel = 1.0;
1351  return sel;
1352 }
#define FULL_WILDCARD_SEL
#define ANY_CHAR_SEL
#define FIXED_CHAR_SEL

References ANY_CHAR_SEL, FIXED_CHAR_SEL, and FULL_WILDCARD_SEL.

Referenced by like_fixed_prefix().

◆ likejoinsel()

Datum likejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 903 of file like_support.c.

904 {
906 }

References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ likesel()

Datum likesel ( PG_FUNCTION_ARGS  )

Definition at line 811 of file like_support.c.

812 {
814 }

References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.

◆ make_greater_string()

static Const * make_greater_string ( const Const *  str_const,
FmgrInfo *  ltproc,
Oid  collation 
)
static

Definition at line 1568 of file like_support.c.

1569 {
1570  Oid datatype = str_const->consttype;
1571  char *workstr;
1572  int len;
1573  Datum cmpstr;
1574  char *cmptxt = NULL;
1575  mbcharacter_incrementer charinc;
1576 
1577  /*
1578  * Get a modifiable copy of the prefix string in C-string format, and set
1579  * up the string we will compare to as a Datum. In C locale this can just
1580  * be the given prefix string, otherwise we need to add a suffix. Type
1581  * BYTEA sorts bytewise so it never needs a suffix either.
1582  */
1583  if (datatype == BYTEAOID)
1584  {
1585  bytea *bstr = DatumGetByteaPP(str_const->constvalue);
1586 
1587  len = VARSIZE_ANY_EXHDR(bstr);
1588  workstr = (char *) palloc(len);
1589  memcpy(workstr, VARDATA_ANY(bstr), len);
1590  Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
1591  cmpstr = str_const->constvalue;
1592  }
1593  else
1594  {
1595  if (datatype == NAMEOID)
1597  str_const->constvalue));
1598  else
1599  workstr = TextDatumGetCString(str_const->constvalue);
1600  len = strlen(workstr);
1601  if (len == 0 || pg_newlocale_from_collation(collation)->collate_is_c)
1602  cmpstr = str_const->constvalue;
1603  else
1604  {
1605  /* If first time through, determine the suffix to use */
1606  static char suffixchar = 0;
1607  static Oid suffixcollation = 0;
1608 
1609  if (!suffixchar || suffixcollation != collation)
1610  {
1611  char *best;
1612 
1613  best = "Z";
1614  if (varstr_cmp(best, 1, "z", 1, collation) < 0)
1615  best = "z";
1616  if (varstr_cmp(best, 1, "y", 1, collation) < 0)
1617  best = "y";
1618  if (varstr_cmp(best, 1, "9", 1, collation) < 0)
1619  best = "9";
1620  suffixchar = *best;
1621  suffixcollation = collation;
1622  }
1623 
1624  /* And build the string to compare to */
1625  if (datatype == NAMEOID)
1626  {
1627  cmptxt = palloc(len + 2);
1628  memcpy(cmptxt, workstr, len);
1629  cmptxt[len] = suffixchar;
1630  cmptxt[len + 1] = '\0';
1631  cmpstr = PointerGetDatum(cmptxt);
1632  }
1633  else
1634  {
1635  cmptxt = palloc(VARHDRSZ + len + 1);
1636  SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
1637  memcpy(VARDATA(cmptxt), workstr, len);
1638  *(VARDATA(cmptxt) + len) = suffixchar;
1639  cmpstr = PointerGetDatum(cmptxt);
1640  }
1641  }
1642  }
1643 
1644  /* Select appropriate character-incrementer function */
1645  if (datatype == BYTEAOID)
1646  charinc = byte_increment;
1647  else
1649 
1650  /* And search ... */
1651  while (len > 0)
1652  {
1653  int charlen;
1654  unsigned char *lastchar;
1655 
1656  /* Identify the last character --- for bytea, just the last byte */
1657  if (datatype == BYTEAOID)
1658  charlen = 1;
1659  else
1660  charlen = len - pg_mbcliplen(workstr, len, len - 1);
1661  lastchar = (unsigned char *) (workstr + len - charlen);
1662 
1663  /*
1664  * Try to generate a larger string by incrementing the last character
1665  * (for BYTEA, we treat each byte as a character).
1666  *
1667  * Note: the incrementer function is expected to return true if it's
1668  * generated a valid-per-the-encoding new character, otherwise false.
1669  * The contents of the character on false return are unspecified.
1670  */
1671  while (charinc(lastchar, charlen))
1672  {
1673  Const *workstr_const;
1674 
1675  if (datatype == BYTEAOID)
1676  workstr_const = string_to_bytea_const(workstr, len);
1677  else
1678  workstr_const = string_to_const(workstr, datatype);
1679 
1680  if (DatumGetBool(FunctionCall2Coll(ltproc,
1681  collation,
1682  cmpstr,
1683  workstr_const->constvalue)))
1684  {
1685  /* Successfully made a string larger than cmpstr */
1686  if (cmptxt)
1687  pfree(cmptxt);
1688  pfree(workstr);
1689  return workstr_const;
1690  }
1691 
1692  /* No good, release unusable value and try again */
1693  pfree(DatumGetPointer(workstr_const->constvalue));
1694  pfree(workstr_const);
1695  }
1696 
1697  /*
1698  * No luck here, so truncate off the last character and try to
1699  * increment the next one.
1700  */
1701  len -= charlen;
1702  workstr[len] = '\0';
1703  }
1704 
1705  /* Failed... */
1706  if (cmptxt)
1707  pfree(cmptxt);
1708  pfree(workstr);
1709 
1710  return NULL;
1711 }
#define VARHDRSZ
Definition: c.h:692
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:641
static bool byte_increment(unsigned char *ptr, int len)
mbcharacter_incrementer pg_database_encoding_character_incrementer(void)
Definition: mbutils.c:1523
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1083
Datum nameout(PG_FUNCTION_ARGS)
Definition: name.c:71
const void size_t len
bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)
Definition: pg_wchar.h:370
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
uintptr_t Datum
Definition: postgres.h:64
#define VARDATA(PTR)
Definition: varatt.h:278
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition: varlena.c:1538

References Assert, byte_increment(), Const::consttype, DatumGetBool(), DatumGetByteaPP, DatumGetCString(), DatumGetPointer(), DirectFunctionCall1, FunctionCall2Coll(), len, nameout(), palloc(), pfree(), pg_database_encoding_character_incrementer(), pg_mbcliplen(), pg_newlocale_from_collation(), PointerGetDatum(), SET_VARSIZE, string_to_bytea_const(), string_to_const(), TextDatumGetCString, VARDATA, VARDATA_ANY, VARHDRSZ, VARSIZE_ANY_EXHDR, and varstr_cmp().

Referenced by match_pattern_prefix(), and prefix_selectivity().

◆ match_pattern_prefix()

static List * match_pattern_prefix ( Node *  leftop,
Node *  rightop,
Pattern_Type  ptype,
Oid  expr_coll,
Oid  opfamily,
Oid  indexcollation 
)
static

Definition at line 241 of file like_support.c.

/*
 * Try to turn a pattern-match clause (leftop OP rightop) into index
 * conditions based on the pattern's fixed prefix.
 *
 * leftop / rightop  the clause's arguments; rightop must be a non-null Const
 *                   holding the pattern.
 * ptype             which kind of pattern operator is being considered.
 * expr_coll         the expression's input collation (may be unset).
 * opfamily          operator family of the index column.
 * indexcollation    collation of the index column.
 *
 * Returns a list of one or two operator clauses on leftop (an equality, a
 * prefix test, or a ">=" lower bound optionally followed by a "<" upper
 * bound), or NIL when no usable condition can be derived.
 */
247 {
248  List *result;
249  Const *patt;
250  Const *prefix;
251  Pattern_Prefix_Status pstatus;
252  Oid ldatatype;
253  Oid rdatatype;
254  Oid eqopr;
255  Oid ltopr;
256  Oid geopr;
257  Oid preopr = InvalidOid;
258  bool collation_aware;
259  Expr *expr;
260  FmgrInfo ltproc;
261  Const *greaterstr;
262 
263  /*
264  * Can't do anything with a non-constant or NULL pattern argument.
265  *
266  * Note that since we restrict ourselves to cases with a hard constant on
267  * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
268  * about verifying that.
269  */
270  if (!IsA(rightop, Const) ||
271  ((Const *) rightop)->constisnull)
272  return NIL;
273  patt = (Const *) rightop;
274 
275  /*
276  * Not supported if the expression collation is nondeterministic. The
277  * optimized equality or prefix tests use bytewise comparisons, which is
278  * not consistent with nondeterministic collations. The actual
279  * pattern-matching implementation functions will later error out that
280  * pattern-matching is not supported with nondeterministic collations. (We
281  * could also error out here, but by doing it later we get more precise
282  * error messages.) (It should be possible to support at least
283  * Pattern_Prefix_Exact, but no point as long as the actual
284  * pattern-matching implementations don't support it.)
285  *
286  * expr_coll is not set for a non-collation-aware data type such as bytea.
287  */
288  if (expr_coll && !get_collation_isdeterministic(expr_coll))
289  return NIL;
290 
291  /*
292  * Try to extract a fixed prefix from the pattern.
293  */
/* NULL is passed for rest_selec: no selectivity estimate is requested here. */
294  pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
295  &prefix, NULL);
296 
297  /* fail if no fixed prefix */
298  if (pstatus == Pattern_Prefix_None)
299  return NIL;
300 
301  /*
302  * Identify the operators we want to use, based on the type of the
303  * left-hand argument. Usually these are just the type's regular
304  * comparison operators, but if we are considering one of the semi-legacy
305  * "pattern" opclasses, use the "pattern" operators instead. Those are
306  * not collation-sensitive but always use C collation, as we want. The
307  * selected operators also determine the needed type of the prefix
308  * constant.
309  */
310  ldatatype = exprType(leftop);
311  switch (ldatatype)
312  {
313  case TEXTOID:
314  if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
315  {
316  eqopr = TextEqualOperator;
317  ltopr = TextPatternLessOperator;
318  geopr = TextPatternGreaterEqualOperator;
319  collation_aware = false;
320  }
321  else if (opfamily == TEXT_SPGIST_FAM_OID)
322  {
323  eqopr = TextEqualOperator;
324  ltopr = TextPatternLessOperator;
325  geopr = TextPatternGreaterEqualOperator;
326  /* This opfamily has direct support for prefixing */
327  preopr = TextPrefixOperator;
328  collation_aware = false;
329  }
330  else
331  {
332  eqopr = TextEqualOperator;
333  ltopr = TextLessOperator;
334  geopr = TextGreaterEqualOperator;
335  collation_aware = true;
336  }
337  rdatatype = TEXTOID;
338  break;
339  case NAMEOID:
340 
341  /*
342  * Note that here, we need the RHS type to be text, so that the
343  * comparison value isn't improperly truncated to NAMEDATALEN.
344  */
345  eqopr = NameEqualTextOperator;
346  ltopr = NameLessTextOperator;
347  geopr = NameGreaterEqualTextOperator;
348  collation_aware = true;
349  rdatatype = TEXTOID;
350  break;
351  case BPCHAROID:
352  if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
353  {
354  eqopr = BpcharEqualOperator;
355  ltopr = BpcharPatternLessOperator;
356  geopr = BpcharPatternGreaterEqualOperator;
357  collation_aware = false;
358  }
359  else
360  {
361  eqopr = BpcharEqualOperator;
362  ltopr = BpcharLessOperator;
363  geopr = BpcharGreaterEqualOperator;
364  collation_aware = true;
365  }
366  rdatatype = BPCHAROID;
367  break;
368  case BYTEAOID:
369  eqopr = ByteaEqualOperator;
370  ltopr = ByteaLessOperator;
371  geopr = ByteaGreaterEqualOperator;
372  collation_aware = false;
373  rdatatype = BYTEAOID;
374  break;
375  default:
376  /* Can't get here unless we're attached to the wrong operator */
377  return NIL;
378  }
379 
380  /*
381  * If necessary, coerce the prefix constant to the right type. The given
382  * prefix constant is either text or bytea type, therefore the only case
383  * where we need to do anything is when converting text to bpchar. Those
384  * two types are binary-compatible, so relabeling the Const node is
385  * sufficient.
386  */
387  if (prefix->consttype != rdatatype)
388  {
389  Assert(prefix->consttype == TEXTOID &&
390  rdatatype == BPCHAROID);
391  prefix->consttype = rdatatype;
392  }
393 
394  /*
395  * If we found an exact-match pattern, generate an "=" indexqual.
396  *
397  * Here and below, check to see whether the desired operator is actually
398  * supported by the index opclass, and fail quietly if not. This allows
399  * us to not be concerned with specific opclasses (except for the legacy
400  * "pattern" cases); any index that correctly implements the operators
401  * will work.
402  */
403  if (pstatus == Pattern_Prefix_Exact)
404  {
405  if (!op_in_opfamily(eqopr, opfamily))
406  return NIL;
407  expr = make_opclause(eqopr, BOOLOID, false,
408  (Expr *) leftop, (Expr *) prefix,
409  InvalidOid, indexcollation);
410  result = list_make1(expr);
411  return result;
412  }
413 
414  /*
415  * Otherwise, we have a nonempty required prefix of the values. Some
416  * opclasses support prefix checks directly, otherwise we'll try to
417  * generate a range constraint.
418  */
/* preopr is only set in the TEXT_SPGIST_FAM_OID case above; otherwise this branch is skipped. */
419  if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
420  {
421  expr = make_opclause(preopr, BOOLOID, false,
422  (Expr *) leftop, (Expr *) prefix,
423  InvalidOid, indexcollation);
424  result = list_make1(expr);
425  return result;
426  }
427 
428  /*
429  * Since we need a range constraint, it's only going to work reliably if
430  * the index is collation-insensitive or has "C" collation. Note that
431  * here we are looking at the index's collation, not the expression's
432  * collation -- this test is *not* dependent on the LIKE/regex operator's
433  * collation.
434  */
435  if (collation_aware &&
436  !pg_newlocale_from_collation(indexcollation)->collate_is_c)
437  return NIL;
438 
439  /*
440  * We can always say "x >= prefix".
441  */
442  if (!op_in_opfamily(geopr, opfamily))
443  return NIL;
444  expr = make_opclause(geopr, BOOLOID, false,
445  (Expr *) leftop, (Expr *) prefix,
446  InvalidOid, indexcollation);
447  result = list_make1(expr);
448 
449  /*-------
450  * If we can create a string larger than the prefix, we can say
451  * "x < greaterstr". NB: we rely on make_greater_string() to generate
452  * a guaranteed-greater string, not just a probably-greater string.
453  * In general this is only guaranteed in C locale, so we'd better be
454  * using a C-locale index collation.
455  *-------
456  */
/* From here on result already holds the lower bound, so it is returned alone if no upper bound can be added. */
457  if (!op_in_opfamily(ltopr, opfamily))
458  return result;
459  fmgr_info(get_opcode(ltopr), &ltproc);
460  greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
461  if (greaterstr)
462  {
463  expr = make_opclause(ltopr, BOOLOID, false,
464  (Expr *) leftop, (Expr *) greaterstr,
465  InvalidOid, indexcollation);
466  result = lappend(result, expr);
467  }
468 
469  return result;
470 }
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
static Const * make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
List * lappend(List *list, void *datum)
Definition: list.c:339
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1285
bool get_collation_isdeterministic(Oid colloid)
Definition: lsyscache.c:1054
bool op_in_opfamily(Oid opno, Oid opfamily)
Definition: lsyscache.c:66
Expr * make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, Oid opcollid, Oid inputcollid)
Definition: makefuncs.c:628
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
FmgrInfo
Definition: fmgr.h:57
List
Definition: pg_list.h:54

References Assert, Const::consttype, exprType(), fmgr_info(), get_collation_isdeterministic(), get_opcode(), InvalidOid, IsA, lappend(), list_make1, make_greater_string(), make_opclause(), NIL, OidIsValid, op_in_opfamily(), pattern_fixed_prefix(), Pattern_Prefix_Exact, Pattern_Prefix_None, and pg_newlocale_from_collation().

Referenced by like_regex_support().

◆ nlikejoinsel()

Datum nlikejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 948 of file like_support.c.

949 {
950  PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
951 }

References Pattern_Type_Like, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ nlikesel()

Datum nlikesel ( PG_FUNCTION_ARGS  )

Definition at line 857 of file like_support.c.

858 {
859  PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
860 }

References Pattern_Type_Like, patternsel(), and PG_RETURN_FLOAT8.

◆ pattern_char_isalpha()

static int pattern_char_isalpha ( char  c,
bool  is_multibyte,
pg_locale_t  locale 
)
static

Definition at line 1496 of file like_support.c.

1498 {
1499  if (locale->ctype_is_c)
1500  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1501  else if (is_multibyte && IS_HIGHBIT_SET(c))
1502  return true;
1503  else if (locale->provider == COLLPROVIDER_ICU)
1504  return IS_HIGHBIT_SET(c) ||
1505  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1506  else if (locale->provider == COLLPROVIDER_LIBC)
1507  return isalpha_l((unsigned char) c, locale->info.lt);
1508  else
1509  return isalpha((unsigned char) c);
1510 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1155
char * c
#define isalpha_l
Definition: win32_port.h:449

References IS_HIGHBIT_SET, isalpha_l, and locale.

Referenced by like_fixed_prefix().

◆ pattern_fixed_prefix()

static Pattern_Prefix_Status pattern_fixed_prefix ( Const *  patt,
Pattern_Type  ptype,
Oid  collation,
Const **  prefix,
Selectivity *  rest_selec 
)
static

Definition at line 1162 of file like_support.c.

1164 {
1165  Pattern_Prefix_Status result;
1166 
1167  switch (ptype)
1168  {
1169  case Pattern_Type_Like:
1170  result = like_fixed_prefix(patt, false, collation,
1171  prefix, rest_selec);
1172  break;
1173  case Pattern_Type_Like_IC:
1174  result = like_fixed_prefix(patt, true, collation,
1175  prefix, rest_selec);
1176  break;
1177  case Pattern_Type_Regex:
1178  result = regex_fixed_prefix(patt, false, collation,
1179  prefix, rest_selec);
1180  break;
1181  case Pattern_Type_Regex_IC:
1182  result = regex_fixed_prefix(patt, true, collation,
1183  prefix, rest_selec);
1184  break;
1185  case Pattern_Type_Prefix:
1186  /* Prefix type work is trivial. */
1187  result = Pattern_Prefix_Partial;
1188  *prefix = makeConst(patt->consttype,
1189  patt->consttypmod,
1190  patt->constcollid,
1191  patt->constlen,
1192  datumCopy(patt->constvalue,
1193  patt->constbyval,
1194  patt->constlen),
1195  patt->constisnull,
1196  patt->constbyval);
1197  if (rest_selec != NULL)
1198  *rest_selec = 1.0; /* all */
1199  break;
1200  default:
1201  elog(ERROR, "unrecognized ptype: %d", (int) ptype);
1202  result = Pattern_Prefix_None; /* keep compiler quiet */
1203  break;
1204  }
1205  return result;
1206 }
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
#define elog(elevel,...)
Definition: elog.h:225
static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
Definition: like_support.c:992
static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:301

References Const::consttype, datumCopy(), elog, ERROR, like_fixed_prefix(), makeConst(), Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Type_Like, Pattern_Type_Like_IC, Pattern_Type_Prefix, Pattern_Type_Regex, Pattern_Type_Regex_IC, and regex_fixed_prefix().

Referenced by match_pattern_prefix(), and patternsel_common().

◆ patternjoinsel()

static double patternjoinsel ( PG_FUNCTION_ARGS  ,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 875 of file like_support.c.

876 {
877  /* For the moment we just punt. */
878  return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
879 }

References DEFAULT_MATCH_SEL.

Referenced by iclikejoinsel(), icnlikejoinsel(), icregexeqjoinsel(), icregexnejoinsel(), likejoinsel(), nlikejoinsel(), prefixjoinsel(), regexeqjoinsel(), and regexnejoinsel().

◆ patternsel()

static double patternsel ( PG_FUNCTION_ARGS  ,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 760 of file like_support.c.

761 {
762  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
763  Oid operator = PG_GETARG_OID(1);
764  List *args = (List *) PG_GETARG_POINTER(2);
765  int varRelid = PG_GETARG_INT32(3);
766  Oid collation = PG_GET_COLLATION();
767 
768  /*
769  * If this is for a NOT LIKE or similar operator, get the corresponding
770  * positive-match operator and work with that.
771  */
772  if (negate)
773  {
774  operator = get_negator(operator);
775  if (!OidIsValid(operator))
776  elog(ERROR, "patternsel called for operator without a negator");
777  }
778 
779  return patternsel_common(root,
780  operator,
781  InvalidOid,
782  args,
783  varRelid,
784  collation,
785  ptype,
786  negate);
787 }
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GET_COLLATION()
Definition: fmgr.h:198
Oid get_negator(Oid opno)
Definition: lsyscache.c:1533
tree ctl root
Definition: radixtree.h:1886

References generate_unaccent_rules::args, elog, ERROR, get_negator(), InvalidOid, OidIsValid, patternsel_common(), PG_GET_COLLATION, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, and root.

Referenced by iclikesel(), icnlikesel(), icregexeqsel(), icregexnesel(), likesel(), nlikesel(), prefixsel(), regexeqsel(), and regexnesel().

◆ patternsel_common()

static double patternsel_common ( PlannerInfo *  root,
Oid  oprid,
Oid  opfuncid,
List *  args,
int  varRelid,
Oid  collation,
Pattern_Type  ptype,
bool  negate 
)
static

Definition at line 486 of file like_support.c.

494 {
495  VariableStatData vardata;
496  Node *other;
497  bool varonleft;
498  Datum constval;
499  Oid consttype;
500  Oid vartype;
501  Oid rdatatype;
502  Oid eqopr;
503  Oid ltopr;
504  Oid geopr;
505  Pattern_Prefix_Status pstatus;
506  Const *patt;
507  Const *prefix = NULL;
508  Selectivity rest_selec = 0;
509  double nullfrac = 0.0;
510  double result;
511 
512  /*
513  * Initialize result to the appropriate default estimate depending on
514  * whether it's a match or not-match operator.
515  */
516  if (negate)
517  result = 1.0 - DEFAULT_MATCH_SEL;
518  else
519  result = DEFAULT_MATCH_SEL;
520 
521  /*
522  * If expression is not variable op constant, then punt and return the
523  * default estimate.
524  */
525  if (!get_restriction_variable(root, args, varRelid,
526  &vardata, &other, &varonleft))
527  return result;
528  if (!varonleft || !IsA(other, Const))
529  {
530  ReleaseVariableStats(vardata);
531  return result;
532  }
533 
534  /*
535  * If the constant is NULL, assume operator is strict and return zero, ie,
536  * operator will never return TRUE. (It's zero even for a negator op.)
537  */
538  if (((Const *) other)->constisnull)
539  {
540  ReleaseVariableStats(vardata);
541  return 0.0;
542  }
543  constval = ((Const *) other)->constvalue;
544  consttype = ((Const *) other)->consttype;
545 
546  /*
547  * The right-hand const is type text or bytea for all supported operators.
548  * We do not expect to see binary-compatible types here, since
549  * const-folding should have relabeled the const to exactly match the
550  * operator's declared type.
551  */
552  if (consttype != TEXTOID && consttype != BYTEAOID)
553  {
554  ReleaseVariableStats(vardata);
555  return result;
556  }
557 
558  /*
559  * Similarly, the exposed type of the left-hand side should be one of
560  * those we know. (Do not look at vardata.atttype, which might be
561  * something binary-compatible but different.) We can use it to identify
562  * the comparison operators and the required type of the comparison
563  * constant, much as in match_pattern_prefix().
564  */
565  vartype = vardata.vartype;
566 
567  switch (vartype)
568  {
569  case TEXTOID:
570  eqopr = TextEqualOperator;
571  ltopr = TextLessOperator;
572  geopr = TextGreaterEqualOperator;
573  rdatatype = TEXTOID;
574  break;
575  case NAMEOID:
576 
577  /*
578  * Note that here, we need the RHS type to be text, so that the
579  * comparison value isn't improperly truncated to NAMEDATALEN.
580  */
581  eqopr = NameEqualTextOperator;
582  ltopr = NameLessTextOperator;
583  geopr = NameGreaterEqualTextOperator;
584  rdatatype = TEXTOID;
585  break;
586  case BPCHAROID:
587  eqopr = BpcharEqualOperator;
588  ltopr = BpcharLessOperator;
589  geopr = BpcharGreaterEqualOperator;
590  rdatatype = BPCHAROID;
591  break;
592  case BYTEAOID:
593  eqopr = ByteaEqualOperator;
594  ltopr = ByteaLessOperator;
595  geopr = ByteaGreaterEqualOperator;
596  rdatatype = BYTEAOID;
597  break;
598  default:
599  /* Can't get here unless we're attached to the wrong operator */
600  ReleaseVariableStats(vardata);
601  return result;
602  }
603 
604  /*
605  * Grab the nullfrac for use below.
606  */
607  if (HeapTupleIsValid(vardata.statsTuple))
608  {
609  Form_pg_statistic stats;
610 
611  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
612  nullfrac = stats->stanullfrac;
613  }
614 
615  /*
616  * Pull out any fixed prefix implied by the pattern, and estimate the
617  * fractional selectivity of the remainder of the pattern. Unlike many
618  * other selectivity estimators, we use the pattern operator's actual
619  * collation for this step. This is not because we expect the collation
620  * to make a big difference in the selectivity estimate (it seldom would),
621  * but because we want to be sure we cache compiled regexps under the
622  * right cache key, so that they can be re-used at runtime.
623  */
624  patt = (Const *) other;
625  pstatus = pattern_fixed_prefix(patt, ptype, collation,
626  &prefix, &rest_selec);
627 
628  /*
629  * If necessary, coerce the prefix constant to the right type. The only
630  * case where we need to do anything is when converting text to bpchar.
631  * Those two types are binary-compatible, so relabeling the Const node is
632  * sufficient.
633  */
634  if (prefix && prefix->consttype != rdatatype)
635  {
636  Assert(prefix->consttype == TEXTOID &&
637  rdatatype == BPCHAROID);
638  prefix->consttype = rdatatype;
639  }
640 
641  if (pstatus == Pattern_Prefix_Exact)
642  {
643  /*
644  * Pattern specifies an exact match, so estimate as for '='
645  */
646  result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
647  false, true, false);
648  }
649  else
650  {
651  /*
652  * Not exact-match pattern. If we have a sufficiently large
653  * histogram, estimate selectivity for the histogram part of the
654  * population by counting matches in the histogram. If not, estimate
655  * selectivity of the fixed prefix and remainder of pattern
656  * separately, then combine the two to get an estimate of the
657  * selectivity for the part of the column population represented by
658  * the histogram. (For small histograms, we combine these
659  * approaches.)
660  *
661  * We then add up data for any most-common-values values; these are
662  * not in the histogram population, and we can get exact answers for
663  * them by applying the pattern operator, so there's no reason to
664  * approximate. (If the MCVs cover a significant part of the total
665  * population, this gives us a big leg up in accuracy.)
666  */
667  Selectivity selec;
668  int hist_size;
669  FmgrInfo opproc;
670  double mcv_selec,
671  sumcommon;
672 
673  /* Try to use the histogram entries to get selectivity */
674  if (!OidIsValid(opfuncid))
675  opfuncid = get_opcode(oprid);
676  fmgr_info(opfuncid, &opproc);
677 
678  selec = histogram_selectivity(&vardata, &opproc, collation,
679  constval, true,
680  10, 1, &hist_size);
681 
682  /* If not at least 100 entries, use the heuristic method */
683  if (hist_size < 100)
684  {
685  Selectivity heursel;
686  Selectivity prefixsel;
687 
688  if (pstatus == Pattern_Prefix_Partial)
689  prefixsel = prefix_selectivity(root, &vardata,
690  eqopr, ltopr, geopr,
691  collation,
692  prefix);
693  else
694  prefixsel = 1.0;
695  heursel = prefixsel * rest_selec;
696 
697  if (selec < 0) /* fewer than 10 histogram entries? */
698  selec = heursel;
699  else
700  {
701  /*
702  * For histogram sizes from 10 to 100, we combine the
703  * histogram and heuristic selectivities, putting increasingly
704  * more trust in the histogram for larger sizes.
705  */
706  double hist_weight = hist_size / 100.0;
707 
708  selec = selec * hist_weight + heursel * (1.0 - hist_weight);
709  }
710  }
711 
712  /* In any case, don't believe extremely small or large estimates. */
713  if (selec < 0.0001)
714  selec = 0.0001;
715  else if (selec > 0.9999)
716  selec = 0.9999;
717 
718  /*
719  * If we have most-common-values info, add up the fractions of the MCV
720  * entries that satisfy MCV OP PATTERN. These fractions contribute
721  * directly to the result selectivity. Also add up the total fraction
722  * represented by MCV entries.
723  */
724  mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
725  constval, true,
726  &sumcommon);
727 
728  /*
729  * Now merge the results from the MCV and histogram calculations,
730  * realizing that the histogram covers only the non-null values that
731  * are not listed in MCV.
732  */
733  selec *= 1.0 - nullfrac - sumcommon;
734  selec += mcv_selec;
735  result = selec;
736  }
737 
738  /* now adjust if we wanted not-match rather than match */
739  if (negate)
740  result = 1.0 - result - nullfrac;
741 
742  /* result should be in range, but make sure... */
743  CLAMP_PROBABILITY(result);
744 
745  if (prefix)
746  {
747  pfree(DatumGetPointer(prefix->constvalue));
748  pfree(prefix);
749  }
750 
751  ReleaseVariableStats(vardata);
752 
753  return result;
754 }
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
Datum prefixsel(PG_FUNCTION_ARGS)
Definition: like_support.c:820
Oid oprid(Operator op)
Definition: parse_oper.c:238
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4891
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:295
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:732
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:823
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
HeapTuple statsTuple
Definition: selfuncs.h:89

References generate_unaccent_rules::args, Assert, CLAMP_PROBABILITY, Const::consttype, DatumGetPointer(), DEFAULT_MATCH_SEL, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT, HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), OidIsValid, oprid(), pattern_fixed_prefix(), Pattern_Prefix_Exact, Pattern_Prefix_Partial, pfree(), prefix_selectivity(), prefixsel(), ReleaseVariableStats, root, VariableStatData::statsTuple, var_eq_const(), and VariableStatData::vartype.

Referenced by like_regex_support(), and patternsel().

◆ prefix_selectivity()

static Selectivity prefix_selectivity ( PlannerInfo *  root,
VariableStatData *  vardata,
Oid  eqopr,
Oid  ltopr,
Oid  geopr,
Oid  collation,
Const *  prefixcon 
)
static

Definition at line 1227 of file like_support.c.

1231 {
1232  Selectivity prefixsel;
1233  FmgrInfo opproc;
1234  Const *greaterstrcon;
1235  Selectivity eq_sel;
1236 
1237  /* Estimate the selectivity of "x >= prefix" */
1238  fmgr_info(get_opcode(geopr), &opproc);
1239 
1240  prefixsel = ineq_histogram_selectivity(root, vardata,
1241  geopr, &opproc, true, true,
1242  collation,
1243  prefixcon->constvalue,
1244  prefixcon->consttype);
1245 
1246  if (prefixsel < 0.0)
1247  {
1248  /* No histogram is present ... return a suitable default estimate */
1249  return DEFAULT_MATCH_SEL;
1250  }
1251 
1252  /*
1253  * If we can create a string larger than the prefix, say "x < greaterstr".
1254  */
1255  fmgr_info(get_opcode(ltopr), &opproc);
1256  greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
1257  if (greaterstrcon)
1258  {
1259  Selectivity topsel;
1260 
1261  topsel = ineq_histogram_selectivity(root, vardata,
1262  ltopr, &opproc, false, false,
1263  collation,
1264  greaterstrcon->constvalue,
1265  greaterstrcon->consttype);
1266 
1267  /* ineq_histogram_selectivity worked before, it shouldn't fail now */
1268  Assert(topsel >= 0.0);
1269 
1270  /*
1271  * Merge the two selectivities in the same way as for a range query
1272  * (see clauselist_selectivity()). Note that we don't need to worry
1273  * about double-exclusion of nulls, since ineq_histogram_selectivity
1274  * doesn't count those anyway.
1275  */
1276  prefixsel = topsel + prefixsel - 1.0;
1277  }
1278 
1279  /*
1280  * If the prefix is long then the two bounding values might be too close
1281  * together for the histogram to distinguish them usefully, resulting in a
1282  * zero estimate (plus or minus roundoff error). To avoid returning a
1283  * ridiculously small estimate, compute the estimated selectivity for
1284  * "variable = 'foo'", and clamp to that. (Obviously, the resultant
1285  * estimate should be at least that.)
1286  *
1287  * We apply this even if we couldn't make a greater string. That case
1288  * suggests that the prefix is near the maximum possible, and thus
1289  * probably off the end of the histogram, and thus we probably got a very
1290  * small estimate from the >= condition; so we still need to clamp.
1291  */
1292  eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
1293  false, true, false);
1294 
1295  prefixsel = Max(prefixsel, eq_sel);
1296 
1297  return prefixsel;
1298 }
#define Max(x, y)
Definition: c.h:998
double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
Definition: selfuncs.c:1041

References Assert, Const::consttype, DEFAULT_MATCH_SEL, fmgr_info(), get_opcode(), ineq_histogram_selectivity(), make_greater_string(), Max, prefixsel(), root, and var_eq_const().

Referenced by patternsel_common().

◆ prefixjoinsel()

Datum prefixjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 912 of file like_support.c.

913 {
914  PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
915 }

References Pattern_Type_Prefix, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ prefixsel()

Datum prefixsel ( PG_FUNCTION_ARGS  )

Definition at line 820 of file like_support.c.

821 {
822  PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
823 }

References Pattern_Type_Prefix, patternsel(), and PG_RETURN_FLOAT8.

Referenced by patternsel_common(), prefix_selectivity(), and regex_selectivity().

◆ regex_fixed_prefix()

static Pattern_Prefix_Status regex_fixed_prefix ( Const *  patt_const,
bool  case_insensitive,
Oid  collation,
Const **  prefix_const,
Selectivity *  rest_selec 
)
static

Definition at line 1094 of file like_support.c.

1096 {
1097  Oid typeid = patt_const->consttype;
1098  char *prefix;
1099  bool exact;
1100 
1101  /*
1102  * Should be unnecessary, there are no bytea regex operators defined. As
1103  * such, it should be noted that the rest of this function has *not* been
1104  * made safe for binary (possibly NULL containing) strings.
1105  */
1106  if (typeid == BYTEAOID)
1107  ereport(ERROR,
1108  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1109  errmsg("regular-expression matching not supported on type bytea")));
1110 
1111  /* Use the regexp machinery to extract the prefix, if any */
1112  prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
1113  case_insensitive, collation,
1114  &exact);
1115 
1116  if (prefix == NULL)
1117  {
1118  *prefix_const = NULL;
1119 
1120  if (rest_selec != NULL)
1121  {
1122  char *patt = TextDatumGetCString(patt_const->constvalue);
1123 
1124  *rest_selec = regex_selectivity(patt, strlen(patt),
1125  case_insensitive,
1126  0);
1127  pfree(patt);
1128  }
1129 
1130  return Pattern_Prefix_None;
1131  }
1132 
1133  *prefix_const = string_to_const(prefix, typeid);
1134 
1135  if (rest_selec != NULL)
1136  {
1137  if (exact)
1138  {
1139  /* Exact match, so there's no additional selectivity */
1140  *rest_selec = 1.0;
1141  }
1142  else
1143  {
1144  char *patt = TextDatumGetCString(patt_const->constvalue);
1145 
1146  *rest_selec = regex_selectivity(patt, strlen(patt),
1147  case_insensitive,
1148  strlen(prefix));
1149  pfree(patt);
1150  }
1151  }
1152 
1153  pfree(prefix);
1154 
1155  if (exact)
1156  return Pattern_Prefix_Exact; /* pattern specifies exact match */
1157  else
1158  return Pattern_Prefix_Partial;
1159 }
#define DatumGetTextPP(X)
Definition: fmgr.h:292
static Selectivity regex_selectivity(const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
char * regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, bool *exact)
Definition: regexp.c:1953

References Const::consttype, DatumGetTextPP, ereport, errcode(), errmsg(), ERROR, Pattern_Prefix_Exact, Pattern_Prefix_None, Pattern_Prefix_Partial, pfree(), regex_selectivity(), regexp_fixed_prefix(), string_to_const(), and TextDatumGetCString.

Referenced by pattern_fixed_prefix().

◆ regex_selectivity()

static Selectivity regex_selectivity ( const char *  patt,
int  pattlen,
bool  case_insensitive,
int  fixed_prefix_len 
)
static

Definition at line 1450 of file like_support.c.

1452 {
1453  Selectivity sel;
1454 
1455  /* If patt doesn't end with $, consider it to have a trailing wildcard */
1456  if (pattlen > 0 && patt[pattlen - 1] == '$' &&
1457  (pattlen == 1 || patt[pattlen - 2] != '\\'))
1458  {
1459  /* has trailing $ */
1460  sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
1461  }
1462  else
1463  {
1464  /* no trailing $ */
1465  sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
1466  sel *= FULL_WILDCARD_SEL;
1467  }
1468 
1469  /*
1470  * If there's a fixed prefix, discount its selectivity. We have to be
1471  * careful here since a very long prefix could result in pow's result
1472  * underflowing to zero (in which case "sel" probably has as well).
1473  */
1474  if (fixed_prefix_len > 0)
1475  {
1476  double prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
1477 
1478  if (prefixsel > 0.0)
1479  sel /= prefixsel;
1480  }
1481 
1482  /* Make sure result stays in range */
1483  CLAMP_PROBABILITY(sel);
1484  return sel;
1485 }
static Selectivity regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)

References CLAMP_PROBABILITY, FIXED_CHAR_SEL, FULL_WILDCARD_SEL, prefixsel(), and regex_selectivity_sub().

Referenced by regex_fixed_prefix().

◆ regex_selectivity_sub()

static Selectivity regex_selectivity_sub ( const char *  patt,
int  pattlen,
bool  case_insensitive 
)
static

Definition at line 1355 of file like_support.c.

1356 {
1357  Selectivity sel = 1.0;
1358  int paren_depth = 0;
1359  int paren_pos = 0; /* dummy init to keep compiler quiet */
1360  int pos;
1361 
1362  /* since this function recurses, it could be driven to stack overflow */
1363  check_stack_depth();
1364 
1365  for (pos = 0; pos < pattlen; pos++)
1366  {
1367  if (patt[pos] == '(')
1368  {
1369  if (paren_depth == 0)
1370  paren_pos = pos; /* remember start of parenthesized item */
1371  paren_depth++;
1372  }
1373  else if (patt[pos] == ')' && paren_depth > 0)
1374  {
1375  paren_depth--;
1376  if (paren_depth == 0)
1377  sel *= regex_selectivity_sub(patt + (paren_pos + 1),
1378  pos - (paren_pos + 1),
1379  case_insensitive);
1380  }
1381  else if (patt[pos] == '|' && paren_depth == 0)
1382  {
1383  /*
1384  * If unquoted | is present at paren level 0 in pattern, we have
1385  * multiple alternatives; sum their probabilities.
1386  */
1387  sel += regex_selectivity_sub(patt + (pos + 1),
1388  pattlen - (pos + 1),
1389  case_insensitive);
1390  break; /* rest of pattern is now processed */
1391  }
1392  else if (patt[pos] == '[')
1393  {
1394  bool negclass = false;
1395 
1396  if (patt[++pos] == '^')
1397  {
1398  negclass = true;
1399  pos++;
1400  }
1401  if (patt[pos] == ']') /* ']' at start of class is not special */
1402  pos++;
1403  while (pos < pattlen && patt[pos] != ']')
1404  pos++;
1405  if (paren_depth == 0)
1406  sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
1407  }
1408  else if (patt[pos] == '.')
1409  {
1410  if (paren_depth == 0)
1411  sel *= ANY_CHAR_SEL;
1412  }
1413  else if (patt[pos] == '*' ||
1414  patt[pos] == '?' ||
1415  patt[pos] == '+')
1416  {
1417  /* Ought to be smarter about quantifiers... */
1418  if (paren_depth == 0)
1419  sel *= PARTIAL_WILDCARD_SEL;
1420  }
1421  else if (patt[pos] == '{')
1422  {
1423  while (pos < pattlen && patt[pos] != '}')
1424  pos++;
1425  if (paren_depth == 0)
1426  sel *= PARTIAL_WILDCARD_SEL;
1427  }
1428  else if (patt[pos] == '\\')
1429  {
1430  /* backslash quotes the next character */
1431  pos++;
1432  if (pos >= pattlen)
1433  break;
1434  if (paren_depth == 0)
1435  sel *= FIXED_CHAR_SEL;
1436  }
1437  else
1438  {
1439  if (paren_depth == 0)
1440  sel *= FIXED_CHAR_SEL;
1441  }
1442  }
1443  /* Could get sel > 1 if multiple wildcards */
1444  if (sel > 1.0)
1445  sel = 1.0;
1446  return sel;
1447 }
#define PARTIAL_WILDCARD_SEL
#define CHAR_RANGE_SEL
void check_stack_depth(void)
Definition: postgres.c:3564

References ANY_CHAR_SEL, CHAR_RANGE_SEL, check_stack_depth(), FIXED_CHAR_SEL, and PARTIAL_WILDCARD_SEL.

Referenced by regex_selectivity().

◆ regexeqjoinsel()

Datum regexeqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 885 of file like_support.c.

886 {
887  PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
888 }

References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ regexeqsel()

Datum regexeqsel ( PG_FUNCTION_ARGS  )

Definition at line 793 of file like_support.c.

794 {
795  PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
796 }

References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.

◆ regexnejoinsel()

Datum regexnejoinsel ( PG_FUNCTION_ARGS  )

Definition at line 930 of file like_support.c.

931 {
932  PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
933 }

References Pattern_Type_Regex, patternjoinsel(), and PG_RETURN_FLOAT8.

◆ regexnesel()

Datum regexnesel ( PG_FUNCTION_ARGS  )

Definition at line 839 of file like_support.c.

840 {
841  PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
842 }

References Pattern_Type_Regex, patternsel(), and PG_RETURN_FLOAT8.

◆ string_to_bytea_const()

static Const * string_to_bytea_const ( const char *  str,
size_t  str_len 
)
static

Definition at line 1782 of file like_support.c.

1783 {
1784  bytea *bstr = palloc(VARHDRSZ + str_len);
1785  Datum conval;
1786 
1787  memcpy(VARDATA(bstr), str, str_len);
1788  SET_VARSIZE(bstr, VARHDRSZ + str_len);
1789  conval = PointerGetDatum(bstr);
1790 
1791  return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
1792 }
const char * str

References InvalidOid, makeConst(), palloc(), PointerGetDatum(), SET_VARSIZE, str, VARDATA, and VARHDRSZ.

Referenced by like_fixed_prefix(), and make_greater_string().

◆ string_to_const()

static Const * string_to_const ( const char *  str,
Oid  datatype 
)
static

Definition at line 1739 of file like_support.c.

1740 {
1741  Datum conval = string_to_datum(str, datatype);
1742  Oid collation;
1743  int constlen;
1744 
1745  /*
1746  * We only need to support a few datatypes here, so hard-wire properties
1747  * instead of incurring the expense of catalog lookups.
1748  */
1749  switch (datatype)
1750  {
1751  case TEXTOID:
1752  case VARCHAROID:
1753  case BPCHAROID:
1754  collation = DEFAULT_COLLATION_OID;
1755  constlen = -1;
1756  break;
1757 
1758  case NAMEOID:
1759  collation = C_COLLATION_OID;
1760  constlen = NAMEDATALEN;
1761  break;
1762 
1763  case BYTEAOID:
1764  collation = InvalidOid;
1765  constlen = -1;
1766  break;
1767 
1768  default:
1769  elog(ERROR, "unexpected datatype in string_to_const: %u",
1770  datatype);
1771  return NULL;
1772  }
1773 
1774  return makeConst(datatype, -1, collation, constlen,
1775  conval, false, false);
1776 }
static Datum string_to_datum(const char *str, Oid datatype)
#define NAMEDATALEN

References elog, ERROR, InvalidOid, makeConst(), NAMEDATALEN, str, and string_to_datum().

Referenced by like_fixed_prefix(), make_greater_string(), and regex_fixed_prefix().

◆ string_to_datum()

static Datum string_to_datum ( const char *  str,
Oid  datatype 
)
static

Definition at line 1719 of file like_support.c.

1720 {
1721  Assert(str != NULL);
1722 
1723  /*
1724  * We cheat a little by assuming that CStringGetTextDatum() will do for
1725  * bpchar and varchar constants too...
1726  */
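1727  if (datatype == NAMEOID)
1728  return DirectFunctionCall1(namein, CStringGetDatum(str));
1729  else if (datatype == BYTEAOID)
1730  return DirectFunctionCall1(byteain, CStringGetDatum(str));
1731  else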
1732  return CStringGetTextDatum(str);
1733 }
#define CStringGetTextDatum(s)
Definition: builtins.h:97
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:350
Datum byteain(PG_FUNCTION_ARGS)
Definition: varlena.c:290

References Assert, byteain(), CStringGetDatum(), CStringGetTextDatum, DirectFunctionCall1, namein(), and str.

Referenced by string_to_const().

◆ text_starts_with_support()

Datum text_starts_with_support ( PG_FUNCTION_ARGS  )

Definition at line 147 of file like_support.c.

148 {
149  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
150 
151  PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
152 }
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
static Node * like_regex_support(Node *rawreq, Pattern_Type ptype)
Definition: like_support.c:156

References like_regex_support(), Pattern_Type_Prefix, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ texticlike_support()

Datum texticlike_support ( PG_FUNCTION_ARGS  )

Definition at line 123 of file like_support.c.

124 {
125  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
126 
127  PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
128 }

References like_regex_support(), Pattern_Type_Like_IC, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ texticregexeq_support()

Datum texticregexeq_support ( PG_FUNCTION_ARGS  )

Definition at line 139 of file like_support.c.

140 {
141  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
142 
143  PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
144 }

References like_regex_support(), Pattern_Type_Regex_IC, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ textlike_support()

Datum textlike_support ( PG_FUNCTION_ARGS  )

Definition at line 115 of file like_support.c.

116 {
117  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
118 
119  PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
120 }

References like_regex_support(), Pattern_Type_Like, PG_GETARG_POINTER, and PG_RETURN_POINTER.

◆ textregexeq_support()

Datum textregexeq_support ( PG_FUNCTION_ARGS  )

Definition at line 131 of file like_support.c.

132 {
133  Node *rawreq = (Node *) PG_GETARG_POINTER(0);
134 
135  PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
136 }

References like_regex_support(), Pattern_Type_Regex, PG_GETARG_POINTER, and PG_RETURN_POINTER.