PostgreSQL Source Code  git master
regc_locale.c File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  cname
 

Functions

static chr element (struct vars *v, const chr *startp, const chr *endp)
 
static struct cvec * range (struct vars *v, chr a, chr b, int cases)
 
static int before (chr x, chr y)
 
static struct cvec * eclass (struct vars *v, chr c, int cases)
 
static enum char_classes lookupcclass (struct vars *v, const chr *startp, const chr *endp)
 
static struct cvec * cclasscvec (struct vars *v, enum char_classes cclasscode, int cases)
 
static int cclass_column_index (struct colormap *cm, chr c)
 
static struct cvec * allcases (struct vars *v, chr c)
 
static int cmp (const chr *x, const chr *y, size_t len)
 
static int casecmp (const chr *x, const chr *y, size_t len)
 

Variables

static const struct cname cnames []
 
static const char *const classNames [NUM_CCLASSES+1]
 

Function Documentation

◆ allcases()

static struct cvec* allcases ( struct vars * v,
chr  c 
)
static

Definition at line 721 of file regc_locale.c.

723 {
724  struct cvec *cv;
725  chr lc,
726  uc;
727 
728  lc = pg_wc_tolower(c);
729  uc = pg_wc_toupper(c);
730 
731  cv = getcvec(v, 2, 0);
732  addchr(cv, lc);
733  if (lc != uc)
734  addchr(cv, uc);
735  return cv;
736 }
char * c
static void addchr(struct cvec *cv, chr c)
Definition: regc_cvec.c:79
static struct cvec * getcvec(struct vars *v, int nchrs, int nranges)
Definition: regc_cvec.c:112
static pg_wchar pg_wc_tolower(pg_wchar c)
static pg_wchar pg_wc_toupper(pg_wchar c)
pg_wchar chr
Definition: regcustom.h:58
cvec
Definition: regguts.h:274

References addchr(), getcvec(), pg_wc_tolower(), and pg_wc_toupper().

Referenced by eclass().

◆ before()

static int before ( chr  x,
chr  y 
)
static

Definition at line 492 of file regc_locale.c.

493 {
494  if (x < y)
495  return 1;
496  return 0;
497 }
int y
Definition: isn.c:72
int x
Definition: isn.c:71

References x, and y.

Referenced by avlRotate(), dclist_insert_before(), DescribeQuery(), dlist_insert_before(), do_edit(), ExecQueryAndProcessResults(), ExecQueryUsingCursor(), and range().

◆ casecmp()

static int casecmp ( const chr * x,
const chr * y,
size_t  len 
)
static

Definition at line 762 of file regc_locale.c.

764 {
765  for (; len > 0; len--, x++, y++)
766  {
767  if ((*x != *y) && (pg_wc_tolower(*x) != pg_wc_tolower(*y)))
768  return 1;
769  }
770  return 0;
771 }
const void size_t len

References len, pg_wc_tolower(), x, and y.

◆ cclass_column_index()

static int cclass_column_index ( struct colormap * cm,
chr  c 
)
static

Definition at line 675 of file regc_locale.c.

676 {
677  int colnum = 0;
678 
679  /* Shouldn't go through all these pushups for simple chrs */
680  assert(c > MAX_SIMPLE_CHR);
681 
682  /*
683  * Note: we should not see requests to consider cclasses that are not
684  * treated as locale-specific by cclasscvec(), above.
685  */
686  if (cm->classbits[CC_PRINT] && pg_wc_isprint(c))
687  colnum |= cm->classbits[CC_PRINT];
688  if (cm->classbits[CC_ALNUM] && pg_wc_isalnum(c))
689  colnum |= cm->classbits[CC_ALNUM];
690  if (cm->classbits[CC_ALPHA] && pg_wc_isalpha(c))
691  colnum |= cm->classbits[CC_ALPHA];
692  if (cm->classbits[CC_WORD] && pg_wc_isword(c))
693  colnum |= cm->classbits[CC_WORD];
694  assert(cm->classbits[CC_ASCII] == 0);
695  assert(cm->classbits[CC_BLANK] == 0);
696  assert(cm->classbits[CC_CNTRL] == 0);
697  if (cm->classbits[CC_DIGIT] && pg_wc_isdigit(c))
698  colnum |= cm->classbits[CC_DIGIT];
699  if (cm->classbits[CC_PUNCT] && pg_wc_ispunct(c))
700  colnum |= cm->classbits[CC_PUNCT];
701  assert(cm->classbits[CC_XDIGIT] == 0);
702  if (cm->classbits[CC_SPACE] && pg_wc_isspace(c))
703  colnum |= cm->classbits[CC_SPACE];
704  if (cm->classbits[CC_LOWER] && pg_wc_islower(c))
705  colnum |= cm->classbits[CC_LOWER];
706  if (cm->classbits[CC_UPPER] && pg_wc_isupper(c))
707  colnum |= cm->classbits[CC_UPPER];
708  if (cm->classbits[CC_GRAPH] && pg_wc_isgraph(c))
709  colnum |= cm->classbits[CC_GRAPH];
710 
711  return colnum;
712 }
static int pg_wc_islower(pg_wchar c)
static int pg_wc_isword(pg_wchar c)
static int pg_wc_isspace(pg_wchar c)
static int pg_wc_ispunct(pg_wchar c)
static int pg_wc_isgraph(pg_wchar c)
static int pg_wc_isprint(pg_wchar c)
static int pg_wc_isalnum(pg_wchar c)
static int pg_wc_isdigit(pg_wchar c)
static int pg_wc_isupper(pg_wchar c)
static int pg_wc_isalpha(pg_wchar c)
#define MAX_SIMPLE_CHR
Definition: regcustom.h:86
#define assert(x)
Definition: regcustom.h:55
@ CC_UPPER
Definition: regguts.h:136
@ CC_WORD
Definition: regguts.h:136
@ CC_LOWER
Definition: regguts.h:136
@ CC_ASCII
Definition: regguts.h:135
@ CC_ALNUM
Definition: regguts.h:135
@ CC_XDIGIT
Definition: regguts.h:136
@ CC_PRINT
Definition: regguts.h:136
@ CC_BLANK
Definition: regguts.h:135
@ CC_GRAPH
Definition: regguts.h:135
@ CC_CNTRL
Definition: regguts.h:135
@ CC_SPACE
Definition: regguts.h:136
@ CC_DIGIT
Definition: regguts.h:135
@ CC_ALPHA
Definition: regguts.h:135
@ CC_PUNCT
Definition: regguts.h:136
int classbits[NUM_CCLASSES]
Definition: regguts.h:238

References assert, CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT, colormap::classbits, MAX_SIMPLE_CHR, pg_wc_isalnum(), pg_wc_isalpha(), pg_wc_isdigit(), pg_wc_isgraph(), pg_wc_islower(), pg_wc_isprint(), pg_wc_ispunct(), pg_wc_isspace(), pg_wc_isupper(), and pg_wc_isword().

Referenced by pg_reg_getcolor().

◆ cclasscvec()

static struct cvec* cclasscvec ( struct vars * v,
enum char_classes  cclasscode,
int  cases 
)
static

Definition at line 573 of file regc_locale.c.

576 {
577  struct cvec *cv = NULL;
578 
579  /*
580  * Remap lower and upper to alpha if the match is case insensitive.
581  */
582 
583  if (cases &&
584  (cclasscode == CC_LOWER ||
585  cclasscode == CC_UPPER))
586  cclasscode = CC_ALPHA;
587 
588  /*
589  * Now compute the character class contents. For classes that are based
590  * on the behavior of a <wctype.h> or <ctype.h> function, we use
591  * pg_ctype_get_cache so that we can cache the results. Other classes
592  * have definitions that are hard-wired here, and for those we just
593  * construct a transient cvec on the fly.
594  *
595  * NB: keep this code in sync with cclass_column_index(), below.
596  */
597 
598  switch (cclasscode)
599  {
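600  case CC_PRINT:
601  cv = pg_ctype_get_cache(pg_wc_isprint, cclasscode);
602  break;
603  case CC_ALNUM:
604  cv = pg_ctype_get_cache(pg_wc_isalnum, cclasscode);
605  break;
606  case CC_ALPHA:
607  cv = pg_ctype_get_cache(pg_wc_isalpha, cclasscode);
608  break;
609  case CC_WORD:
610  cv = pg_ctype_get_cache(pg_wc_isword, cclasscode);
611  break;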
612  case CC_ASCII:
613  /* hard-wired meaning */
614  cv = getcvec(v, 0, 1);
615  if (cv)
616  addrange(cv, 0, 0x7f);
617  break;
618  case CC_BLANK:
619  /* hard-wired meaning */
620  cv = getcvec(v, 2, 0);
621  addchr(cv, '\t');
622  addchr(cv, ' ');
623  break;
624  case CC_CNTRL:
625  /* hard-wired meaning */
626  cv = getcvec(v, 0, 2);
627  addrange(cv, 0x0, 0x1f);
628  addrange(cv, 0x7f, 0x9f);
629  break;
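630  case CC_DIGIT:
631  cv = pg_ctype_get_cache(pg_wc_isdigit, cclasscode);
632  break;
633  case CC_PUNCT:
634  cv = pg_ctype_get_cache(pg_wc_ispunct, cclasscode);
635  break;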
636  case CC_XDIGIT:
637 
638  /*
639  * It's not clear how to define this in non-western locales, and
640  * even less clear that there's any particular use in trying. So
641  * just hard-wire the meaning.
642  */
643  cv = getcvec(v, 0, 3);
644  if (cv)
645  {
646  addrange(cv, '0', '9');
647  addrange(cv, 'a', 'f');
648  addrange(cv, 'A', 'F');
649  }
650  break;
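651  case CC_SPACE:
652  cv = pg_ctype_get_cache(pg_wc_isspace, cclasscode);
653  break;
654  case CC_LOWER:
655  cv = pg_ctype_get_cache(pg_wc_islower, cclasscode);
656  break;
657  case CC_UPPER:
658  cv = pg_ctype_get_cache(pg_wc_isupper, cclasscode);
659  break;
660  case CC_GRAPH:
661  cv = pg_ctype_get_cache(pg_wc_isgraph, cclasscode);
662  break;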
663  }
664 
665  /* If cv is NULL now, the reason must be "out of memory" */
666  if (cv == NULL)
667  ERR(REG_ESPACE);
668  return cv;
669 }
#define ERR
Definition: _int.h:161
static void addrange(struct cvec *cv, chr from, chr to)
Definition: regc_cvec.c:90
static struct cvec * pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
#define REG_ESPACE
Definition: regex.h:149
int cclasscode
Definition: regguts.h:281

References addchr(), addrange(), CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT, cvec::cclasscode, ERR, getcvec(), pg_ctype_get_cache(), pg_wc_isalnum(), pg_wc_isalpha(), pg_wc_isdigit(), pg_wc_isgraph(), pg_wc_islower(), pg_wc_isprint(), pg_wc_ispunct(), pg_wc_isspace(), pg_wc_isupper(), pg_wc_isword(), and REG_ESPACE.

◆ cmp()

static int cmp ( const chr * x,
const chr * y,
size_t  len 
)
static

Definition at line 747 of file regc_locale.c.

749 {
750  return memcmp(VS(x), VS(y), len * sizeof(chr));
751 }
#define VS(x)
Definition: regguts.h:61

References len, VS, x, and y.

Referenced by addToArray(), adjacent_cmp_bounds(), adjacent_inner_consistent(), append_startup_cost_compare(), append_total_cost_compare(), avlInsertNode(), binaryCompareStrings(), bit_cmp(), bitposition(), bounds_adjacent(), bpchar_larger(), bpchar_smaller(), bpcharcmp(), bpcharge(), bpchargt(), bpcharle(), bpcharlt(), bsearch_arg(), build_expanded_ranges(), byteacmp(), byteage(), byteagt(), byteale(), bytealt(), calc_word_similarity(), collectMatchBitmap(), comp_ptrgm(), compareItems(), compareStrings(), executeComparison(), findeq(), gcd_var(), GetVariable(), gin_btree_compare_prefix(), gin_cmp_prefix(), gin_cmp_tslexeme(), ginMergeItemPointers(), IndexNextWithReorder(), invariant_g_offset(), invariant_l_nontarget_offset(), invariant_l_offset(), invariant_leq_offset(), library_name_compare(), list_sort(), ln_var(), make_row_comparison_op(), make_row_distinct_op(), matchPartialInPendingList(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), merge_overlapping_ranges(), multirange_cmp(), multirange_elem_bsearch_comparison(), multirange_ge(), multirange_gt(), multirange_le(), multirange_lt(), oidvectoreq(), oidvectorge(), oidvectorgt(), oidvectorle(), oidvectorlt(), oidvectorne(), pairingheap_GISTSearchItem_cmp(), range_cmp(), range_compare(), range_contains_elem_internal(), range_ge(), range_gt(), range_le(), range_lt(), range_serialize(), rbound_bsearch(), rbt_find(), rbt_find_great(), rbt_find_less(), rbt_insert(), reduce_expanded_ranges(), seg_different(), seg_ge(), seg_gt(), seg_le(), seg_lt(), seg_same(), set_cheapest(), SetVariable(), SetVariableHooks(), sift_up(), sort_expanded_ranges(), spg_range_quad_inner_consistent(), transformAExprIn(), tsCompareString(), tsvector_bsearch(), tsvector_concat(), and VariableHasHook().

◆ eclass()

static struct cvec* eclass ( struct vars * v,
chr  c,
int  cases 
)
static

Definition at line 504 of file regc_locale.c.

508 {
509  struct cvec *cv;
510 
511  /* crude fake equivalence class for testing */
512  if ((v->cflags & REG_FAKE) && c == 'x')
513  {
514  cv = getcvec(v, 4, 0);
515  addchr(cv, CHR('x'));
516  addchr(cv, CHR('y'));
517  if (cases)
518  {
519  addchr(cv, CHR('X'));
520  addchr(cv, CHR('Y'));
521  }
522  return cv;
523  }
524 
525  /* otherwise, none */
526  if (cases)
527  return allcases(v, c);
528  cv = getcvec(v, 1, 0);
529  assert(cv != NULL);
530  addchr(cv, c);
531  return cv;
532 }
static struct cvec * allcases(struct vars *v, chr c)
Definition: regc_locale.c:721
#define CHR(c)
Definition: regcustom.h:61
#define REG_FAKE
Definition: regex.h:116
int cflags
Definition: regcomp.c:287

References addchr(), allcases(), assert, vars::cflags, CHR, getcvec(), and REG_FAKE.

Referenced by eclass_useful_for_merging(), make_canonical_pathkey(), make_pathkey_from_sortinfo(), and print_pathkeys().

◆ element()

static chr element ( struct vars * v,
const chr * startp,
const chr * endp 
)
static

Definition at line 376 of file regc_locale.c.

379 {
380  const struct cname *cn;
381  size_t len;
382 
383  /* generic: one-chr names stand for themselves */
384  assert(startp < endp);
385  len = endp - startp;
386  if (len == 1)
387  return *startp;
388 
389  NOTE(REG_ULOCALE);
390 
391  /* search table */
392  for (cn = cnames; cn->name != NULL; cn++)
393  {
394  if (strlen(cn->name) == len &&
395  pg_char_and_wchar_strncmp(cn->name, startp, len) == 0)
396  {
397  break; /* NOTE BREAK OUT */
398  }
399  }
400  if (cn->name != NULL)
401  return CHR(cn->code);
402 
403  /* couldn't find it */
404  ERR(REG_ECOLLATE);
405  return 0;
406 }
@ NOTE
Definition: pg_regress.c:82
static const struct cname cnames[]
#define REG_ULOCALE
Definition: regex.h:70
#define REG_ECOLLATE
Definition: regex.h:140
const char * name
Definition: regc_locale.c:57
const char code
Definition: regc_locale.c:58
int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n)
Definition: wstrncmp.c:55

References assert, CHR, cnames, cname::code, ERR, len, cname::name, NOTE, pg_char_and_wchar_strncmp(), REG_ECOLLATE, and REG_ULOCALE.

Referenced by chrnamed(), ecpg_store_input(), ExecEvalHashedScalarArrayOp(), json_array_element(), json_array_element_text(), jsonb_array_element(), jsonb_array_element_text(), nfalsepos_for_missing_strings(), populate_array_element(), populate_with_dummy_strings(), transformArrayExpr(), transformCreateSchemaStmt(), and transformCreateStmt().

◆ lookupcclass()

static enum char_classes lookupcclass ( struct vars * v,
const chr * startp,
const chr * endp 
)
static

Definition at line 540 of file regc_locale.c.

543 {
544  size_t len;
545  const char *const *namePtr;
546  int i;
547 
548  /*
549  * Map the name to the corresponding enumerated value.
550  */
551  len = endp - startp;
552  for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++)
553  {
554  if (strlen(*namePtr) == len &&
555  pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0)
556  return (enum char_classes) i;
557  }
558 
559  ERR(REG_ECTYPE);
560  return (enum char_classes) 0;
561 }
int i
Definition: isn.c:73
static const char *const classNames[NUM_CCLASSES+1]
Definition: regc_locale.c:356
#define REG_ECTYPE
Definition: regex.h:141
char_classes
Definition: regguts.h:134

References classNames, ERR, i, len, pg_char_and_wchar_strncmp(), and REG_ECTYPE.

◆ range()

static struct cvec* range ( struct vars * v,
chr  a,
chr  b,
int  cases 
)
static

Definition at line 412 of file regc_locale.c.

416 {
417  int nchrs;
418  struct cvec *cv;
419  chr c,
420  cc;
421 
422  if (a != b && !before(a, b))
423  {
424  ERR(REG_ERANGE);
425  return NULL;
426  }
427 
428  if (!cases)
429  { /* easy version */
430  cv = getcvec(v, 0, 1);
431  NOERRN();
432  addrange(cv, a, b);
433  return cv;
434  }
435 
436  /*
437  * When case-independent, it's hard to decide when cvec ranges are usable,
438  * so for now at least, we won't try. We use a range for the originally
439  * specified chrs and then add on any case-equivalents that are outside
440  * that range as individual chrs.
441  *
442  * To ensure sane behavior if someone specifies a very large range, limit
443  * the allocation size to 100000 chrs (arbitrary) and check for overrun
444  * inside the loop below.
445  */
446  nchrs = b - a + 1;
447  if (nchrs <= 0 || nchrs > 100000)
448  nchrs = 100000;
449 
450  cv = getcvec(v, nchrs, 1);
451  NOERRN();
452  addrange(cv, a, b);
453 
454  for (c = a; c <= b; c++)
455  {
456  cc = pg_wc_tolower(c);
457  if (cc != c &&
458  (before(cc, a) || before(b, cc)))
459  {
460  if (cv->nchrs >= cv->chrspace)
461  {
462  ERR(REG_ETOOBIG);
463  return NULL;
464  }
465  addchr(cv, cc);
466  }
467  cc = pg_wc_toupper(c);
468  if (cc != c &&
469  (before(cc, a) || before(b, cc)))
470  {
471  if (cv->nchrs >= cv->chrspace)
472  {
473  ERR(REG_ETOOBIG);
474  return NULL;
475  }
476  addchr(cv, cc);
477  }
478  if (CANCEL_REQUESTED(v->re))
479  {
480  ERR(REG_CANCEL);
481  return NULL;
482  }
483  }
484 
485  return cv;
486 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69
static int before(chr x, chr y)
Definition: regc_locale.c:492
#define NOERRN()
Definition: regcomp.c:322
#define REG_CANCEL
Definition: regex.h:157
#define REG_ETOOBIG
Definition: regex.h:155
#define REG_ERANGE
Definition: regex.h:148
#define CANCEL_REQUESTED(re)
Definition: regguts.h:517
int chrspace
Definition: regguts.h:276
int nchrs
Definition: regguts.h:275
regex_t * re
Definition: regcomp.c:283

References a, addchr(), addrange(), b, before(), CANCEL_REQUESTED, cvec::chrspace, ERR, getcvec(), cvec::nchrs, NOERRN, pg_wc_tolower(), pg_wc_toupper(), vars::re, REG_CANCEL, REG_ERANGE, and REG_ETOOBIG.

Referenced by AdjustIntervalForTypmod(), brin_range_deserialize(), brin_range_serialize(), chrnamed(), codepoint_range_cmp(), compute_range_stats(), DecodeInterval(), DecodeTime(), DecodeTimeCommon(), DecodeTimeForInterval(), ensure_free_space_in_buffer(), executeItemOptUnwrapTarget(), g_box_consider_split(), get_gist_range_class(), interval_in(), make_range(), mda_get_prod(), multirange_constructor1(), multirange_get_range(), multirange_in(), multirange_out(), multirange_send(), multirange_unnest(), pg_prng_uint64_range(), pg_to_ascii(), printJsonPathItem(), range_constructor2(), range_constructor3(), range_deduplicate_values(), range_deserialize(), range_get_flags(), range_gist_class_split(), range_gist_double_sorting_split(), range_gist_fallback_split(), range_gist_picksplit(), range_gist_single_sorting_split(), range_in(), range_out(), range_recv(), range_send(), range_serialize(), range_set_contain_empty(), record_manifest_details_for_wal_range(), spg_range_quad_inner_consistent(), spg_range_quad_picksplit(), and transformJsonArrayQueryConstructor().

Variable Documentation

◆ classNames

const char* const classNames[NUM_CCLASSES+1]
static
Initial value:
= {
"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
"lower", "print", "punct", "space", "upper", "xdigit", "word",
NULL
}

Definition at line 356 of file regc_locale.c.

◆ cnames

const struct cname cnames[]
static

Referenced by element().