PostgreSQL Source Code  git master
test_regex.c File Reference
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "regex/regex.h"
#include "utils/array.h"
#include "utils/builtins.h"
Include dependency graph for test_regex.c:

Go to the source code of this file.

Data Structures

struct  test_re_flags
 
struct  test_regex_ctx
 

Typedefs

typedef struct test_re_flags test_re_flags
 
typedef struct test_regex_ctx test_regex_ctx
 

Functions

static void test_re_compile (text *text_re, int cflags, Oid collation, regex_t *result_re)
 
static void parse_test_flags (test_re_flags *flags, text *opts)
 
static test_regex_ctx * setup_test_matches (text *orig_str, regex_t *cpattern, test_re_flags *flags, Oid collation, bool use_subpatterns)
 
static ArrayType * build_test_info_result (regex_t *cpattern, test_re_flags *flags)
 
static ArrayType * build_test_match_result (test_regex_ctx *matchctx)
 
 PG_FUNCTION_INFO_V1 (test_regex)
 
Datum test_regex (PG_FUNCTION_ARGS)
 
static bool test_re_execute (regex_t *re, pg_wchar *data, int data_len, int start_search, rm_detail_t *details, int nmatch, regmatch_t *pmatch, int eflags)
 

Variables

 PG_MODULE_MAGIC
 

Typedef Documentation

◆ test_re_flags

typedef struct test_re_flags test_re_flags

◆ test_regex_ctx

typedef struct test_regex_ctx test_regex_ctx

Function Documentation

◆ build_test_info_result()

static ArrayType * build_test_info_result ( regex_t * cpattern,
test_re_flags * flags
)
static

Definition at line 628 of file test_regex.c.

References bit(), buf, construct_md_array(), cstring_to_text(), test_re_flags::info, lengthof, PointerGetDatum, regex_t::re_info, regex_t::re_nsub, REG_UBACKREF, REG_UBBS, REG_UBOUNDS, REG_UBRACES, REG_UBSALNUM, REG_UEMPTYMATCH, REG_UIMPOSSIBLE, REG_ULOCALE, REG_ULOOKAROUND, REG_UNONPOSIX, REG_UPBOTCH, REG_USHORTEST, REG_UUNPORT, REG_UUNSPEC, and snprintf.

Referenced by test_regex().

629 {
630  /* Translation data for flag bits in regex_t.re_info */
631  struct infoname
632  {
633  int bit;
634  const char *text;
635  };
636  static const struct infoname infonames[] = {
637  {REG_UBACKREF, "REG_UBACKREF"},
638  {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
639  {REG_UBOUNDS, "REG_UBOUNDS"},
640  {REG_UBRACES, "REG_UBRACES"},
641  {REG_UBSALNUM, "REG_UBSALNUM"},
642  {REG_UPBOTCH, "REG_UPBOTCH"},
643  {REG_UBBS, "REG_UBBS"},
644  {REG_UNONPOSIX, "REG_UNONPOSIX"},
645  {REG_UUNSPEC, "REG_UUNSPEC"},
646  {REG_UUNPORT, "REG_UUNPORT"},
647  {REG_ULOCALE, "REG_ULOCALE"},
648  {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
649  {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
650  {REG_USHORTEST, "REG_USHORTEST"},
651  {0, NULL}
652  };
653  const struct infoname *inf;
654  Datum elems[lengthof(infonames) + 1];
655  int nresults = 0;
656  char buf[80];
657  int dims[1];
658  int lbs[1];
659 
660  /* Set up results: first, the number of subexpressions */
661  snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
662  elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
663 
664  /* Report individual info bit states */
665  for (inf = infonames; inf->bit != 0; inf++)
666  {
667  if (cpattern->re_info & inf->bit)
668  {
669  if (flags->info & inf->bit)
670  elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
671  else
672  {
673  snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
674  elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
675  }
676  }
677  else
678  {
679  if (flags->info & inf->bit)
680  {
681  snprintf(buf, sizeof(buf), "missing %s!", inf->text);
682  elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
683  }
684  }
685  }
686 
687  /* And form an array */
688  dims[0] = nresults;
689  lbs[0] = 1;
690  /* XXX: this hardcodes assumptions about the text type */
691  return construct_md_array(elems, NULL, 1, dims, lbs,
692  TEXTOID, -1, false, TYPALIGN_INT);
693 }
#define REG_USHORTEST
Definition: regex.h:75
#define REG_UIMPOSSIBLE
Definition: regex.h:74
#define REG_UBSALNUM
Definition: regex.h:64
#define PointerGetDatum(X)
Definition: postgres.h:600
#define lengthof(array)
Definition: c.h:734
#define REG_ULOCALE
Definition: regex.h:72
size_t re_nsub
Definition: regex.h:58
static char * buf
Definition: pg_test_fsync.c:68
#define REG_UBBS
Definition: regex.h:67
struct varlena text
Definition: c.h:635
uintptr_t Datum
Definition: postgres.h:411
#define REG_UUNPORT
Definition: regex.h:71
long re_info
Definition: regex.h:59
#define REG_UBACKREF
Definition: regex.h:60
#define REG_UEMPTYMATCH
Definition: regex.h:73
text * cstring_to_text(const char *s)
Definition: varlena.c:190
Datum bit(PG_FUNCTION_ARGS)
Definition: varbit.c:391
#define REG_UUNSPEC
Definition: regex.h:69
#define REG_UBOUNDS
Definition: regex.h:62
#define REG_UNONPOSIX
Definition: regex.h:68
#define REG_UBRACES
Definition: regex.h:63
ArrayType * construct_md_array(Datum *elems, bool *nulls, int ndims, int *dims, int *lbs, Oid elmtype, int elmlen, bool elmbyval, char elmalign)
Definition: arrayfuncs.c:3353
#define REG_ULOOKAROUND
Definition: regex.h:61
#define snprintf
Definition: port.h:216
#define REG_UPBOTCH
Definition: regex.h:65

◆ build_test_match_result()

static ArrayType * build_test_match_result ( test_regex_ctx * matchctx )
static

Definition at line 702 of file test_regex.c.

References Assert, buf, test_re_flags::cflags, construct_md_array(), test_regex_ctx::conv_buf, cstring_to_text(), cstring_to_text_with_len(), test_regex_ctx::details, DirectFunctionCall3, test_regex_ctx::elems, i, test_re_flags::indices, Int32GetDatum, test_regex_ctx::match_locs, test_regex_ctx::next_match, test_regex_ctx::npatterns, test_regex_ctx::nulls, test_regex_ctx::orig_str, pg_wchar2mb_with_len(), PointerGetDatum, test_regex_ctx::re_flags, REG_EXPECT, regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, snprintf, text_substr(), and test_regex_ctx::wide_str.

Referenced by test_regex().

703 {
704  char *buf = matchctx->conv_buf;
705  Datum *elems = matchctx->elems;
706  bool *nulls = matchctx->nulls;
707  bool indices = matchctx->re_flags.indices;
708  char bufstr[80];
709  int dims[1];
710  int lbs[1];
711  int loc;
712  int i;
713 
714  /* Extract matching substrings from the original string */
715  loc = matchctx->next_match * matchctx->npatterns * 2;
716  for (i = 0; i < matchctx->npatterns; i++)
717  {
718  int so = matchctx->match_locs[loc++];
719  int eo = matchctx->match_locs[loc++];
720 
721  if (indices)
722  {
723  /* Report eo this way for consistency with Tcl */
724  snprintf(bufstr, sizeof(bufstr), "%d %d",
725  so, so < 0 ? eo : eo - 1);
726  elems[i] = PointerGetDatum(cstring_to_text(bufstr));
727  nulls[i] = false;
728  }
729  else if (so < 0 || eo < 0)
730  {
731  elems[i] = (Datum) 0;
732  nulls[i] = true;
733  }
734  else if (buf)
735  {
736  int len = pg_wchar2mb_with_len(matchctx->wide_str + so,
737  buf,
738  eo - so);
739 
740  Assert(len < matchctx->conv_bufsiz);
741  elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
742  nulls[i] = false;
743  }
744  else
745  {
746  elems[i] = DirectFunctionCall3(text_substr,
747  PointerGetDatum(matchctx->orig_str),
748  Int32GetDatum(so + 1),
749  Int32GetDatum(eo - so));
750  nulls[i] = false;
751  }
752  }
753 
754  /* In EXPECT indices mode, also report the "details" */
755  if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
756  {
757  int so = matchctx->details.rm_extend.rm_so;
758  int eo = matchctx->details.rm_extend.rm_eo;
759 
760  snprintf(bufstr, sizeof(bufstr), "%d %d",
761  so, so < 0 ? eo : eo - 1);
762  elems[i] = PointerGetDatum(cstring_to_text(bufstr));
763  nulls[i] = false;
764  i++;
765  }
766 
767  /* And form an array */
768  dims[0] = i;
769  lbs[0] = 1;
770  /* XXX: this hardcodes assumptions about the text type */
771  return construct_md_array(elems, nulls, 1, dims, lbs,
772  TEXTOID, -1, false, TYPALIGN_INT);
773 }
int * match_locs
Definition: test_regex.c:47
#define REG_EXPECT
Definition: regex.h:115
#define PointerGetDatum(X)
Definition: postgres.h:600
regoff_t rm_so
Definition: regex.h:87
test_re_flags re_flags
Definition: test_regex.c:40
bool * nulls
Definition: test_regex.c:51
regoff_t rm_eo
Definition: regex.h:88
static char * buf
Definition: pg_test_fsync.c:68
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:202
rm_detail_t details
Definition: test_regex.c:41
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:630
Datum text_substr(PG_FUNCTION_ARGS)
Definition: varlena.c:839
uintptr_t Datum
Definition: postgres.h:411
text * orig_str
Definition: test_regex.c:42
text * cstring_to_text(const char *s)
Definition: varlena.c:190
#define Assert(condition)
Definition: c.h:804
char * conv_buf
Definition: test_regex.c:53
pg_wchar * wide_str
Definition: test_regex.c:52
regmatch_t rm_extend
Definition: regex.h:94
#define Int32GetDatum(X)
Definition: postgres.h:523
int i
Datum * elems
Definition: test_regex.c:50
int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
Definition: mbutils.c:951
ArrayType * construct_md_array(Datum *elems, bool *nulls, int ndims, int *dims, int *lbs, Oid elmtype, int elmlen, bool elmbyval, char elmalign)
Definition: arrayfuncs.c:3353
#define snprintf
Definition: port.h:216

◆ parse_test_flags()

static void parse_test_flags ( test_re_flags * flags,
text * opts
)
static

Definition at line 260 of file test_regex.c.

References test_re_flags::cflags, test_re_flags::eflags, ereport, errcode(), errmsg(), ERROR, test_re_flags::glob, i, test_re_flags::indices, test_re_flags::info, test_re_flags::partial, pg_mblen(), REG_ADVANCED, REG_ADVF, REG_BOSONLY, REG_DUMP, REG_EXPANDED, REG_EXPECT, REG_EXTENDED, REG_FAKE, REG_FTRACE, REG_ICASE, REG_MTRACE, REG_NEWLINE, REG_NLANCH, REG_NLSTOP, REG_NOSUB, REG_NOTBOL, REG_NOTEOL, REG_PROGRESS, REG_QUOTE, REG_SMALL, REG_UBACKREF, REG_UBBS, REG_UBOUNDS, REG_UBRACES, REG_UBSALNUM, REG_UEMPTYMATCH, REG_UIMPOSSIBLE, REG_ULOCALE, REG_ULOOKAROUND, REG_UNONPOSIX, REG_UPBOTCH, REG_USHORTEST, REG_UUNPORT, REG_UUNSPEC, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by test_regex().

261 {
262  /* these defaults must match Tcl's */
263  int cflags = REG_ADVANCED;
264  int eflags = 0;
265  long info = 0;
266 
267  flags->glob = false;
268  flags->indices = false;
269  flags->partial = false;
270 
271  if (opts)
272  {
273  char *opt_p = VARDATA_ANY(opts);
274  int opt_len = VARSIZE_ANY_EXHDR(opts);
275  int i;
276 
277  for (i = 0; i < opt_len; i++)
278  {
279  switch (opt_p[i])
280  {
281  case '-':
282  /* allowed, no-op */
283  break;
284  case '!':
285  flags->partial = true;
286  break;
287  case '*':
288  /* test requires Unicode --- ignored here */
289  break;
290  case '0':
291  flags->indices = true;
292  break;
293 
294  /* These flags correspond to user-exposed RE options: */
295  case 'g': /* global match */
296  flags->glob = true;
297  break;
298  case 'i': /* case insensitive */
299  cflags |= REG_ICASE;
300  break;
301  case 'n': /* \n affects ^ $ . [^ */
302  cflags |= REG_NEWLINE;
303  break;
304  case 'p': /* ~Perl, \n affects . [^ */
305  cflags |= REG_NLSTOP;
306  cflags &= ~REG_NLANCH;
307  break;
308  case 'w': /* weird, \n affects ^ $ only */
309  cflags &= ~REG_NLSTOP;
310  cflags |= REG_NLANCH;
311  break;
312  case 'x': /* expanded syntax */
313  cflags |= REG_EXPANDED;
314  break;
315 
316  /* These flags correspond to Tcl's -xflags options: */
317  case 'a':
318  cflags |= REG_ADVF;
319  break;
320  case 'b':
321  cflags &= ~REG_ADVANCED;
322  break;
323  case 'c':
324 
325  /*
326  * Tcl calls this TCL_REG_CANMATCH, but it's really
327  * REG_EXPECT. In this implementation we must also set
328  * the partial and indices flags, so that
329  * setup_test_matches and build_test_match_result will
330  * emit the desired data. (They'll emit more fields than
331  * Tcl would, but that's fine.)
332  */
333  cflags |= REG_EXPECT;
334  flags->partial = true;
335  flags->indices = true;
336  break;
337  case 'e':
338  cflags &= ~REG_ADVANCED;
339  cflags |= REG_EXTENDED;
340  break;
341  case 'q':
342  cflags &= ~REG_ADVANCED;
343  cflags |= REG_QUOTE;
344  break;
345  case 'o': /* o for opaque */
346  cflags |= REG_NOSUB;
347  break;
348  case 's': /* s for start */
349  cflags |= REG_BOSONLY;
350  break;
351  case '+':
352  cflags |= REG_FAKE;
353  break;
354  case ',':
355  cflags |= REG_PROGRESS;
356  break;
357  case '.':
358  cflags |= REG_DUMP;
359  break;
360  case ':':
361  eflags |= REG_MTRACE;
362  break;
363  case ';':
364  eflags |= REG_FTRACE;
365  break;
366  case '^':
367  eflags |= REG_NOTBOL;
368  break;
369  case '$':
370  eflags |= REG_NOTEOL;
371  break;
372  case 't':
373  cflags |= REG_EXPECT;
374  break;
375  case '%':
376  eflags |= REG_SMALL;
377  break;
378 
379  /* These flags define expected info bits: */
380  case 'A':
381  info |= REG_UBSALNUM;
382  break;
383  case 'B':
384  info |= REG_UBRACES;
385  break;
386  case 'E':
387  info |= REG_UBBS;
388  break;
389  case 'H':
390  info |= REG_ULOOKAROUND;
391  break;
392  case 'I':
393  info |= REG_UIMPOSSIBLE;
394  break;
395  case 'L':
396  info |= REG_ULOCALE;
397  break;
398  case 'M':
399  info |= REG_UUNPORT;
400  break;
401  case 'N':
402  info |= REG_UEMPTYMATCH;
403  break;
404  case 'P':
405  info |= REG_UNONPOSIX;
406  break;
407  case 'Q':
408  info |= REG_UBOUNDS;
409  break;
410  case 'R':
411  info |= REG_UBACKREF;
412  break;
413  case 'S':
414  info |= REG_UUNSPEC;
415  break;
416  case 'T':
417  info |= REG_USHORTEST;
418  break;
419  case 'U':
420  info |= REG_UPBOTCH;
421  break;
422 
423  default:
424  ereport(ERROR,
425  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
426  errmsg("invalid regular expression test option: \"%.*s\"",
427  pg_mblen(opt_p + i), opt_p + i)));
428  break;
429  }
430  }
431  }
432  flags->cflags = cflags;
433  flags->eflags = eflags;
434  flags->info = info;
435 }
#define REG_USHORTEST
Definition: regex.h:75
#define REG_UIMPOSSIBLE
Definition: regex.h:74
#define REG_NLSTOP
Definition: regex.h:111
#define REG_UBSALNUM
Definition: regex.h:64
#define REG_NOTBOL
Definition: regex.h:126
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define REG_EXPECT
Definition: regex.h:115
#define REG_QUOTE
Definition: regex.h:106
int errcode(int sqlerrcode)
Definition: elog.c:698
#define REG_FTRACE
Definition: regex.h:129
#define REG_ULOCALE
Definition: regex.h:72
#define REG_ICASE
Definition: regex.h:108
#define REG_MTRACE
Definition: regex.h:130
#define REG_PROGRESS
Definition: regex.h:119
#define ERROR
Definition: elog.h:46
#define REG_BOSONLY
Definition: regex.h:116
#define REG_FAKE
Definition: regex.h:118
#define REG_NEWLINE
Definition: regex.h:113
#define REG_UBBS
Definition: regex.h:67
#define REG_ADVANCED
Definition: regex.h:105
#define REG_EXTENDED
Definition: regex.h:103
#define REG_UUNPORT
Definition: regex.h:71
#define REG_ADVF
Definition: regex.h:104
#define REG_SMALL
Definition: regex.h:131
#define REG_DUMP
Definition: regex.h:117
#define ereport(elevel,...)
Definition: elog.h:157
#define REG_UBACKREF
Definition: regex.h:60
#define REG_UEMPTYMATCH
Definition: regex.h:73
#define REG_UUNSPEC
Definition: regex.h:69
#define REG_NLANCH
Definition: regex.h:112
#define REG_UBOUNDS
Definition: regex.h:62
#define REG_NOSUB
Definition: regex.h:109
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
int errmsg(const char *fmt,...)
Definition: elog.c:909
int i
#define REG_EXPANDED
Definition: regex.h:110
#define REG_UNONPOSIX
Definition: regex.h:68
#define REG_UBRACES
Definition: regex.h:63
#define REG_ULOOKAROUND
Definition: regex.h:61
#define REG_NOTEOL
Definition: regex.h:127
#define REG_UPBOTCH
Definition: regex.h:65

◆ PG_FUNCTION_INFO_V1()

PG_FUNCTION_INFO_V1 ( test_regex  )

◆ setup_test_matches()

static test_regex_ctx * setup_test_matches ( text * orig_str,
regex_t * cpattern,
test_re_flags * flags,
Oid  collation,
bool  use_subpatterns
)
static

Definition at line 445 of file test_regex.c.

References Assert, test_regex_ctx::conv_buf, test_regex_ctx::conv_bufsiz, test_regex_ctx::details, test_re_flags::eflags, ereport, errcode(), errmsg(), ERROR, test_re_flags::glob, i, test_re_flags::indices, test_regex_ctx::match_locs, MaxAllocSize, test_regex_ctx::nmatches, test_regex_ctx::npatterns, test_regex_ctx::orig_str, palloc(), palloc0(), test_re_flags::partial, pfree(), pg_database_encoding_max_length(), pg_mb2wchar_with_len(), test_regex_ctx::re_flags, regex_t::re_nsub, repalloc(), regmatch_t::rm_eo, rm_detail_t::rm_extend, regmatch_t::rm_so, test_re_execute(), VARDATA_ANY, VARSIZE_ANY_EXHDR, and test_regex_ctx::wide_str.

Referenced by test_regex().

449 {
450  test_regex_ctx *matchctx = palloc0(sizeof(test_regex_ctx));
451  int eml = pg_database_encoding_max_length();
452  int orig_len;
453  pg_wchar *wide_str;
454  int wide_len;
455  regmatch_t *pmatch;
456  int pmatch_len;
457  int array_len;
458  int array_idx;
459  int prev_match_end;
460  int start_search;
461  int maxlen = 0; /* largest fetch length in characters */
462 
463  /* save flags */
464  matchctx->re_flags = *re_flags;
465 
466  /* save original string --- we'll extract result substrings from it */
467  matchctx->orig_str = orig_str;
468 
469  /* convert string to pg_wchar form for matching */
470  orig_len = VARSIZE_ANY_EXHDR(orig_str);
471  wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
472  wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
473 
474  /* do we want to remember subpatterns? */
475  if (use_subpatterns && cpattern->re_nsub > 0)
476  {
477  matchctx->npatterns = cpattern->re_nsub + 1;
478  pmatch_len = cpattern->re_nsub + 1;
479  }
480  else
481  {
482  use_subpatterns = false;
483  matchctx->npatterns = 1;
484  pmatch_len = 1;
485  }
486 
487  /* temporary output space for RE package */
488  pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
489 
490  /*
491  * the real output space (grown dynamically if needed)
492  *
493  * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
494  * than at 2^27
495  */
496  array_len = re_flags->glob ? 255 : 31;
497  matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
498  array_idx = 0;
499 
500  /* search for the pattern, perhaps repeatedly */
501  prev_match_end = 0;
502  start_search = 0;
503  while (test_re_execute(cpattern, wide_str, wide_len,
504  start_search,
505  &matchctx->details,
506  pmatch_len, pmatch,
507  re_flags->eflags))
508  {
509  /* enlarge output space if needed */
510  while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
511  {
512  array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
513  if (array_len > MaxAllocSize / sizeof(int))
514  ereport(ERROR,
515  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
516  errmsg("too many regular expression matches")));
517  matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
518  sizeof(int) * array_len);
519  }
520 
521  /* save this match's locations */
522  for (int i = 0; i < matchctx->npatterns; i++)
523  {
524  int so = pmatch[i].rm_so;
525  int eo = pmatch[i].rm_eo;
526 
527  matchctx->match_locs[array_idx++] = so;
528  matchctx->match_locs[array_idx++] = eo;
529  if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
530  maxlen = (eo - so);
531  }
532  matchctx->nmatches++;
533  prev_match_end = pmatch[0].rm_eo;
534 
535  /* if not glob, stop after one match */
536  if (!re_flags->glob)
537  break;
538 
539  /*
540  * Advance search position. Normally we start the next search at the
541  * end of the previous match; but if the match was of zero length, we
542  * have to advance by one character, or we'd just find the same match
543  * again.
544  */
545  start_search = prev_match_end;
546  if (pmatch[0].rm_so == pmatch[0].rm_eo)
547  start_search++;
548  if (start_search > wide_len)
549  break;
550  }
551 
552  /*
553  * If we had no match, but "partial" and "indices" are set, emit the
554  * details.
555  */
556  if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
557  {
558  /* enlarge output space if needed */
559  while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
560  {
561  array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
562  if (array_len > MaxAllocSize / sizeof(int))
563  ereport(ERROR,
564  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
565  errmsg("too many regular expression matches")));
566  matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
567  sizeof(int) * array_len);
568  }
569 
570  matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
571  matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
572  /* we don't have pmatch data, so emit -1 */
573  for (int i = 1; i < matchctx->npatterns; i++)
574  {
575  matchctx->match_locs[array_idx++] = -1;
576  matchctx->match_locs[array_idx++] = -1;
577  }
578  matchctx->nmatches++;
579  }
580 
581  Assert(array_idx <= array_len);
582 
583  if (eml > 1)
584  {
585  int64 maxsiz = eml * (int64) maxlen;
586  int conv_bufsiz;
587 
588  /*
589  * Make the conversion buffer large enough for any substring of
590  * interest.
591  *
592  * Worst case: assume we need the maximum size (maxlen*eml), but take
593  * advantage of the fact that the original string length in bytes is
594  * an upper bound on the byte length of any fetched substring (and we
595  * know that len+1 is safe to allocate because the varlena header is
596  * longer than 1 byte).
597  */
598  if (maxsiz > orig_len)
599  conv_bufsiz = orig_len + 1;
600  else
601  conv_bufsiz = maxsiz + 1; /* safe since maxsiz < 2^30 */
602 
603  matchctx->conv_buf = palloc(conv_bufsiz);
604  matchctx->conv_bufsiz = conv_bufsiz;
605  matchctx->wide_str = wide_str;
606  }
607  else
608  {
609  /* No need to keep the wide string if we're in a single-byte charset. */
610  pfree(wide_str);
611  matchctx->wide_str = NULL;
612  matchctx->conv_buf = NULL;
613  matchctx->conv_bufsiz = 0;
614  }
615 
616  /* Clean up temp storage */
617  pfree(pmatch);
618 
619  return matchctx;
620 }
int * match_locs
Definition: test_regex.c:47
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
regoff_t rm_so
Definition: regex.h:87
test_re_flags re_flags
Definition: test_regex.c:40
int errcode(int sqlerrcode)
Definition: elog.c:698
static bool test_re_execute(regex_t *re, pg_wchar *data, int data_len, int start_search, rm_detail_t *details, int nmatch, regmatch_t *pmatch, int eflags)
Definition: test_regex.c:211
regoff_t rm_eo
Definition: regex.h:88
size_t re_nsub
Definition: regex.h:58
void pfree(void *pointer)
Definition: mcxt.c:1169
#define ERROR
Definition: elog.h:46
rm_detail_t details
Definition: test_regex.c:41
unsigned int pg_wchar
Definition: mbprint.c:31
#define MaxAllocSize
Definition: memutils.h:40
void * palloc0(Size size)
Definition: mcxt.c:1093
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:929
text * orig_str
Definition: test_regex.c:42
#define ereport(elevel,...)
Definition: elog.h:157
#define Assert(condition)
Definition: c.h:804
char * conv_buf
Definition: test_regex.c:53
pg_wchar * wide_str
Definition: test_regex.c:52
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1495
regmatch_t rm_extend
Definition: regex.h:94
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
int i

◆ test_re_compile()

static void test_re_compile ( text * text_re,
int  cflags,
Oid  collation,
regex_t * result_re
)
static

Definition at line 161 of file test_regex.c.

References CHECK_FOR_INTERRUPTS, ereport, errcode(), errmsg(), ERROR, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regcomp(), pg_regerror(), REG_OKAY, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by test_regex().

163 {
164  int text_re_len = VARSIZE_ANY_EXHDR(text_re);
165  char *text_re_val = VARDATA_ANY(text_re);
166  pg_wchar *pattern;
167  int pattern_len;
168  int regcomp_result;
169  char errMsg[100];
170 
171  /* Convert pattern string to wide characters */
172  pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
173  pattern_len = pg_mb2wchar_with_len(text_re_val,
174  pattern,
175  text_re_len);
176 
177  regcomp_result = pg_regcomp(result_re,
178  pattern,
179  pattern_len,
180  cflags,
181  collation);
182 
183  pfree(pattern);
184 
185  if (regcomp_result != REG_OKAY)
186  {
187  /* re didn't compile (no need for pg_regfree, if so) */
188 
189  /*
190  * Here and in other places in this file, do CHECK_FOR_INTERRUPTS
191  * before reporting a regex error. This is so that if the regex
192  * library aborts and returns REG_CANCEL, we don't print an error
193  * message that implies the regex was invalid.
194  */
195  CHECK_FOR_INTERRUPTS();
196 
197  pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
198  ereport(ERROR,
199  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
200  errmsg("invalid regular expression: %s", errMsg)));
201  }
202 }
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
int errcode(int sqlerrcode)
Definition: elog.c:698
int pg_regcomp(regex_t *re, const chr *string, size_t len, int flags, Oid collation)
Definition: regcomp.c:328
void pfree(void *pointer)
Definition: mcxt.c:1169
#define REG_OKAY
Definition: regex.h:139
#define ERROR
Definition: elog.h:46
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
unsigned int pg_wchar
Definition: mbprint.c:31
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
Definition: mbutils.c:929
#define ereport(elevel,...)
Definition: elog.h:157
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120

◆ test_re_execute()

static bool test_re_execute ( regex_t * re,
pg_wchar * data,
int  data_len,
int  start_search,
rm_detail_t * details,
int  nmatch,
regmatch_t * pmatch,
int  eflags
)
static

Definition at line 211 of file test_regex.c.

References CHECK_FOR_INTERRUPTS, ereport, errcode(), errmsg(), ERROR, i, pg_regerror(), pg_regexec(), REG_NOMATCH, REG_OKAY, regmatch_t::rm_eo, rm_detail_t::rm_extend, and regmatch_t::rm_so.

Referenced by setup_test_matches().

216 {
217  int regexec_result;
218  char errMsg[100];
219 
220  /* Initialize match locations in case engine doesn't */
221  details->rm_extend.rm_so = -1;
222  details->rm_extend.rm_eo = -1;
223  for (int i = 0; i < nmatch; i++)
224  {
225  pmatch[i].rm_so = -1;
226  pmatch[i].rm_eo = -1;
227  }
228 
229  /* Perform RE match and return result */
230  regexec_result = pg_regexec(re,
231  data,
232  data_len,
233  start_search,
234  details,
235  nmatch,
236  pmatch,
237  eflags);
238 
239  if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
240  {
241  /* re failed??? */
242  CHECK_FOR_INTERRUPTS();
243  pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
244  ereport(ERROR,
245  (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
246  errmsg("regular expression failed: %s", errMsg)));
247  }
248 
249  return (regexec_result == REG_OKAY);
250 }
regoff_t rm_so
Definition: regex.h:87
int errcode(int sqlerrcode)
Definition: elog.c:698
regoff_t rm_eo
Definition: regex.h:88
#define REG_OKAY
Definition: regex.h:139
#define ERROR
Definition: elog.h:46
size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
Definition: regerror.c:60
#define ereport(elevel,...)
Definition: elog.h:157
regmatch_t rm_extend
Definition: regex.h:94
int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)
Definition: regexec.c:176
int errmsg(const char *fmt,...)
Definition: elog.c:909
int i
#define REG_NOMATCH
Definition: regex.h:140
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120

◆ test_regex()

Datum test_regex ( PG_FUNCTION_ARGS  )

Definition at line 80 of file test_regex.c.

References build_test_info_result(), build_test_match_result(), test_re_flags::cflags, test_regex_ctx::elems, MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, test_regex_ctx::next_match, test_regex_ctx::nmatches, test_regex_ctx::npatterns, test_regex_ctx::nulls, palloc(), parse_test_flags(), PG_GET_COLLATION, PG_GETARG_TEXT_P_COPY, PG_GETARG_TEXT_PP, pg_regfree(), PointerGetDatum, setup_test_matches(), SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, test_re_compile(), and FuncCallContext::user_fctx.

81 {
82  FuncCallContext *funcctx;
83  test_regex_ctx *matchctx;
84  ArrayType *result_ary;
85 
86  if (SRF_IS_FIRSTCALL())
87  {
88  text *pattern = PG_GETARG_TEXT_PP(0);
89  text *flags = PG_GETARG_TEXT_PP(2);
90  Oid collation = PG_GET_COLLATION();
91  test_re_flags re_flags;
92  regex_t cpattern;
93  MemoryContext oldcontext;
94 
95  funcctx = SRF_FIRSTCALL_INIT();
96  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
97 
98  /* Determine options */
99  parse_test_flags(&re_flags, flags);
100 
101  /* set up the compiled pattern */
102  test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
103 
104  /* be sure to copy the input string into the multi-call ctx */
105  matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
106  &re_flags,
107  collation,
108  true);
109 
110  /* Pre-create workspace that build_test_match_result needs */
111  matchctx->elems = (Datum *) palloc(sizeof(Datum) *
112  (matchctx->npatterns + 1));
113  matchctx->nulls = (bool *) palloc(sizeof(bool) *
114  (matchctx->npatterns + 1));
115 
116  MemoryContextSwitchTo(oldcontext);
117  funcctx->user_fctx = (void *) matchctx;
118 
119  /*
120  * Return the first result row, which is info equivalent to Tcl's
121  * "regexp -about" output
122  */
123  result_ary = build_test_info_result(&cpattern, &re_flags);
124 
125  pg_regfree(&cpattern);
126 
127  SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
128  }
129  else
130  {
131  /* Each subsequent row describes one match */
132  funcctx = SRF_PERCALL_SETUP();
133  matchctx = (test_regex_ctx *) funcctx->user_fctx;
134 
135  if (matchctx->next_match < matchctx->nmatches)
136  {
137  result_ary = build_test_match_result(matchctx);
138  matchctx->next_match++;
139  SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
140  }
141  }
142 
143  SRF_RETURN_DONE(funcctx);
144 }
static ArrayType * build_test_match_result(test_regex_ctx *matchctx)
Definition: test_regex.c:702
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:293
#define PointerGetDatum(X)
Definition: postgres.h:600
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
bool * nulls
Definition: test_regex.c:51
static test_regex_ctx * setup_test_matches(text *orig_str, regex_t *cpattern, test_re_flags *flags, Oid collation, bool use_subpatterns)
Definition: test_regex.c:445
unsigned int Oid
Definition: postgres_ext.h:31
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:297
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_GETARG_TEXT_P_COPY(n)
Definition: fmgr.h:315
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:299
static ArrayType * build_test_info_result(regex_t *cpattern, test_re_flags *flags)
Definition: test_regex.c:628
static void test_re_compile(text *text_re, int cflags, Oid collation, regex_t *result_re)
Definition: test_regex.c:161
uintptr_t Datum
Definition: postgres.h:411
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
static void parse_test_flags(test_re_flags *flags, text *opts)
Definition: test_regex.c:260
void * user_fctx
Definition: funcapi.h:82
void * palloc(Size size)
Definition: mcxt.c:1062
struct varlena
Definition: c.h:621
Datum * elems
Definition: test_regex.c:50
void pg_regfree(regex_t *re)
Definition: regfree.c:49
regex_t
Definition: regex.h:55
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:317
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:295

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Definition at line 23 of file test_regex.c.