PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
regc_lex.c
Go to the documentation of this file.
/*
 * lexical analyzer
 * This file is #included by regcomp.c.
 *
 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
 *
 * Development of this software was funded, in part, by Cray Research Inc.,
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
 * Corporation, none of whom are responsible for the results. The author
 * thanks all of them.
 *
 * Redistribution and use in source and binary forms -- with or without
 * modification -- are permitted for any purpose, provided that
 * redistributions in source form retain this entire copyright notice and
 * indicate the origin and nature of any modifications.
 *
 * I'd appreciate being given credit for this package in the documentation
 * of software which uses it, but that is not a requirement.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * src/backend/regex/regc_lex.c
 *
 */
34
/*
 * scanning macros (know about v)
 *
 * All of these expect a variable "v" (the compile-state struct) to be in
 * scope; v->now is the scan position and v->stop the end of the input.
 */
#define ATEOS()		(v->now >= v->stop)		/* no input left? */
#define HAVE(n)		(v->stop - v->now >= (n))	/* at least n chrs left? */
/* do the next one/two/three chrs match the given characters? */
#define NEXT1(c)	(!ATEOS() && *v->now == CHR(c))
#define NEXT2(a,b)	(HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
#define NEXT3(a,b,c)	(HAVE(3) && *v->now == CHR(a) && \
						*(v->now+1) == CHR(b) && \
						*(v->now+2) == CHR(c))
/* record the token type (and optionally its value) */
#define SET(c)		(v->nexttype = (c))
#define SETV(c, n)	(v->nexttype = (c), v->nextvalue = (n))
/* record the token and return 1 from the enclosing function */
#define RET(c)		return (SET(c), 1)
#define RETV(c, n)	return (SETV(c, n), 1)
/* report error e and return 0 from the enclosing function */
#define FAILW(e)	return (ERR(e), 0)	/* ERR does SET(EOS) */
/* was the previous token of type t? */
#define LASTTYPE(t) (v->lasttype == (t))

/* lexical contexts */
#define L_ERE	1				/* mainline ERE/ARE */
#define L_BRE	2				/* mainline BRE */
#define L_Q		3				/* REG_QUOTE */
#define L_EBND	4				/* ERE/ARE bound */
#define L_BBND	5				/* BRE bound */
#define L_BRACK 6				/* brackets */
#define L_CEL	7				/* collating element */
#define L_ECL	8				/* equivalence class */
#define L_CCL	9				/* character class */
#define INTOCON(c)	(v->lexcon = (c))		/* switch lexical context */
#define INCON(con)	(v->lexcon == (con))	/* test lexical context */

/*
 * construct pointer past end of an array
 *
 * The element size is taken from the array itself, so the result is right
 * for any element type.  The argument must be a true array, not a pointer.
 */
#define ENDOF(array)	((array) + sizeof(array)/sizeof((array)[0]))
65
66/*
67 * lexstart - set up lexical stuff, scan leading options
68 */
69static void
70lexstart(struct vars *v)
71{
72 prefixes(v); /* may turn on new type bits etc. */
73 NOERR();
74
75 if (v->cflags & REG_QUOTE)
76 {
78 INTOCON(L_Q);
79 }
80 else if (v->cflags & REG_EXTENDED)
81 {
82 assert(!(v->cflags & REG_QUOTE));
84 }
85 else
86 {
87 assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
89 }
90
91 v->nexttype = EMPTY; /* remember we were at the start */
92 next(v); /* set up the first token */
93}
94
95/*
96 * prefixes - implement various special prefixes
97 */
98static void
99prefixes(struct vars *v)
100{
101 /* literal string doesn't get any of this stuff */
102 if (v->cflags & REG_QUOTE)
103 return;
104
105 /* initial "***" gets special things */
106 if (HAVE(4) && NEXT3('*', '*', '*'))
107 switch (*(v->now + 3))
108 {
109 case CHR('?'): /* "***?" error, msg shows version */
111 return; /* proceed no further */
112 break;
113 case CHR('='): /* "***=" shifts to literal string */
115 v->cflags |= REG_QUOTE;
117 v->now += 4;
118 return; /* and there can be no more prefixes */
119 break;
120 case CHR(':'): /* "***:" shifts to AREs */
122 v->cflags |= REG_ADVANCED;
123 v->now += 4;
124 break;
125 default: /* otherwise *** is just an error */
127 return;
128 break;
129 }
130
131 /* BREs and EREs don't get embedded options */
132 if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
133 return;
134
135 /* embedded options (AREs only) */
136 if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
137 {
139 v->now += 2;
140 for (; !ATEOS() && iscalpha(*v->now); v->now++)
141 switch (*v->now)
142 {
143 case CHR('b'): /* BREs (but why???) */
144 v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
145 break;
146 case CHR('c'): /* case sensitive */
147 v->cflags &= ~REG_ICASE;
148 break;
149 case CHR('e'): /* plain EREs */
150 v->cflags |= REG_EXTENDED;
151 v->cflags &= ~(REG_ADVF | REG_QUOTE);
152 break;
153 case CHR('i'): /* case insensitive */
154 v->cflags |= REG_ICASE;
155 break;
156 case CHR('m'): /* Perloid synonym for n */
157 case CHR('n'): /* \n affects ^ $ . [^ */
158 v->cflags |= REG_NEWLINE;
159 break;
160 case CHR('p'): /* ~Perl, \n affects . [^ */
161 v->cflags |= REG_NLSTOP;
162 v->cflags &= ~REG_NLANCH;
163 break;
164 case CHR('q'): /* literal string */
165 v->cflags |= REG_QUOTE;
166 v->cflags &= ~REG_ADVANCED;
167 break;
168 case CHR('s'): /* single line, \n ordinary */
169 v->cflags &= ~REG_NEWLINE;
170 break;
171 case CHR('t'): /* tight syntax */
172 v->cflags &= ~REG_EXPANDED;
173 break;
174 case CHR('w'): /* weird, \n affects ^ $ only */
175 v->cflags &= ~REG_NLSTOP;
176 v->cflags |= REG_NLANCH;
177 break;
178 case CHR('x'): /* expanded syntax */
179 v->cflags |= REG_EXPANDED;
180 break;
181 default:
183 return;
184 }
185 if (!NEXT1(')'))
186 {
188 return;
189 }
190 v->now++;
191 if (v->cflags & REG_QUOTE)
193 }
194}
195
196/*
197 * next - get next token
198 */
199static int /* 1 normal, 0 failure */
200next(struct vars *v)
201{
202 chr c;
203
204next_restart: /* loop here after eating a comment */
205
206 /* errors yield an infinite sequence of failures */
207 if (ISERR())
208 return 0; /* the error has set nexttype to EOS */
209
210 /* remember flavor of last token */
211 v->lasttype = v->nexttype;
212
213 /* REG_BOSONLY */
214 if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
215 {
216 /* at start of a REG_BOSONLY RE */
217 RETV(SBEGIN, 0); /* same as \A */
218 }
219
220 /* skip white space etc. if appropriate (not in literal or []) */
221 if (v->cflags & REG_EXPANDED)
222 switch (v->lexcon)
223 {
224 case L_ERE:
225 case L_BRE:
226 case L_EBND:
227 case L_BBND:
228 skip(v);
229 break;
230 }
231
232 /* handle EOS, depending on context */
233 if (ATEOS())
234 {
235 switch (v->lexcon)
236 {
237 case L_ERE:
238 case L_BRE:
239 case L_Q:
240 RET(EOS);
241 break;
242 case L_EBND:
243 case L_BBND:
245 break;
246 case L_BRACK:
247 case L_CEL:
248 case L_ECL:
249 case L_CCL:
251 break;
252 }
254 }
255
256 /* okay, time to actually get a character */
257 c = *v->now++;
258
259 /* deal with the easy contexts, punt EREs to code below */
260 switch (v->lexcon)
261 {
262 case L_BRE: /* punt BREs to separate function */
263 return brenext(v, c);
264 break;
265 case L_ERE: /* see below */
266 break;
267 case L_Q: /* literal strings are easy */
268 RETV(PLAIN, c);
269 break;
270 case L_BBND: /* bounds are fairly simple */
271 case L_EBND:
272 switch (c)
273 {
274 case CHR('0'):
275 case CHR('1'):
276 case CHR('2'):
277 case CHR('3'):
278 case CHR('4'):
279 case CHR('5'):
280 case CHR('6'):
281 case CHR('7'):
282 case CHR('8'):
283 case CHR('9'):
284 RETV(DIGIT, (chr) DIGITVAL(c));
285 break;
286 case CHR(','):
287 RET(',');
288 break;
289 case CHR('}'): /* ERE bound ends with } */
290 if (INCON(L_EBND))
291 {
292 INTOCON(L_ERE);
293 if ((v->cflags & REG_ADVF) && NEXT1('?'))
294 {
295 v->now++;
297 RETV('}', 0);
298 }
299 RETV('}', 1);
300 }
301 else
303 break;
304 case CHR('\\'): /* BRE bound ends with \} */
305 if (INCON(L_BBND) && NEXT1('}'))
306 {
307 v->now++;
308 INTOCON(L_BRE);
309 RETV('}', 1);
310 }
311 else
313 break;
314 default:
316 break;
317 }
319 break;
320 case L_BRACK: /* brackets are not too hard */
321 switch (c)
322 {
323 case CHR(']'):
324 if (LASTTYPE('['))
325 RETV(PLAIN, c);
326 else
327 {
329 L_ERE : L_BRE);
330 RET(']');
331 }
332 break;
333 case CHR('\\'):
334 NOTE(REG_UBBS);
335 if (!(v->cflags & REG_ADVF))
336 RETV(PLAIN, c);
338 if (ATEOS())
340 if (!lexescape(v))
341 return 0;
342 switch (v->nexttype)
343 { /* not all escapes okay here */
344 case PLAIN:
345 case CCLASSS:
346 case CCLASSC:
347 return 1;
348 break;
349 }
350 /* not one of the acceptable escapes */
352 break;
353 case CHR('-'):
354 if (LASTTYPE('[') || NEXT1(']'))
355 RETV(PLAIN, c);
356 else
357 RETV(RANGE, c);
358 break;
359 case CHR('['):
360 if (ATEOS())
362 switch (*v->now++)
363 {
364 case CHR('.'):
365 INTOCON(L_CEL);
366 /* might or might not be locale-specific */
367 RET(COLLEL);
368 break;
369 case CHR('='):
370 INTOCON(L_ECL);
372 RET(ECLASS);
373 break;
374 case CHR(':'):
375 INTOCON(L_CCL);
377 RET(CCLASS);
378 break;
379 default: /* oops */
380 v->now--;
381 RETV(PLAIN, c);
382 break;
383 }
385 break;
386 default:
387 RETV(PLAIN, c);
388 break;
389 }
391 break;
392 case L_CEL: /* collating elements are easy */
393 if (c == CHR('.') && NEXT1(']'))
394 {
395 v->now++;
397 RETV(END, '.');
398 }
399 else
400 RETV(PLAIN, c);
401 break;
402 case L_ECL: /* ditto equivalence classes */
403 if (c == CHR('=') && NEXT1(']'))
404 {
405 v->now++;
407 RETV(END, '=');
408 }
409 else
410 RETV(PLAIN, c);
411 break;
412 case L_CCL: /* ditto character classes */
413 if (c == CHR(':') && NEXT1(']'))
414 {
415 v->now++;
417 RETV(END, ':');
418 }
419 else
420 RETV(PLAIN, c);
421 break;
422 default:
424 break;
425 }
426
427 /* that got rid of everything except EREs and AREs */
429
430 /* deal with EREs and AREs, except for backslashes */
431 switch (c)
432 {
433 case CHR('|'):
434 RET('|');
435 break;
436 case CHR('*'):
437 if ((v->cflags & REG_ADVF) && NEXT1('?'))
438 {
439 v->now++;
441 RETV('*', 0);
442 }
443 RETV('*', 1);
444 break;
445 case CHR('+'):
446 if ((v->cflags & REG_ADVF) && NEXT1('?'))
447 {
448 v->now++;
450 RETV('+', 0);
451 }
452 RETV('+', 1);
453 break;
454 case CHR('?'):
455 if ((v->cflags & REG_ADVF) && NEXT1('?'))
456 {
457 v->now++;
459 RETV('?', 0);
460 }
461 RETV('?', 1);
462 break;
463 case CHR('{'): /* bounds start or plain character */
464 if (v->cflags & REG_EXPANDED)
465 skip(v);
466 if (ATEOS() || !iscdigit(*v->now))
467 {
470 RETV(PLAIN, c);
471 }
472 else
473 {
476 RET('{');
477 }
479 break;
480 case CHR('('): /* parenthesis, or advanced extension */
481 if ((v->cflags & REG_ADVF) && NEXT1('?'))
482 {
484 v->now++;
485 if (ATEOS())
487 switch (*v->now++)
488 {
489 case CHR(':'): /* non-capturing paren */
490 RETV('(', 0);
491 break;
492 case CHR('#'): /* comment */
493 while (!ATEOS() && *v->now != CHR(')'))
494 v->now++;
495 if (!ATEOS())
496 v->now++;
497 assert(v->nexttype == v->lasttype);
498 goto next_restart;
499 case CHR('='): /* positive lookahead */
502 break;
503 case CHR('!'): /* negative lookahead */
506 break;
507 case CHR('<'):
508 if (ATEOS())
510 switch (*v->now++)
511 {
512 case CHR('='): /* positive lookbehind */
515 break;
516 case CHR('!'): /* negative lookbehind */
519 break;
520 default:
522 break;
523 }
525 break;
526 default:
528 break;
529 }
531 }
532 RETV('(', 1);
533 break;
534 case CHR(')'):
535 if (LASTTYPE('('))
537 RETV(')', c);
538 break;
539 case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
540 if (HAVE(6) && *(v->now + 0) == CHR('[') &&
541 *(v->now + 1) == CHR(':') &&
542 (*(v->now + 2) == CHR('<') ||
543 *(v->now + 2) == CHR('>')) &&
544 *(v->now + 3) == CHR(':') &&
545 *(v->now + 4) == CHR(']') &&
546 *(v->now + 5) == CHR(']'))
547 {
548 c = *(v->now + 2);
549 v->now += 6;
551 RET((c == CHR('<')) ? '<' : '>');
552 }
554 if (NEXT1('^'))
555 {
556 v->now++;
557 RETV('[', 0);
558 }
559 RETV('[', 1);
560 break;
561 case CHR('.'):
562 RET('.');
563 break;
564 case CHR('^'):
565 RET('^');
566 break;
567 case CHR('$'):
568 RET('$');
569 break;
570 case CHR('\\'): /* mostly punt backslashes to code below */
571 if (ATEOS())
573 break;
574 default: /* ordinary character */
575 RETV(PLAIN, c);
576 break;
577 }
578
579 /* ERE/ARE backslash handling; backslash already eaten */
580 assert(!ATEOS());
581 if (!(v->cflags & REG_ADVF))
582 { /* only AREs have non-trivial escapes */
583 if (iscalnum(*v->now))
584 {
587 }
588 RETV(PLAIN, *v->now++);
589 }
590 return lexescape(v);
591}
592
593/*
594 * lexescape - parse an ARE backslash escape (backslash already eaten)
595 *
596 * This is used for ARE backslashes both normally and inside bracket
597 * expressions. In the latter case, not all escape types are allowed,
598 * but the caller must reject unwanted ones after we return.
599 */
600static int
601lexescape(struct vars *v)
602{
603 chr c;
604 static const chr alert[] = {
605 CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
606 };
607 static const chr esc[] = {
608 CHR('E'), CHR('S'), CHR('C')
609 };
610 const chr *save;
611
612 assert(v->cflags & REG_ADVF);
613
614 assert(!ATEOS());
615 c = *v->now++;
616
617 /* if it's not alphanumeric ASCII, treat it as a plain character */
618 if (!('a' <= c && c <= 'z') &&
619 !('A' <= c && c <= 'Z') &&
620 !('0' <= c && c <= '9'))
621 RETV(PLAIN, c);
622
624 switch (c)
625 {
626 case CHR('a'):
627 RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
628 break;
629 case CHR('A'):
630 RETV(SBEGIN, 0);
631 break;
632 case CHR('b'):
633 RETV(PLAIN, CHR('\b'));
634 break;
635 case CHR('B'):
636 RETV(PLAIN, CHR('\\'));
637 break;
638 case CHR('c'):
640 if (ATEOS())
642 RETV(PLAIN, (chr) (*v->now++ & 037));
643 break;
644 case CHR('d'):
647 break;
648 case CHR('D'):
651 break;
652 case CHR('e'):
654 RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
655 break;
656 case CHR('f'):
657 RETV(PLAIN, CHR('\f'));
658 break;
659 case CHR('m'):
660 RET('<');
661 break;
662 case CHR('M'):
663 RET('>');
664 break;
665 case CHR('n'):
666 RETV(PLAIN, CHR('\n'));
667 break;
668 case CHR('r'):
669 RETV(PLAIN, CHR('\r'));
670 break;
671 case CHR('s'):
674 break;
675 case CHR('S'):
678 break;
679 case CHR('t'):
680 RETV(PLAIN, CHR('\t'));
681 break;
682 case CHR('u'):
683 c = lexdigits(v, 16, 4, 4);
684 if (ISERR() || !CHR_IS_IN_RANGE(c))
686 RETV(PLAIN, c);
687 break;
688 case CHR('U'):
689 c = lexdigits(v, 16, 8, 8);
690 if (ISERR() || !CHR_IS_IN_RANGE(c))
692 RETV(PLAIN, c);
693 break;
694 case CHR('v'):
695 RETV(PLAIN, CHR('\v'));
696 break;
697 case CHR('w'):
700 break;
701 case CHR('W'):
704 break;
705 case CHR('x'):
707 c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
708 if (ISERR() || !CHR_IS_IN_RANGE(c))
710 RETV(PLAIN, c);
711 break;
712 case CHR('y'):
714 RETV(WBDRY, 0);
715 break;
716 case CHR('Y'):
718 RETV(NWBDRY, 0);
719 break;
720 case CHR('Z'):
721 RETV(SEND, 0);
722 break;
723 case CHR('1'):
724 case CHR('2'):
725 case CHR('3'):
726 case CHR('4'):
727 case CHR('5'):
728 case CHR('6'):
729 case CHR('7'):
730 case CHR('8'):
731 case CHR('9'):
732 save = v->now;
733 v->now--; /* put first digit back */
734 c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
735 if (ISERR())
737 /* ugly heuristic (first test is "exactly 1 digit?") */
738 if (v->now == save || ((int) c > 0 && (int) c <= v->nsubexp))
739 {
741 RETV(BACKREF, c);
742 }
743 /* oops, doesn't look like it's a backref after all... */
744 v->now = save;
745 /* and fall through into octal number */
746 /* FALLTHROUGH */
747 case CHR('0'):
749 v->now--; /* put first digit back */
750 c = lexdigits(v, 8, 1, 3);
751 if (ISERR())
753 if (c > 0xff)
754 {
755 /* out of range, so we handled one digit too much */
756 v->now--;
757 c >>= 3;
758 }
759 RETV(PLAIN, c);
760 break;
761 default:
762
763 /*
764 * Throw an error for unrecognized ASCII alpha escape sequences,
765 * which reserves them for future use if needed.
766 */
768 break;
769 }
771}
772
773/*
774 * lexdigits - slurp up digits and return chr value
775 *
776 * This does not account for overflow; callers should range-check the result
777 * if maxlen is large enough to make that possible.
778 */
779static chr /* chr value; errors signalled via ERR */
780lexdigits(struct vars *v,
781 int base,
782 int minlen,
783 int maxlen)
784{
785 uchr n; /* unsigned to avoid overflow misbehavior */
786 int len;
787 chr c;
788 int d;
789 const uchr ub = (uchr) base;
790
791 n = 0;
792 for (len = 0; len < maxlen && !ATEOS(); len++)
793 {
794 c = *v->now++;
795 switch (c)
796 {
797 case CHR('0'):
798 case CHR('1'):
799 case CHR('2'):
800 case CHR('3'):
801 case CHR('4'):
802 case CHR('5'):
803 case CHR('6'):
804 case CHR('7'):
805 case CHR('8'):
806 case CHR('9'):
807 d = DIGITVAL(c);
808 break;
809 case CHR('a'):
810 case CHR('A'):
811 d = 10;
812 break;
813 case CHR('b'):
814 case CHR('B'):
815 d = 11;
816 break;
817 case CHR('c'):
818 case CHR('C'):
819 d = 12;
820 break;
821 case CHR('d'):
822 case CHR('D'):
823 d = 13;
824 break;
825 case CHR('e'):
826 case CHR('E'):
827 d = 14;
828 break;
829 case CHR('f'):
830 case CHR('F'):
831 d = 15;
832 break;
833 default:
834 v->now--; /* oops, not a digit at all */
835 d = -1;
836 break;
837 }
838
839 if (d >= base)
840 { /* not a plausible digit */
841 v->now--;
842 d = -1;
843 }
844 if (d < 0)
845 break; /* NOTE BREAK OUT */
846 n = n * ub + (uchr) d;
847 }
848 if (len < minlen)
850
851 return (chr) n;
852}
853
854/*
855 * brenext - get next BRE token
856 *
857 * This is much like EREs except for all the stupid backslashes and the
858 * context-dependency of some things.
859 */
860static int /* 1 normal, 0 failure */
861brenext(struct vars *v,
862 chr c)
863{
864 switch (c)
865 {
866 case CHR('*'):
867 if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
868 RETV(PLAIN, c);
869 RETV('*', 1);
870 break;
871 case CHR('['):
872 if (HAVE(6) && *(v->now + 0) == CHR('[') &&
873 *(v->now + 1) == CHR(':') &&
874 (*(v->now + 2) == CHR('<') ||
875 *(v->now + 2) == CHR('>')) &&
876 *(v->now + 3) == CHR(':') &&
877 *(v->now + 4) == CHR(']') &&
878 *(v->now + 5) == CHR(']'))
879 {
880 c = *(v->now + 2);
881 v->now += 6;
883 RET((c == CHR('<')) ? '<' : '>');
884 }
886 if (NEXT1('^'))
887 {
888 v->now++;
889 RETV('[', 0);
890 }
891 RETV('[', 1);
892 break;
893 case CHR('.'):
894 RET('.');
895 break;
896 case CHR('^'):
897 if (LASTTYPE(EMPTY))
898 RET('^');
899 if (LASTTYPE('('))
900 {
902 RET('^');
903 }
904 RETV(PLAIN, c);
905 break;
906 case CHR('$'):
907 if (v->cflags & REG_EXPANDED)
908 skip(v);
909 if (ATEOS())
910 RET('$');
911 if (NEXT2('\\', ')'))
912 {
914 RET('$');
915 }
916 RETV(PLAIN, c);
917 break;
918 case CHR('\\'):
919 break; /* see below */
920 default:
921 RETV(PLAIN, c);
922 break;
923 }
924
925 assert(c == CHR('\\'));
926
927 if (ATEOS())
929
930 c = *v->now++;
931 switch (c)
932 {
933 case CHR('{'):
936 RET('{');
937 break;
938 case CHR('('):
939 RETV('(', 1);
940 break;
941 case CHR(')'):
942 RETV(')', c);
943 break;
944 case CHR('<'):
946 RET('<');
947 break;
948 case CHR('>'):
950 RET('>');
951 break;
952 case CHR('1'):
953 case CHR('2'):
954 case CHR('3'):
955 case CHR('4'):
956 case CHR('5'):
957 case CHR('6'):
958 case CHR('7'):
959 case CHR('8'):
960 case CHR('9'):
963 break;
964 default:
965 if (iscalnum(c))
966 {
969 }
970 RETV(PLAIN, c);
971 break;
972 }
973
975 return 0;
976}
977
978/*
979 * skip - skip white space and comments in expanded form
980 */
981static void
982skip(struct vars *v)
983{
984 const chr *start = v->now;
985
987
988 for (;;)
989 {
990 while (!ATEOS() && iscspace(*v->now))
991 v->now++;
992 if (ATEOS() || *v->now != CHR('#'))
993 break; /* NOTE BREAK OUT */
994 assert(NEXT1('#'));
995 while (!ATEOS() && *v->now != CHR('\n'))
996 v->now++;
997 /* leave the newline to be picked up by the iscspace loop */
998 }
999
1000 if (v->now != start)
1002}
1003
1004/*
1005 * newline - return the chr for a newline
1006 *
1007 * This helps confine use of CHR to this source file.
1008 */
1009static chr
1011{
1012 return CHR('\n');
1013}
1014
1015/*
1016 * chrnamed - return the chr known by a given (chr string) name
1017 *
1018 * The code is a bit clumsy, but this routine gets only such specialized
1019 * use that it hardly matters.
1020 */
1021static chr
1022chrnamed(struct vars *v,
1023 const chr *startp, /* start of name */
1024 const chr *endp, /* just past end of name */
1025 chr lastresort) /* what to return if name lookup fails */
1026{
1027 chr c;
1028 int errsave;
1029 int e;
1030 struct cvec *cv;
1031
1032 errsave = v->err;
1033 v->err = 0;
1034 c = element(v, startp, endp);
1035 e = v->err;
1036 v->err = errsave;
1037
1038 if (e != 0)
1039 return lastresort;
1040
1041 cv = range(v, c, c, 0);
1042 if (cv->nchrs == 0)
1043 return lastresort;
1044 return cv->chrs[0];
1045}
#define END
Definition: _int.h:160
#define ERR
Definition: _int.h:161
#define errsave(context,...)
Definition: elog.h:261
return str start
const void size_t len
@ NOTE
Definition: pg_regress.c:88
char * c
e
Definition: preproc-init.c:82
#define L_ERE
Definition: regc_lex.c:51
#define NEXT2(a, b)
Definition: regc_lex.c:39
#define RET(c)
Definition: regc_lex.c:45
#define INTOCON(c)
Definition: regc_lex.c:60
#define INCON(con)
Definition: regc_lex.c:61
static int lexescape(struct vars *v)
Definition: regc_lex.c:601
#define L_BBND
Definition: regc_lex.c:55
#define ATEOS()
Definition: regc_lex.c:36
#define L_Q
Definition: regc_lex.c:53
static void skip(struct vars *v)
Definition: regc_lex.c:982
static chr lexdigits(struct vars *v, int base, int minlen, int maxlen)
Definition: regc_lex.c:780
#define HAVE(n)
Definition: regc_lex.c:37
#define LASTTYPE(t)
Definition: regc_lex.c:48
#define RETV(c, n)
Definition: regc_lex.c:46
static chr newline(void)
Definition: regc_lex.c:1010
#define L_CEL
Definition: regc_lex.c:57
#define FAILW(e)
Definition: regc_lex.c:47
#define L_EBND
Definition: regc_lex.c:54
#define L_ECL
Definition: regc_lex.c:58
static int brenext(struct vars *v, chr c)
Definition: regc_lex.c:861
static void lexstart(struct vars *v)
Definition: regc_lex.c:70
static void prefixes(struct vars *v)
Definition: regc_lex.c:99
#define ENDOF(array)
Definition: regc_lex.c:64
#define L_CCL
Definition: regc_lex.c:59
#define NEXT3(a, b, c)
Definition: regc_lex.c:40
#define L_BRACK
Definition: regc_lex.c:56
static chr chrnamed(struct vars *v, const chr *startp, const chr *endp, chr lastresort)
Definition: regc_lex.c:1022
#define NEXT1(c)
Definition: regc_lex.c:38
#define L_BRE
Definition: regc_lex.c:52
static int next(struct vars *v)
Definition: regc_lex.c:200
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
static chr element(struct vars *v, const chr *startp, const chr *endp)
Definition: regc_locale.c:376
#define COLLEL
Definition: regcomp.c:334
#define NWBDRY
Definition: regcomp.c:345
#define NOERR()
Definition: regcomp.c:321
#define EMPTY
Definition: regcomp.c:329
#define SBEGIN
Definition: regcomp.c:347
#define ISERR()
Definition: regcomp.c:317
#define CCLASSS
Definition: regcomp.c:338
#define CCLASS
Definition: regcomp.c:336
#define WBDRY
Definition: regcomp.c:344
#define DIGIT
Definition: regcomp.c:332
#define CCLASSC
Definition: regcomp.c:339
#define ECLASS
Definition: regcomp.c:335
#define BACKREF
Definition: regcomp.c:333
#define LACON
Definition: regcomp.c:341
#define EOS
Definition: regcomp.c:330
#define PLAIN
Definition: regcomp.c:331
#define SEND
Definition: regcomp.c:348
#define RANGE
Definition: regcomp.c:340
unsigned uchr
Definition: regcustom.h:60
#define DIGITVAL(c)
Definition: regcustom.h:63
#define iscalnum(x)
Definition: regcustom.h:90
#define iscdigit(x)
Definition: regcustom.h:92
#define CHR_IS_IN_RANGE(c)
Definition: regcustom.h:77
pg_wchar chr
Definition: regcustom.h:59
#define CHR(c)
Definition: regcustom.h:62
#define iscspace(x)
Definition: regcustom.h:93
#define iscalpha(x)
Definition: regcustom.h:91
#define assert(x)
Definition: regcustom.h:56
#define REG_BADOPT
Definition: regex.h:232
#define REG_ICASE
Definition: regex.h:184
#define REG_EBRACK
Definition: regex.h:222
#define REG_UBOUNDS
Definition: regex.h:140
#define REG_BADRPT
Definition: regex.h:228
#define REG_EESCAPE
Definition: regex.h:220
#define REG_ULOOKAROUND
Definition: regex.h:139
#define REG_UBBS
Definition: regex.h:144
#define REG_ADVANCED
Definition: regex.h:181
#define REG_EXPANDED
Definition: regex.h:186
#define REG_NLANCH
Definition: regex.h:188
#define REG_EXTENDED
Definition: regex.h:179
#define REG_NLSTOP
Definition: regex.h:187
#define REG_ADVF
Definition: regex.h:180
#define REG_UUNSPEC
Definition: regex.h:146
#define REG_UNONPOSIX
Definition: regex.h:145
#define REG_BADBR
Definition: regex.h:225
#define REG_NEWLINE
Definition: regex.h:189
#define REG_UBSALNUM
Definition: regex.h:142
#define REG_ULOCALE
Definition: regex.h:148
#define REG_UUNPORT
Definition: regex.h:147
#define REG_EBRACE
Definition: regex.h:224
#define REG_BADPAT
Definition: regex.h:217
#define REG_BOSONLY
Definition: regex.h:192
#define REG_UBRACES
Definition: regex.h:141
#define REG_UBACKREF
Definition: regex.h:138
#define REG_QUOTE
Definition: regex.h:182
#define LATYPE_AHEAD_NEG
Definition: regguts.h:105
#define LATYPE_BEHIND_POS
Definition: regguts.h:106
#define NOTREACHED
Definition: regguts.h:96
#define LATYPE_BEHIND_NEG
Definition: regguts.h:107
@ CC_WORD
Definition: regguts.h:141
@ CC_SPACE
Definition: regguts.h:141
@ CC_DIGIT
Definition: regguts.h:140
#define LATYPE_AHEAD_POS
Definition: regguts.h:104
Definition: regguts.h:279
int nchrs
Definition: regguts.h:280
chr * chrs
Definition: regguts.h:282
Definition: regcomp.c:282
const chr * now
Definition: regcomp.c:284
int err
Definition: regcomp.c:286
int cflags
Definition: regcomp.c:287
int lexcon
Definition: regcomp.c:291
int nexttype
Definition: regcomp.c:289
int lasttype
Definition: regcomp.c:288