PostgreSQL Source Code git master
string_utils.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * String-processing utility routines for frontend code
4 *
5 * Assorted utility functions that are useful in constructing SQL queries
6 * and interpreting backend output.
7 *
8 *
9 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
11 *
12 * src/fe_utils/string_utils.c
13 *
14 *-------------------------------------------------------------------------
15 */
16#include "postgres_fe.h"
17
18#include <ctype.h>
19
20#include "common/keywords.h"
22#include "mb/pg_wchar.h"
23
25
26/* Globals exported by this file */
29
30static int fmtIdEncoding = -1;
31
32
33/*
34 * Returns a temporary PQExpBuffer, valid until the next call to the function.
35 * This is used by fmtId and fmtQualifiedId.
36 *
37 * Non-reentrant and non-thread-safe but reduces memory leakage. You can
38 * replace this with a custom version by setting the getLocalPQExpBuffer
39 * function pointer.
40 */
/*
 * NOTE(review): the line carrying this function's name and parameter list
 * (between the return type and the opening brace) is not present in this
 * listing.
 */
41static PQExpBuffer
43{
44 static PQExpBuffer id_return = NULL; /* retained across calls */
45
46 if (id_return) /* already allocated by an earlier call? */
47 {
48 /* same buffer, just wipe contents */
49 resetPQExpBuffer(id_return);
50 }
51 else
52 {
53 /* first call: create the buffer that later calls will reuse */
54 id_return = createPQExpBuffer();
55 }
56
57 return id_return;
58}
59
60/*
61 * Set the encoding that fmtId() and fmtQualifiedId() use.
62 *
63 * This is not safe against multiple connections having different encodings,
64 * but there is no real other way to address the need to know the encoding for
65 * fmtId()/fmtQualifiedId() input for safe escaping. Eventually we should get
66 * rid of fmtId().
67 */
/*
 * NOTE(review): the line carrying this function's name and parameter list
 * is not present in this listing; the body refers to a parameter named
 * "encoding".
 */
68void
70{
/* Remember the encoding in the file-level fmtIdEncoding variable. */
71 fmtIdEncoding = encoding;
72}
73
74/*
75 * Return the currently configured encoding for fmtId() and fmtQualifiedId().
76 */
/*
 * NOTE(review): the line carrying this function's name and parameter list
 * is not present in this listing.
 */
77static int
79{
/* -1 is the initial "not configured" value of fmtIdEncoding */
80 if (fmtIdEncoding != -1)
81 return fmtIdEncoding;
82
83 /*
84 * In assertion builds it seems best to fail hard if the encoding was not
85 * set, to make it easier to find places with missing calls. But in
86 * production builds that seems like a bad idea, thus we instead just
87 * default to UTF-8.
88 */
/* Only reached when fmtIdEncoding is still -1, so this always trips. */
89 Assert(fmtIdEncoding != -1);
90
91 return PG_UTF8;
92}
93
94/*
95 * Quotes input string if it's not a legitimate SQL identifier as-is.
96 *
97 * Note that the returned string must be used before calling fmtIdEnc again,
98 * since we re-use the same return buffer each time.
99 */
100const char *
101fmtIdEnc(const char *rawid, int encoding)
102{
/* Shared return buffer; its previous contents are gone after this call. */
103 PQExpBuffer id_return = getLocalPQExpBuffer();
104
105 const char *cp;
106 bool need_quotes = false;
/* Bytes of rawid not yet consumed by the quoting loop further down. */
107 size_t remaining = strlen(rawid);
108
109 /*
110 * These checks need to match the identifier production in scan.l. Don't
111 * use islower() etc.
112 */
/*
 * NOTE(review): listing line 113 (the "if" that the "else if" below pairs
 * with) is not present in this listing.
 */
114 need_quotes = true;
115 /* slightly different rules for first character */
116 else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
117 need_quotes = true;
118 else
119 {
120 /* otherwise check the entire string */
121 cp = rawid;
122 for (size_t i = 0; i < remaining; i++, cp++)
123 {
124 if (!((*cp >= 'a' && *cp <= 'z')
125 || (*cp >= '0' && *cp <= '9')
126 || (*cp == '_')))
127 {
128 need_quotes = true;
129 break;
130 }
131 }
132 }
133
134 if (!need_quotes)
135 {
136 /*
137 * Check for keyword. We quote keywords except for unreserved ones.
138 * (In some cases we could avoid quoting a col_name or type_func_name
139 * keyword, but it seems much harder than it's worth to tell that.)
140 *
141 * Note: ScanKeywordLookup() does case-insensitive comparison, but
142 * that's fine, since we already know we have all-lower-case.
143 */
144 int kwnum = ScanKeywordLookup(rawid, &ScanKeywords);
145
146 if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
147 need_quotes = true;
148 }
149
150 if (!need_quotes)
151 {
152 /* no quoting needed */
153 appendPQExpBufferStr(id_return, rawid);
154 }
155 else
156 {
/* Emit the opening quote, then copy rawid one character at a time. */
157 appendPQExpBufferChar(id_return, '"');
158
159 cp = &rawid[0];
160 while (remaining > 0)
161 {
162 int charlen;
163
164 /* Fast path for plain ASCII */
165 if (!IS_HIGHBIT_SET(*cp))
166 {
167 /*
168 * Did we find a double-quote in the string? Then make this a
169 * double double-quote per SQL99. Before, we put in a
170 * backslash/double-quote pair. - thomas 2000-08-05
171 */
172 if (*cp == '"')
173 appendPQExpBufferChar(id_return, '"');
174 appendPQExpBufferChar(id_return, *cp);
175 remaining--;
176 cp++;
177 continue;
178 }
179
180 /* Slow path for possible multibyte characters */
181 charlen = pg_encoding_mblen(encoding, cp);
182
/* Reject characters that run past the end of the input or fail verification. */
183 if (remaining < charlen ||
184 pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
185 {
186 /*
187 * Multibyte character is invalid. It's important to verify
188 * that as invalid multibyte characters could e.g. be used to
189 * "skip" over quote characters, e.g. when parsing
190 * character-by-character.
191 *
192 * Replace the character's first byte with an invalid
193 * sequence. The invalid sequence ensures that the escaped
194 * string will trigger an error on the server-side, even if we
195 * can't directly report an error here.
196 *
197 * It would be a bit faster to verify the whole string the
198 * first time we encounter a set highbit, but this way we can
199 * replace just the invalid data, which probably makes it
200 * easier for users to find the invalidly encoded portion of a
201 * larger string.
202 */
203 if (enlargePQExpBuffer(id_return, 2))
204 {
/*
 * NOTE(review): listing line 205 (the start of the call whose last
 * argument appears below) is not present in this listing.
 */
206 id_return->data + id_return->len);
/* Account for the two bytes just written and re-terminate the buffer. */
207 id_return->len += 2;
208 id_return->data[id_return->len] = '\0';
209 }
210
211 /*
212 * Handle the following bytes as if this byte didn't exist.
213 * That's safer in case the subsequent bytes contain
214 * characters that are significant for the caller (e.g. '>' in
215 * html).
216 */
217 remaining--;
218 cp++;
219 }
220 else
221 {
/* Valid character: copy all of its bytes unchanged. */
222 for (int i = 0; i < charlen; i++)
223 {
224 appendPQExpBufferChar(id_return, *cp);
225 remaining--;
226 cp++;
227 }
228 }
229 }
230
231 appendPQExpBufferChar(id_return, '"');
232 }
233
234 return id_return->data;
235}
236
237/*
238 * Quotes input string if it's not a legitimate SQL identifier as-is.
239 *
240 * Note that the returned string must be used before calling fmtId again,
241 * since we re-use the same return buffer each time.
242 *
243 * NB: This assumes setFmtEncoding() previously has been called to configure
244 * the encoding of rawid. It is preferable to use fmtIdEnc() with an
245 * explicit encoding.
246 */
const char *
fmtId(const char *rawid)
{
	/* Encoding previously configured for the fmtId() family. */
	int			configured_encoding = getFmtEncoding();

	/* All quoting logic lives in the explicit-encoding variant. */
	return fmtIdEnc(rawid, configured_encoding);
}
252
253/*
254 * fmtQualifiedIdEnc - construct a schema-qualified name, with quoting as
255 * needed.
256 *
257 * Like fmtId, use the result before calling again.
258 *
259 * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
260 * use that buffer until we're finished with calling fmtId().
261 */
262const char *
263fmtQualifiedIdEnc(const char *schema, const char *id, int encoding)
264{
265 PQExpBuffer id_return;
266 PQExpBuffer lcl_pqexp = createPQExpBuffer();
267
268 /* Some callers might fail to provide a schema name */
269 if (schema && *schema)
270 {
271 appendPQExpBuffer(lcl_pqexp, "%s.", fmtIdEnc(schema, encoding));
272 }
273 appendPQExpBufferStr(lcl_pqexp, fmtIdEnc(id, encoding));
274
275 id_return = getLocalPQExpBuffer();
276
277 appendPQExpBufferStr(id_return, lcl_pqexp->data);
278 destroyPQExpBuffer(lcl_pqexp);
279
280 return id_return->data;
281}
282
283/*
284 * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
285 *
286 * Like fmtId, use the result before calling again.
287 *
288 * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
289 * use that buffer until we're finished with calling fmtId().
290 *
291 * NB: This assumes setFmtEncoding() previously has been called to configure
292 * the encoding of schema/id. It is preferable to use fmtQualifiedIdEnc()
293 * with an explicit encoding.
294 */
const char *
fmtQualifiedId(const char *schema, const char *id)
{
	/* Encoding previously configured for the fmtId() family. */
	int			configured_encoding = getFmtEncoding();

	/* Delegate to the explicit-encoding variant. */
	return fmtQualifiedIdEnc(schema, id, configured_encoding);
}
300
301
302/*
303 * Format a Postgres version number (in the PG_VERSION_NUM integer format
304 * returned by PQserverVersion()) as a string. This exists mainly to
305 * encapsulate knowledge about two-part vs. three-part version numbers.
306 *
307 * For reentrancy, caller must supply the buffer the string is put in.
308 * Recommended size of the buffer is 32 bytes.
309 *
310 * Returns address of 'buf', as a notational convenience.
311 */
char *
formatPGVersionNumber(int version_number, bool include_minor,
					  char *buf, size_t buflen)
{
	/* The leading component is computed the same way in both styles. */
	const int	major = version_number / 10000;
	const bool	two_part = (version_number >= 100000);

	if (two_part && include_minor)
	{
		/* New style with minor: "major.minor" */
		snprintf(buf, buflen, "%d.%d", major, version_number % 10000);
	}
	else if (two_part)
	{
		/* New style, major only */
		snprintf(buf, buflen, "%d", major);
	}
	else
	{
		/* Old three-part style: the first two components name the release */
		const int	middle = (version_number / 100) % 100;

		if (include_minor)
			snprintf(buf, buflen, "%d.%d.%d", major, middle,
					 version_number % 100);
		else
			snprintf(buf, buflen, "%d.%d", major, middle);
	}

	/* Hand back the caller's buffer as a notational convenience. */
	return buf;
}
338
339
340/*
341 * Convert a string value to an SQL string literal and append it to
342 * the given buffer. We assume the specified client_encoding and
343 * standard_conforming_strings settings.
344 *
345 * This is essentially equivalent to libpq's PQescapeStringInternal,
346 * except for the output buffer structure. We need it in situations
347 * where we do not have a PGconn available. Where we do,
348 * appendStringLiteralConn is a better choice.
349 */
/*
 * NOTE(review): listing line 351 (the function name and its first
 * parameters) is not present in this listing; the body refers to
 * parameters named "buf" and "str".
 */
350void
352 int encoding, bool std_strings)
353{
354 size_t length = strlen(str);
355 const char *source = str;
356 char *target;
/* Input bytes not yet consumed by the copy loop. */
357 size_t remaining = length;
358
/* Reserve space up front; nothing is appended if that fails. */
359 if (!enlargePQExpBuffer(buf, 2 * length + 2))
360 return;
361
362 target = buf->data + buf->len;
363 *target++ = '\'';
364
365 while (remaining > 0)
366 {
367 char c = *source;
368 int charlen;
369 int i;
370
371 /* Fast path for plain ASCII */
372 if (!IS_HIGHBIT_SET(c))
373 {
374 /* Apply quoting if needed */
375 if (SQL_STR_DOUBLE(c, !std_strings))
376 *target++ = c;
377 /* Copy the character */
378 *target++ = c;
379 source++;
380 remaining--;
381 continue;
382 }
383
384 /* Slow path for possible multibyte characters */
385 charlen = PQmblen(source, encoding);
386
/*
 * NOTE(review): listing line 388 (the second operand of this condition)
 * is not present in this listing.
 */
387 if (remaining < charlen ||
389 {
390 /*
391 * Multibyte character is invalid. It's important to verify that
392 * as invalid multibyte characters could e.g. be used to "skip"
393 * over quote characters, e.g. when parsing
394 * character-by-character.
395 *
396 * Replace the character's first byte with an invalid sequence.
397 * The invalid sequence ensures that the escaped string will
398 * trigger an error on the server-side, even if we can't directly
399 * report an error here.
400 *
401 * We know there's enough space for the invalid sequence because
402 * the "target" buffer is 2 * length + 2 long, and at worst we're
403 * replacing a single input byte with two invalid bytes.
404 *
405 * It would be a bit faster to verify the whole string the first
406 * time we encounter a set highbit, but this way we can replace
407 * just the invalid data, which probably makes it easier for users
408 * to find the invalidly encoded portion of a larger string.
409 */
/*
 * NOTE(review): listing line 410 (the statement that writes the two
 * bytes skipped over below) is not present in this listing.
 */
411 target += 2;
412
413 /*
414 * Handle the following bytes as if this byte didn't exist. That's
415 * safer in case the subsequent bytes contain important characters
416 * for the caller (e.g. '>' in html).
417 */
418 source++;
419 remaining--;
420 }
421 else
422 {
423 /* Copy the character */
424 for (i = 0; i < charlen; i++)
425 {
426 *target++ = *source++;
427 remaining--;
428 }
429 }
430 }
431
432 /* Write the terminating quote and NUL character. */
433 *target++ = '\'';
434 *target = '\0';
435
/* The NUL is not counted: target still points at it. */
436 buf->len = target - buf->data;
437}
438
439
440/*
441 * Convert a string value to an SQL string literal and append it to
442 * the given buffer. Encoding and string syntax rules are as indicated
443 * by current settings of the PGconn.
444 */
/*
 * NOTE(review): several lines of this function are not present in this
 * listing (446, 458-460, 467 and 470), including the line with its name
 * and parameter list.  The body refers to parameters named "buf", "str"
 * and "conn".
 */
445void
447{
448 size_t length = strlen(str);
449
450 /*
451 * XXX This is a kluge to silence escape_string_warning in our utility
452 * programs. It should go away someday.
453 */
454 if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100)
455 {
456 /* ensure we are not adjacent to an identifier */
457 if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
/* NOTE(review): listing lines 458-460 are absent here. */
461 return;
462 }
463 /* XXX end kluge */
464
/* Reserve space up front; nothing is appended if that fails. */
465 if (!enlargePQExpBuffer(buf, 2 * length + 2))
466 return;
/* NOTE(review): listing line 467 is absent here. */
/* Escape straight into the buffer's free space and grow len by the amount written. */
468 buf->len += PQescapeStringConn(conn, buf->data + buf->len,
469 str, length, NULL);
/* NOTE(review): listing line 470 is absent here. */
471}
472
473
474/*
475 * Convert a string value to a dollar quoted literal and append it to
476 * the given buffer. If the dqprefix parameter is not NULL then the
477 * dollar quote delimiter will begin with that (after the opening $).
478 *
479 * No escaping is done at all on str, in compliance with the rules
480 * for parsing dollar quoted strings. Also, we need not worry about
481 * encoding issues.
482 */
483void
484appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
485{
/* Characters appended one by one to the delimiter until it is unique. */
486 static const char suffixes[] = "_XXXXXXX";
487 int nextchar = 0;
488 PQExpBuffer delimBuf = createPQExpBuffer();
489
490 /* start with $ + dqprefix if not NULL */
491 appendPQExpBufferChar(delimBuf, '$');
492 if (dqprefix)
493 appendPQExpBufferStr(delimBuf, dqprefix);
494
495 /*
496 * Make sure we choose a delimiter which (without the trailing $) is not
497 * present in the string being quoted. We don't check with the trailing $
498 * because a string ending in $foo must not be quoted with $foo$.
499 */
500 while (strstr(str, delimBuf->data) != NULL)
501 {
502 appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
/* wrap around, skipping the array's terminating NUL */
503 nextchar %= sizeof(suffixes) - 1;
504 }
505
506 /* add trailing $ */
507 appendPQExpBufferChar(delimBuf, '$');
508
509 /* quote it and we are all done */
510 appendPQExpBufferStr(buf, delimBuf->data);
/*
 * NOTE(review): listing line 511, between the opening and closing
 * delimiter, is not present in this listing.
 */
512 appendPQExpBufferStr(buf, delimBuf->data);
513
514 destroyPQExpBuffer(delimBuf);
515}
516
517
518/*
519 * Convert a bytea value (presented as raw bytes) to an SQL string literal
520 * and append it to the given buffer. We assume the specified
521 * standard_conforming_strings setting.
522 *
523 * This is needed in situations where we do not have a PGconn available.
524 * Where we do, PQescapeByteaConn is a better choice.
525 */
526void
527appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
528 bool std_strings)
529{
530 const unsigned char *source = str;
531 char *target;
532
533 static const char hextbl[] = "0123456789abcdef";
534
535 /*
536 * This implementation is hard-wired to produce hex-format output. We do
537 * not know the server version the output will be loaded into, so making
538 * an intelligent format choice is impossible. It might be better to
539 * always use the old escaped format.
540 */
541 if (!enlargePQExpBuffer(buf, 2 * length + 5))
542 return;
543
544 target = buf->data + buf->len;
545 *target++ = '\'';
546 if (!std_strings)
547 *target++ = '\\';
548 *target++ = '\\';
549 *target++ = 'x';
550
551 while (length-- > 0)
552 {
553 unsigned char c = *source++;
554
555 *target++ = hextbl[(c >> 4) & 0xF];
556 *target++ = hextbl[c & 0xF];
557 }
558
559 /* Write the terminating quote and NUL character. */
560 *target++ = '\'';
561 *target = '\0';
562
563 buf->len = target - buf->data;
564}
565
566
567/*
568 * Append the given string to the shell command being built in the buffer,
569 * with shell-style quoting as needed to create exactly one argument.
570 *
571 * Forbid LF or CR characters, which have scant practical use beyond designing
572 * security breaches. The Windows command shell is unusable as a conduit for
573 * arguments containing LF or CR characters. A future major release should
574 * reject those characters in CREATE ROLE and CREATE DATABASE, because use
575 * there eventually leads to errors here.
576 *
577 * appendShellString() simply prints an error and dies if LF or CR appears.
578 * appendShellStringNoError() omits those characters from the result, and
579 * returns false if there were any.
580 */
/*
 * NOTE(review): listing lines 582, 584 and 589 are not present in this
 * listing: the function name and parameter list, the condition guarding
 * the error block, and the statement following the message.
 */
581void
583{
585 {
/* Report the offending argument on stderr. */
586 fprintf(stderr,
587 _("shell command argument contains a newline or carriage return: \"%s\"\n"),
588 str);
590 }
591}
592
/*
 * NOTE(review): several lines of this function are not present in this
 * listing (594, 609, 614, 626, 628, 642, 656, 659, 673-674, 683 and 686),
 * including the function name line and most statements that append to the
 * buffer.  The body refers to parameters named "buf" and "str".
 */
593bool
595{
596#ifdef WIN32
597 int backslash_run_length = 0;
598#endif
/* Becomes false once an LF or CR has been seen (and dropped). */
599 bool ok = true;
600 const char *p;
601
602 /*
603 * Don't bother with adding quotes if the string is nonempty and clearly
604 * contains only safe characters.
605 */
606 if (*str != '\0' &&
607 strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
608 {
/* NOTE(review): listing line 609 is absent here. */
610 return ok;
611 }
612
613#ifndef WIN32
/* NOTE(review): listing line 614 is absent here. */
615 for (p = str; *p; p++)
616 {
/* LF and CR are left out of the output and remembered as a failure. */
617 if (*p == '\n' || *p == '\r')
618 {
619 ok = false;
620 continue;
621 }
622
/* A single quote is emitted as: close quote, "'" in double quotes, reopen. */
623 if (*p == '\'')
624 appendPQExpBufferStr(buf, "'\"'\"'");
625 else
/* NOTE(review): listing line 626 (the body of this "else") is absent here. */
627 }
/* NOTE(review): listing line 628 is absent here. */
629#else /* WIN32 */
630
631 /*
632 * A Windows system() argument experiences two layers of interpretation.
633 * First, cmd.exe interprets the string. Its behavior is undocumented,
634 * but a caret escapes any byte except LF or CR that would otherwise have
635 * special meaning. Handling of a caret before LF or CR differs between
636 * "cmd.exe /c" and other modes, and it is unusable here.
637 *
638 * Second, the new process parses its command line to construct argv (see
639 * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). This treats
640 * backslash-double quote sequences specially.
641 */
/* NOTE(review): listing line 642 is absent here. */
643 for (p = str; *p; p++)
644 {
645 if (*p == '\n' || *p == '\r')
646 {
647 ok = false;
648 continue;
649 }
650
651 /* Change N backslashes before a double quote to 2N+1 backslashes. */
652 if (*p == '"')
653 {
654 while (backslash_run_length)
655 {
/* NOTE(review): listing line 656 is absent here. */
657 backslash_run_length--;
658 }
/* NOTE(review): listing line 659 is absent here. */
660 }
661 else if (*p == '\\')
662 backslash_run_length++;
663 else
664 backslash_run_length = 0;
665
666 /*
667 * Decline to caret-escape the most mundane characters, to ease
668 * debugging and lest we approach the command length limit.
669 */
670 if (!((*p >= 'a' && *p <= 'z') ||
671 (*p >= 'A' && *p <= 'Z') ||
672 (*p >= '0' && *p <= '9')))
/* NOTE(review): listing lines 673-674 are absent here. */
675 }
676
677 /*
678 * Change N backslashes at end of argument to 2N backslashes, because they
679 * precede the double quote that terminates the argument.
680 */
681 while (backslash_run_length)
682 {
/* NOTE(review): listing line 683 is absent here. */
684 backslash_run_length--;
685 }
/* NOTE(review): listing line 686 is absent here. */
687#endif /* WIN32 */
688
689 return ok;
690}
691
692
693/*
694 * Append the given string to the buffer, with suitable quoting for passing
695 * the string as a value in a keyword/value pair in a libpq connection string.
696 */
/*
 * NOTE(review): listing lines 698, 721, 726, 728, 731 and 734 are not
 * present in this listing, including the function name line and the
 * statements that append to the output buffer.  The body refers to a
 * parameter named "str".
 */
697void
699{
700 const char *s;
701 bool needquotes;
702
703 /*
704 * If the string is one or more plain ASCII characters, no need to quote
705 * it. This is quite conservative, but better safe than sorry.
706 */
/* Start out true so that an empty string (loop never runs) is quoted. */
707 needquotes = true;
708 for (s = str; *s; s++)
709 {
710 if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
711 (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
712 {
713 needquotes = true;
714 break;
715 }
/* every character so far is safe */
716 needquotes = false;
717 }
718
719 if (needquotes)
720 {
/* NOTE(review): listing line 721 is absent here. */
722 while (*str)
723 {
724 /* ' and \ must be escaped to \' and \\ */
725 if (*str == '\'' || *str == '\\')
/* NOTE(review): listing lines 726 and 728 are absent here. */
727
729 str++;
730 }
/* NOTE(review): listing line 731 is absent here. */
732 }
733 else
/* NOTE(review): listing line 734 (the body of this "else") is absent here. */
735}
736
737
738/*
739 * Append a psql meta-command that connects to the given database with the
740 * then-current connection's user, host and port.
741 */
/*
 * NOTE(review): listing lines 743, 762, 774, 776, 787, 795, 797, 802 and
 * 804 are not present in this listing, including the function name line
 * and the declaration of "connstr".  The body refers to parameters named
 * "buf" and "dbname".
 */
742void
744{
745 const char *s;
/* Set when the name contains anything beyond letters, digits, '_' and '.'. */
746 bool complex;
747
748 /*
749 * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
750 * For other names, even many not technically requiring it, skip to the
751 * general case. No database has a zero-length name.
752 */
753 complex = false;
754
755 for (s = dbname; *s; s++)
756 {
757 if (*s == '\n' || *s == '\r')
758 {
759 fprintf(stderr,
760 _("database name contains a newline or carriage return: \"%s\"\n"),
761 dbname);
/* NOTE(review): listing line 762 is absent here. */
763 }
764
765 if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
766 (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
767 {
768 complex = true;
769 }
770 }
771
772 if (complex)
773 {
/* NOTE(review): listing lines 774 and 776 are absent here. */
775
777
778 /*
779 * Force the target psql's encoding to SQL_ASCII. We don't really
780 * know the encoding of the database name, and it doesn't matter as
781 * long as psql will forward it to the server unchanged.
782 */
783 appendPQExpBufferStr(buf, "\\encoding SQL_ASCII\n");
784 appendPQExpBufferStr(buf, "\\connect -reuse-previous=on ");
785
786 appendPQExpBufferStr(&connstr, "dbname=");
/* NOTE(review): listing line 787 is absent here. */
788
789 /*
790 * As long as the name does not contain a newline, SQL identifier
791 * quoting satisfies the psql meta-command parser. Prefer not to
792 * involve psql-interpreted single quotes, which behaved differently
793 * before PostgreSQL 9.2.
794 */
/* NOTE(review): listing lines 795 and 797 are absent here. */
796
798 }
799 else
800 {
801 appendPQExpBufferStr(buf, "\\connect ");
/* NOTE(review): listing line 802 is absent here. */
803 }
/* NOTE(review): listing line 804 is absent here. */
805}
806
807
808/*
809 * Deconstruct the text representation of a 1-dimensional Postgres array
810 * into individual items.
811 *
812 * On success, returns true and sets *itemarray and *nitems to describe
813 * an array of individual strings. On parse failure, returns false;
814 * *itemarray may exist or be NULL.
815 *
816 * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
817 */
bool
parsePGArray(const char *atext, char ***itemarray, int *nitems)
{
	int			textlen;
	int			count = 0;
	char	  **ptrs;
	char	   *out;
	const char *in;

	/* Give the outputs defined values before anything can fail. */
	*itemarray = NULL;
	*nitems = 0;

	/*
	 * The input must look like "{item,item,item}".  An item is either raw
	 * text or a double-quoted string in which a backslash protects the
	 * character that follows it.
	 */
	textlen = strlen(atext);
	if (textlen < 2)
		return false;			/* bad input */
	if (atext[0] != '{' || atext[textlen - 1] != '}')
		return false;			/* bad input */

	/*
	 * A single malloc block holds the pointer array followed by the string
	 * data, so that one free() releases everything.  One pointer plus one
	 * byte per input character is always enough (think "{,,,,,,,,,,}").
	 */
	ptrs = (char **) malloc(textlen * (sizeof(char *) + sizeof(char)));
	if (ptrs == NULL)
		return false;			/* out of memory */
	*itemarray = ptrs;
	out = (char *) (ptrs + textlen);

	in = atext + 1;				/* step past the opening '{' */

	for (;;)
	{
		if (*in == '}')
			break;				/* reached a closing brace */
		if (*in == '\0')
			return false;		/* premature end of string */

		/* Start a new item at the current output position. */
		ptrs[count] = out;

		for (;;)
		{
			char		c = *in;

			if (c == '}' || c == ',')
				break;			/* end of this item */
			if (c == '\0')
				return false;	/* premature end of string */

			if (c == '"')
			{
				/* Quoted section: copy up to the matching quote. */
				in++;
				while (*in != '"')
				{
					if (*in == '\0')
						return false;	/* premature end of string */
					if (*in == '\\')
					{
						/* take the next character literally */
						in++;
						if (*in == '\0')
							return false;	/* premature end of string */
					}
					*out++ = *in++;
				}
				in++;			/* skip the closing quote */
			}
			else
			{
				/* Unquoted data is copied as-is. */
				*out++ = c;
				in++;
			}
		}

		*out++ = '\0';			/* terminate the item */
		if (*in == ',')
			in++;
		count++;
	}

	/* The brace that stopped the scan must be the final character. */
	if (in[1] != '\0')
		return false;			/* bogus syntax (embedded '}') */

	*nitems = count;
	return true;
}
889
890
891/*
892 * Append one element to the text representation of a 1-dimensional Postgres
893 * array.
894 *
895 * The caller must provide the initial '{' and closing '}' of the array.
896 * This function handles all else, including insertion of commas and
897 * quoting of values.
898 *
899 * We assume that typdelim is ','.
900 */
901void
902appendPGArray(PQExpBuffer buffer, const char *value)
903{
904 bool needquote;
905 const char *tmp;
906
/* Anything other than '{' just before us means an element precedes this one. */
907 if (buffer->data[buffer->len - 1] != '{')
908 appendPQExpBufferChar(buffer, ',');
909
910 /* Decide if we need quotes; this should match array_out()'s choices. */
911 if (value[0] == '\0')
912 needquote = true; /* force quotes for empty string */
913 else if (pg_strcasecmp(value, "NULL") == 0)
914 needquote = true; /* force quotes for literal NULL */
915 else
916 needquote = false;
917
918 if (!needquote)
919 {
/* Look for any character that is special inside an array literal. */
920 for (tmp = value; *tmp; tmp++)
921 {
922 char ch = *tmp;
923
924 if (ch == '"' || ch == '\\' ||
925 ch == '{' || ch == '}' || ch == ',' ||
926 /* these match scanner_isspace(): */
927 ch == ' ' || ch == '\t' || ch == '\n' ||
928 ch == '\r' || ch == '\v' || ch == '\f')
929 {
930 needquote = true;
931 break;
932 }
933 }
934 }
935
936 if (needquote)
937 {
/* Wrap in double quotes, backslash-escaping '"' and '\' inside. */
938 appendPQExpBufferChar(buffer, '"');
939 for (tmp = value; *tmp; tmp++)
940 {
941 char ch = *tmp;
942
943 if (ch == '"' || ch == '\\')
944 appendPQExpBufferChar(buffer, '\\');
945 appendPQExpBufferChar(buffer, ch);
946 }
947 appendPQExpBufferChar(buffer, '"');
948 }
949 else
/* NOTE(review): listing line 950 (the body of this "else") is absent here. */
951}
952
953
954/*
955 * Format a reloptions array and append it to the given buffer.
956 *
957 * "prefix" is prepended to the option names; typically it's "" or "toast.".
958 *
959 * Returns false if the reloptions array could not be parsed (in which case
960 * nothing will have been appended to the buffer), or true on success.
961 *
962 * Note: this logic should generally match the backend's flatten_reloptions()
963 * (in adt/ruleutils.c).
964 */
965bool
966appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
967 const char *prefix, int encoding, bool std_strings)
968{
969 char **options;
970 int noptions;
971 int i;
972
973 if (!parsePGArray(reloptions, &options, &noptions))
974 {
975 free(options);
976 return false;
977 }
978
979 for (i = 0; i < noptions; i++)
980 {
981 char *option = options[i];
982 char *name;
983 char *separator;
984 char *value;
985
986 /*
987 * Each array element should have the form name=value. If the "=" is
988 * missing for some reason, treat it like an empty value.
989 */
990 name = option;
991 separator = strchr(option, '=');
992 if (separator)
993 {
994 *separator = '\0';
995 value = separator + 1;
996 }
997 else
998 value = "";
999
1000 if (i > 0)
1001 appendPQExpBufferStr(buffer, ", ");
1002 appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));
1003
1004 /*
1005 * In general we need to quote the value; but to avoid unnecessary
1006 * clutter, do not quote if it is an identifier that would not need
1007 * quoting. (We could also allow numbers, but that is a bit trickier
1008 * than it looks --- for example, are leading zeroes significant? We
1009 * don't want to assume very much here about what custom reloptions
1010 * might mean.)
1011 */
1012 if (strcmp(fmtId(value), value) == 0)
1013 appendPQExpBufferStr(buffer, value);
1014 else
1015 appendStringLiteral(buffer, value, encoding, std_strings);
1016 }
1017
1018 free(options);
1019
1020 return true;
1021}
1022
1023
1024/*
1025 * processSQLNamePattern
1026 *
1027 * Scan a wildcard-pattern string and generate appropriate WHERE clauses
1028 * to limit the set of objects returned. The WHERE clauses are appended
1029 * to the already-partially-constructed query in buf. Returns whether
1030 * any clause was added.
1031 *
1032 * conn: connection query will be sent to (consulted for escaping rules).
1033 * buf: output parameter.
1034 * pattern: user-specified pattern option, or NULL if none ("*" is implied).
1035 * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
1036 * onto the existing WHERE clause).
1037 * force_escape: always quote regexp special characters, even outside
1038 * double quotes (else they are quoted only between double quotes).
1039 * schemavar: name of query variable to match against a schema-name pattern.
1040 * Can be NULL if no schema.
1041 * namevar: name of query variable to match against an object-name pattern.
1042 * altnamevar: NULL, or name of an alternative variable to match against name.
1043 * visibilityrule: clause to use if we want to restrict to visible objects
1044 * (for example, "pg_catalog.pg_table_is_visible(p.oid)"). Can be NULL.
1045 * dbnamebuf: output parameter receiving the database name portion of the
1046 * pattern, if any. Can be NULL.
1047 * dotcnt: how many separators were parsed from the pattern, by reference.
1048 *
1049 * Formatting note: the text already present in buf should end with a newline.
1050 * The appended text, if any, will end with one too.
1051 */
/*
 * NOTE(review): a number of lines of this function are not present in this
 * listing (1053, 1093, 1120, 1122, 1125, 1128, 1136, 1139 and 1156),
 * including the line with the function name and its first parameters.  The
 * body refers to parameters named "conn", "buf" and "pattern".
 */
1052bool
1054 bool have_where, bool force_escape,
1055 const char *schemavar, const char *namevar,
1056 const char *altnamevar, const char *visibilityrule,
1057 PQExpBuffer dbnamebuf, int *dotcnt)
1058{
1059 PQExpBufferData schemabuf;
1060 PQExpBufferData namebuf;
1061 bool added_clause = false;
/* Stand-in target for *dotcnt when the caller passes NULL. */
1062 int dcnt;
1063
/*
 * Emit "WHERE " for the first clause and " AND " for later ones, and record
 * that a clause has been added.
 */
1064#define WHEREAND() \
1065 (appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \
1066 have_where = true, added_clause = true)
1067
1068 if (dotcnt == NULL)
1069 dotcnt = &dcnt;
1070 *dotcnt = 0;
1071 if (pattern == NULL)
1072 {
1073 /* Default: select all visible objects */
1074 if (visibilityrule)
1075 {
1076 WHEREAND();
1077 appendPQExpBuffer(buf, "%s\n", visibilityrule);
1078 }
1079 return added_clause;
1080 }
1081
1082 initPQExpBuffer(&schemabuf);
1083 initPQExpBuffer(&namebuf);
1084
1085 /*
1086 * Convert shell-style 'pattern' into the regular expression(s) we want to
1087 * execute. Quoting/escaping into SQL literal format will be done below
1088 * using appendStringLiteralConn().
1089 *
1090 * If the caller provided a schemavar, we want to split the pattern on
1091 * ".", otherwise not.
1092 */
/*
 * NOTE(review): listing line 1093 (the start of the call whose remaining
 * arguments follow) is not present in this listing.
 */
1094 (schemavar ? dbnamebuf : NULL),
1095 (schemavar ? &schemabuf : NULL),
1096 &namebuf,
1097 pattern, force_escape, true, dotcnt);
1098
1099 /*
1100 * Now decide what we need to emit. We may run under a hostile
1101 * search_path, so qualify EVERY name. Note there will be a leading "^("
1102 * in the patterns in any case.
1103 *
1104 * We want the regex matches to use the database's default collation where
1105 * collation-sensitive behavior is required (for example, which characters
1106 * match '\w'). That happened by default before PG v12, but if the server
1107 * is >= v12 then we need to force it through explicit COLLATE clauses,
1108 * otherwise the "C" collation attached to "name" catalog columns wins.
1109 */
1110 if (namevar && namebuf.len > 2)
1111 {
1112 /* We have a name pattern, so constrain the namevar(s) */
1113
1114 /* Optimize away a "*" pattern */
1115 if (strcmp(namebuf.data, "^(.*)$") != 0)
1116 {
1117 WHEREAND();
1118 if (altnamevar)
1119 {
/* NOTE(review): listing lines 1120 and 1122 are absent around the next line. */
1121 "(%s OPERATOR(pg_catalog.~) ", namevar);
1123 if (PQserverVersion(conn) >= 120000)
1124 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
/* NOTE(review): listing lines 1125 and 1128 are absent around the next two lines. */
1126 "\n OR %s OPERATOR(pg_catalog.~) ",
1127 altnamevar);
1129 if (PQserverVersion(conn) >= 120000)
1130 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
1131 appendPQExpBufferStr(buf, ")\n");
1132 }
1133 else
1134 {
1135 appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar);
/* NOTE(review): listing line 1136 is absent here. */
1137 if (PQserverVersion(conn) >= 120000)
1138 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
/* NOTE(review): listing line 1139 is absent here. */
1140 }
1141 }
1142 }
1143
1144 if (schemavar && schemabuf.len > 2)
1145 {
1146 /* We have a schema pattern, so constrain the schemavar */
1147
1148 /* Optimize away a "*" pattern */
/* (schemavar was already tested by the enclosing "if") */
1149 if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
1150 {
1151 WHEREAND();
1152 appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar);
1153 appendStringLiteralConn(buf, schemabuf.data, conn);
1154 if (PQserverVersion(conn) >= 120000)
1155 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
/* NOTE(review): listing line 1156 is absent here. */
1157 }
1158 }
1159 else
1160 {
1161 /* No schema pattern given, so select only visible objects */
1162 if (visibilityrule)
1163 {
1164 WHEREAND();
1165 appendPQExpBuffer(buf, "%s\n", visibilityrule);
1166 }
1167 }
1168
/* Release the two scratch pattern buffers. */
1169 termPQExpBuffer(&schemabuf);
1170 termPQExpBuffer(&namebuf);
1171
1172 return added_clause;
1173#undef WHEREAND
1174}
1175
1176/*
1177 * Transform a possibly qualified shell-style object name pattern into up to
1178 * three SQL-style regular expressions, converting quotes, lower-casing
1179 * unquoted letters, and adjusting shell-style wildcard characters into regexp
1180 * notation.
1181 *
1182 * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
1183 * contains two or more dbname/schema/name separators, we parse the portions of
1184 * the pattern prior to the first and second separators into dbnamebuf and
1185 * schemabuf, and the rest into namebuf.
1186 *
1187 * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
1188 * least one separator, we parse the first portion into schemabuf and the rest
1189 * into namebuf.
1190 *
1191 * Otherwise, we parse all the pattern into namebuf.
1192 *
1193 * If the pattern contains more dotted parts than buffers to parse into, the
1194 * extra dots will be treated as literal characters and written into the
1195 * namebuf, though they will be counted. Callers should always check the value
1196 * returned by reference in dotcnt and handle this error case appropriately.
1197 *
1198 * We surround the regexps with "^(...)$" to force them to match whole strings,
1199 * as per SQL practice. We have to have parens in case strings contain "|",
1200 * else the "^" and "$" will be bound into the first and last alternatives
1201 * which is not what we want. Whether this is done for dbnamebuf is controlled
1202 * by the want_literal_dbname parameter.
1203 *
1204 * The regexps we parse into the buffers are appended to the data (if any)
1205 * already present. If we parse fewer fields than the number of buffers we
1206 * were given, the extra buffers are unaltered.
1207 *
1208 * encoding: the character encoding for the given pattern
1209 * dbnamebuf: output parameter receiving the database name portion of the
1210 * pattern, if any. Can be NULL.
1211 * schemabuf: output parameter receiving the schema name portion of the
1212 * pattern, if any. Can be NULL.
1213 * namebuf: output parameter receiving the database name portion of the
1214 * pattern, if any. Can be NULL.
1215 * pattern: user-specified pattern option, or NULL if none ("*" is implied).
1216 * force_escape: always quote regexp special characters, even outside
1217 * double quotes (else they are quoted only between double quotes).
1218 * want_literal_dbname: if true, regexp special characters within the database
1219 * name portion of the pattern will not be escaped, nor will the dbname be
1220 * converted into a regular expression.
1221 * dotcnt: output parameter receiving the number of separators parsed from the
1222 * pattern.
1223 */
1224void
1226 PQExpBuffer namebuf, const char *pattern, bool force_escape,
1227 bool want_literal_dbname, int *dotcnt)
1228{
1230 PQExpBufferData left_literal;
1231 PQExpBuffer curbuf;
1232 PQExpBuffer maxbuf;
1233 int i;
1234 bool inquotes;
1235 bool left;
1236 const char *cp;
1237
1238 Assert(pattern != NULL);
1239 Assert(namebuf != NULL);
1240
1241 /* callers should never expect "dbname.relname" format */
1242 Assert(dbnamebuf == NULL || schemabuf != NULL);
1243 Assert(dotcnt != NULL);
1244
1245 *dotcnt = 0;
1246 inquotes = false;
1247 cp = pattern;
1248
1249 if (dbnamebuf != NULL)
1250 maxbuf = &buf[2];
1251 else if (schemabuf != NULL)
1252 maxbuf = &buf[1];
1253 else
1254 maxbuf = &buf[0];
1255
1256 curbuf = &buf[0];
1257 if (want_literal_dbname)
1258 {
1259 left = true;
1260 initPQExpBuffer(&left_literal);
1261 }
1262 else
1263 left = false;
1264 initPQExpBuffer(curbuf);
1265 appendPQExpBufferStr(curbuf, "^(");
1266 while (*cp)
1267 {
1268 char ch = *cp;
1269
1270 if (ch == '"')
1271 {
1272 if (inquotes && cp[1] == '"')
1273 {
1274 /* emit one quote, stay in inquotes mode */
1275 appendPQExpBufferChar(curbuf, '"');
1276 if (left)
1277 appendPQExpBufferChar(&left_literal, '"');
1278 cp++;
1279 }
1280 else
1281 inquotes = !inquotes;
1282 cp++;
1283 }
1284 else if (!inquotes && isupper((unsigned char) ch))
1285 {
1286 appendPQExpBufferChar(curbuf,
1287 pg_tolower((unsigned char) ch));
1288 if (left)
1289 appendPQExpBufferChar(&left_literal,
1290 pg_tolower((unsigned char) ch));
1291 cp++;
1292 }
1293 else if (!inquotes && ch == '*')
1294 {
1295 appendPQExpBufferStr(curbuf, ".*");
1296 if (left)
1297 appendPQExpBufferChar(&left_literal, '*');
1298 cp++;
1299 }
1300 else if (!inquotes && ch == '?')
1301 {
1302 appendPQExpBufferChar(curbuf, '.');
1303 if (left)
1304 appendPQExpBufferChar(&left_literal, '?');
1305 cp++;
1306 }
1307 else if (!inquotes && ch == '.')
1308 {
1309 left = false;
1310 if (dotcnt)
1311 (*dotcnt)++;
1312 if (curbuf < maxbuf)
1313 {
1314 appendPQExpBufferStr(curbuf, ")$");
1315 curbuf++;
1316 initPQExpBuffer(curbuf);
1317 appendPQExpBufferStr(curbuf, "^(");
1318 cp++;
1319 }
1320 else
1321 appendPQExpBufferChar(curbuf, *cp++);
1322 }
1323 else if (ch == '$')
1324 {
1325 /*
1326 * Dollar is always quoted, whether inside quotes or not. The
1327 * reason is that it's allowed in SQL identifiers, so there's a
1328 * significant use-case for treating it literally, while because
1329 * we anchor the pattern automatically there is no use-case for
1330 * having it possess its regexp meaning.
1331 */
1332 appendPQExpBufferStr(curbuf, "\\$");
1333 if (left)
1334 appendPQExpBufferChar(&left_literal, '$');
1335 cp++;
1336 }
1337 else
1338 {
1339 /*
1340 * Ordinary data character, transfer to pattern
1341 *
1342 * Inside double quotes, or at all times if force_escape is true,
1343 * quote regexp special characters with a backslash to avoid
1344 * regexp errors. Outside quotes, however, let them pass through
1345 * as-is; this lets knowledgeable users build regexp expressions
1346 * that are more powerful than shell-style patterns.
1347 *
1348 * As an exception to that, though, always quote "[]", as that's
1349 * much more likely to be an attempt to write an array type name
1350 * than it is to be the start of a regexp bracket expression.
1351 */
1352 if ((inquotes || force_escape) &&
1353 strchr("|*+?()[]{}.^$\\", ch))
1354 appendPQExpBufferChar(curbuf, '\\');
1355 else if (ch == '[' && cp[1] == ']')
1356 appendPQExpBufferChar(curbuf, '\\');
1357 i = PQmblenBounded(cp, encoding);
1358 while (i--)
1359 {
1360 if (left)
1361 appendPQExpBufferChar(&left_literal, *cp);
1362 appendPQExpBufferChar(curbuf, *cp++);
1363 }
1364 }
1365 }
1366 appendPQExpBufferStr(curbuf, ")$");
1367
1368 if (namebuf)
1369 {
1370 appendPQExpBufferStr(namebuf, curbuf->data);
1371 termPQExpBuffer(curbuf);
1372 curbuf--;
1373 }
1374
1375 if (schemabuf && curbuf >= buf)
1376 {
1377 appendPQExpBufferStr(schemabuf, curbuf->data);
1378 termPQExpBuffer(curbuf);
1379 curbuf--;
1380 }
1381
1382 if (dbnamebuf && curbuf >= buf)
1383 {
1384 if (want_literal_dbname)
1385 appendPQExpBufferStr(dbnamebuf, left_literal.data);
1386 else
1387 appendPQExpBufferStr(dbnamebuf, curbuf->data);
1388 termPQExpBuffer(curbuf);
1389 }
1390
1391 if (want_literal_dbname)
1392 termPQExpBuffer(&left_literal);
1393}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1112
#define Assert(condition)
Definition: c.h:815
#define ESCAPE_STRING_SYNTAX
Definition: c.h:1123
#define SQL_STR_DOUBLE(ch, escape_backslash)
Definition: c.h:1120
const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS]
Definition: keywords.c:29
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
#define _(x)
Definition: elog.c:90
int PQserverVersion(const PGconn *conn)
Definition: fe-connect.c:7497
int PQclientEncoding(const PGconn *conn)
Definition: fe-connect.c:7597
size_t PQescapeStringConn(PGconn *conn, char *to, const char *from, size_t length, int *error)
Definition: fe-exec.c:4176
int PQmblen(const char *s, int encoding)
Definition: fe-misc.c:1233
int PQmblenBounded(const char *s, int encoding)
Definition: fe-misc.c:1243
const char * str
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
#define nitems(x)
Definition: indent.h:31
static struct @162 value
int remaining
Definition: informix.c:692
int i
Definition: isn.c:72
PGDLLIMPORT const ScanKeywordList ScanKeywords
#define UNRESERVED_KEYWORD
Definition: keywords.h:20
int ScanKeywordLookup(const char *str, const ScanKeywordList *keywords)
Definition: kwlookup.c:38
exit(1)
int32 encoding
Definition: pg_database.h:41
static char * connstr
Definition: pg_dumpall.c:88
static size_t noptions
static char ** options
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:72
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
static const char hextbl[]
Definition: pgp-info.c:87
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define snprintf
Definition: port.h:239
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
PQExpBuffer createPQExpBuffer(void)
Definition: pqexpbuffer.c:72
void initPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:90
int enlargePQExpBuffer(PQExpBuffer str, size_t needed)
Definition: pqexpbuffer.c:172
void resetPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:146
void appendPQExpBuffer(PQExpBuffer str, const char *fmt,...)
Definition: pqexpbuffer.c:265
void destroyPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:114
void appendPQExpBufferChar(PQExpBuffer str, char ch)
Definition: pqexpbuffer.c:378
void appendPQExpBufferStr(PQExpBuffer str, const char *data)
Definition: pqexpbuffer.c:367
void termPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:129
PQExpBufferData * PQExpBuffer
Definition: pqexpbuffer.h:51
char * c
#define EXIT_FAILURE
Definition: settings.h:178
char * dbname
Definition: streamutil.c:50
PGconn * conn
Definition: streamutil.c:53
const char * fmtQualifiedId(const char *schema, const char *id)
Definition: string_utils.c:296
bool appendShellStringNoError(PQExpBuffer buf, const char *str)
Definition: string_utils.c:594
const char * fmtIdEnc(const char *rawid, int encoding)
Definition: string_utils.c:101
const char * fmtId(const char *rawid)
Definition: string_utils.c:248
const char * fmtQualifiedIdEnc(const char *schema, const char *id, int encoding)
Definition: string_utils.c:263
void setFmtEncoding(int encoding)
Definition: string_utils.c:69
void appendShellString(PQExpBuffer buf, const char *str)
Definition: string_utils.c:582
void appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
Definition: string_utils.c:446
void appendPGArray(PQExpBuffer buffer, const char *value)
Definition: string_utils.c:902
bool processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern, bool have_where, bool force_escape, const char *schemavar, const char *namevar, const char *altnamevar, const char *visibilityrule, PQExpBuffer dbnamebuf, int *dotcnt)
void appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
Definition: string_utils.c:743
void appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length, bool std_strings)
Definition: string_utils.c:527
bool parsePGArray(const char *atext, char ***itemarray, int *nitems)
Definition: string_utils.c:819
void appendStringLiteral(PQExpBuffer buf, const char *str, int encoding, bool std_strings)
Definition: string_utils.c:351
void patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf, PQExpBuffer namebuf, const char *pattern, bool force_escape, bool want_literal_dbname, int *dotcnt)
bool appendReloptionsArray(PQExpBuffer buffer, const char *reloptions, const char *prefix, int encoding, bool std_strings)
Definition: string_utils.c:966
static PQExpBuffer defaultGetLocalPQExpBuffer(void)
#define WHEREAND()
static int getFmtEncoding(void)
Definition: string_utils.c:78
PQExpBuffer(* getLocalPQExpBuffer)(void)
Definition: string_utils.c:28
void appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
Definition: string_utils.c:484
void appendConnStrVal(PQExpBuffer buf, const char *str)
Definition: string_utils.c:698
int quote_all_identifiers
Definition: string_utils.c:27
char * formatPGVersionNumber(int version_number, bool include_minor, char *buf, size_t buflen)
Definition: string_utils.c:313
static ItemArray items
Definition: test_tidstore.c:48
const char * name
void pg_encoding_set_invalid(int encoding, char *dst)
Definition: wchar.c:2049
int pg_encoding_mblen(int encoding, const char *mbstr)
Definition: wchar.c:2116
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2150