PostgreSQL Source Code  git master
string_utils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * String-processing utility routines for frontend code
4  *
5  * Assorted utility functions that are useful in constructing SQL queries
6  * and interpreting backend output.
7  *
8  *
9  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994, Regents of the University of California
11  *
12  * src/fe_utils/string_utils.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres_fe.h"
17 
18 #include <ctype.h>
#include <limits.h>
#include <stdint.h>
19 
20 #include "common/keywords.h"
21 #include "fe_utils/string_utils.h"
22 
24 
25 /* Globals exported by this file */
28 
29 
30 /*
31  * Returns a temporary PQExpBuffer, valid until the next call to the function.
32  * This is used by fmtId and fmtQualifiedId.
33  *
34  * Non-reentrant and non-thread-safe but reduces memory leakage. You can
35  * replace this with a custom version by setting the getLocalPQExpBuffer
36  * function pointer.
37  */
/* NOTE(review): the line carrying this function's name and parameter list is absent from this listing. */
38 static PQExpBuffer
40 {
/* Function-local static: the same buffer object is returned on every call. */
41  static PQExpBuffer id_return = NULL;
42 
43  if (id_return) /* buffer already created by an earlier call? */
44  {
45  /* same buffer, just wipe contents */
46  resetPQExpBuffer(id_return);
47  }
48  else
49  {
50  /* first call: create the buffer */
51  id_return = createPQExpBuffer();
52  }
53 
54  return id_return;
55 }
56 
57 /*
58  * Quotes input string if it's not a legitimate SQL identifier as-is.
59  *
60  * Note that the returned string must be used before calling fmtId again,
61  * since we re-use the same return buffer each time.
62  */
/*
 * Returns rawid unchanged when it consists only of a-z, 0-9 and '_' (not
 * starting with a digit) and is not a non-unreserved keyword; otherwise
 * returns it wrapped in double quotes with embedded double quotes doubled.
 * The result points into the buffer obtained from getLocalPQExpBuffer().
 */
63 const char *
64 fmtId(const char *rawid)
65 {
66  PQExpBuffer id_return = getLocalPQExpBuffer();
67 
68  const char *cp;
69  bool need_quotes = false;
70 
71  /*
72  * These checks need to match the identifier production in scan.l. Don't
73  * use islower() etc.
74  */
/* NOTE(review): the `if (...)` line that guards the next assignment, and that the `else if` below chains to, is absent from this listing. */
76  need_quotes = true;
77  /* slightly different rules for first character */
78  else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
79  need_quotes = true;
80  else
81  {
82  /* otherwise check the entire string */
83  for (cp = rawid; *cp; cp++)
84  {
85  if (!((*cp >= 'a' && *cp <= 'z')
86  || (*cp >= '0' && *cp <= '9')
87  || (*cp == '_')))
88  {
89  need_quotes = true;
90  break;
91  }
92  }
93  }
94 
95  if (!need_quotes)
96  {
97  /*
98  * Check for keyword. We quote keywords except for unreserved ones.
99  * (In some cases we could avoid quoting a col_name or type_func_name
100  * keyword, but it seems much harder than it's worth to tell that.)
101  *
102  * Note: ScanKeywordLookup() does case-insensitive comparison, but
103  * that's fine, since we already know we have all-lower-case.
104  */
105  int kwnum = ScanKeywordLookup(rawid, &ScanKeywords);
106 
/* A negative kwnum is treated as "not a keyword". */
107  if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
108  need_quotes = true;
109  }
110 
111  if (!need_quotes)
112  {
113  /* no quoting needed */
114  appendPQExpBufferStr(id_return, rawid);
115  }
116  else
117  {
118  appendPQExpBufferChar(id_return, '"');
119  for (cp = rawid; *cp; cp++)
120  {
121  /*
122  * Did we find a double-quote in the string? Then make this a
123  * double double-quote per SQL99. Before, we put in a
124  * backslash/double-quote pair. - thomas 2000-08-05
125  */
126  if (*cp == '"')
127  appendPQExpBufferChar(id_return, '"');
128  appendPQExpBufferChar(id_return, *cp);
129  }
130  appendPQExpBufferChar(id_return, '"');
131  }
132 
133  return id_return->data;
134 }
135 
136 /*
137  * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
138  *
139  * Like fmtId, use the result before calling again.
140  *
141  * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
142  * use that buffer until we're finished with calling fmtId().
143  */
144 const char *
145 fmtQualifiedId(const char *schema, const char *id)
146 {
147  PQExpBuffer id_return;
148  PQExpBuffer lcl_pqexp = createPQExpBuffer();
149 
150  /* Some callers might fail to provide a schema name */
151  if (schema && *schema)
152  {
153  appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
154  }
155  appendPQExpBufferStr(lcl_pqexp, fmtId(id));
156 
157  id_return = getLocalPQExpBuffer();
158 
159  appendPQExpBufferStr(id_return, lcl_pqexp->data);
160  destroyPQExpBuffer(lcl_pqexp);
161 
162  return id_return->data;
163 }
164 
165 
/*
 * Render a Postgres version number, given in the PG_VERSION_NUM integer
 * format (as returned by PQserverVersion()), as text.  This keeps the
 * knowledge about two-part vs. three-part version numbers in one place.
 *
 * version_number: the integer version, e.g. 90605 or 170002.
 * include_minor: whether to print the trailing minor-release component.
 * buf, buflen: caller-supplied output buffer (which makes the function
 *		reentrant); 32 bytes is the recommended size.
 *
 * Returns 'buf', as a notational convenience.
 */
char *
formatPGVersionNumber(int version_number, bool include_minor,
					  char *buf, size_t buflen)
{
	const int	major = version_number / 10000;

	if (version_number < 100000)
	{
		/* Old three-part style: the second component is always printed */
		const int	middle = (version_number / 100) % 100;

		if (!include_minor)
			snprintf(buf, buflen, "%d.%d", major, middle);
		else
			snprintf(buf, buflen, "%d.%d.%d", major, middle,
					 version_number % 100);
	}
	else if (include_minor)
	{
		/* New two-part style, with minor release */
		snprintf(buf, buflen, "%d.%d", major, version_number % 10000);
	}
	else
	{
		/* New two-part style, major number only */
		snprintf(buf, buflen, "%d", major);
	}

	return buf;
}
202 
203 
204 /*
205  * Convert a string value to an SQL string literal and append it to
206  * the given buffer. We assume the specified client_encoding and
207  * standard_conforming_strings settings.
208  *
209  * This is essentially equivalent to libpq's PQescapeStringInternal,
210  * except for the output buffer structure. We need it in situations
211  * where we do not have a PGconn available. Where we do,
212  * appendStringLiteralConn is a better choice.
213  */
/* NOTE(review): the first line of the parameter list (function name and leading parameters) is absent from this listing. */
214 void
216  int encoding, bool std_strings)
217 {
218  size_t length = strlen(str);
219  const char *source = str;
220  char *target;
221 
/* Worst case: every input byte is doubled, plus the two enclosing quotes. */
222  if (!enlargePQExpBuffer(buf, 2 * length + 2))
223  return;
224 
/* From here on we write directly into the buffer's storage; buf->len is fixed up at the end. */
225  target = buf->data + buf->len;
226  *target++ = '\'';
227 
228  while (*source != '\0')
229  {
230  char c = *source;
231  int len;
232  int i;
233 
234  /* Fast path for plain ASCII */
235  if (!IS_HIGHBIT_SET(c))
236  {
237  /* Apply quoting if needed */
238  if (SQL_STR_DOUBLE(c, !std_strings))
239  *target++ = c;
240  /* Copy the character */
241  *target++ = c;
242  source++;
243  continue;
244  }
245 
246  /* Slow path for possible multibyte characters */
/* NOTE(review): the statement that assigns `len` is absent from this listing; `len` is read below without a visible assignment. */
248 
249  /* Copy the character */
250  for (i = 0; i < len; i++)
251  {
252  if (*source == '\0')
253  break;
254  *target++ = *source++;
255  }
256 
257  /*
258  * If we hit premature end of string (ie, incomplete multibyte
259  * character), try to pad out to the correct length with spaces. We
260  * may not be able to pad completely, but we will always be able to
261  * insert at least one pad space (since we'd not have quoted a
262  * multibyte character). This should be enough to make a string that
263  * the server will error out on.
264  */
265  if (i < len)
266  {
/* Stop padding early enough to leave room for the closing quote and NUL. */
267  char *stop = buf->data + buf->maxlen - 2;
268 
269  for (; i < len; i++)
270  {
271  if (target >= stop)
272  break;
273  *target++ = ' ';
274  }
275  break;
276  }
277  }
278 
279  /* Write the terminating quote and NUL character. */
280  *target++ = '\'';
281  *target = '\0';
282 
283  buf->len = target - buf->data;
284 }
285 
286 
287 /*
288  * Convert a string value to an SQL string literal and append it to
289  * the given buffer. Encoding and string syntax rules are as indicated
290  * by current settings of the PGconn.
291  */
/* NOTE(review): the line carrying this function's name and parameter list is absent from this listing. */
292 void
294 {
295  size_t length = strlen(str);
296 
297  /*
298  * XXX This is a kluge to silence escape_string_warning in our utility
299  * programs. It should go away someday.
300  */
301  if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100)
302  {
303  /* ensure we are not adjacent to an identifier */
304  if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
/* NOTE(review): three lines are absent from this listing here: the body of the `if` above and whatever precedes the `return`. */
308  return;
309  }
310  /* XXX end kluge */
311 
/* Reserve room for the worst case of every byte doubled plus two quotes. */
312  if (!enlargePQExpBuffer(buf, 2 * length + 2))
313  return;
314  appendPQExpBufferChar(buf, '\'');
/* Escape straight into the buffer's storage and advance len by the count returned. */
315  buf->len += PQescapeStringConn(conn, buf->data + buf->len,
316  str, length, NULL);
317  appendPQExpBufferChar(buf, '\'');
318 }
319 
320 
321 /*
322  * Convert a string value to a dollar quoted literal and append it to
323  * the given buffer. If the dqprefix parameter is not NULL then the
324  * dollar quote delimiter will begin with that (after the opening $).
325  *
326  * No escaping is done at all on str, in compliance with the rules
327  * for parsing dollar quoted strings. Also, we need not worry about
328  * encoding issues.
329  */
330 void
331 appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
332 {
/* Characters appended, in rotation, to the delimiter until it no longer occurs in str. */
333  static const char suffixes[] = "_XXXXXXX";
334  int nextchar = 0;
335  PQExpBuffer delimBuf = createPQExpBuffer();
336 
337  /* start with $ + dqprefix if not NULL */
338  appendPQExpBufferChar(delimBuf, '$');
339  if (dqprefix)
340  appendPQExpBufferStr(delimBuf, dqprefix);
341 
342  /*
343  * Make sure we choose a delimiter which (without the trailing $) is not
344  * present in the string being quoted. We don't check with the trailing $
345  * because a string ending in $foo must not be quoted with $foo$.
346  */
347  while (strstr(str, delimBuf->data) != NULL)
348  {
349  appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
/* Wrap around, excluding the array's terminating NUL. */
350  nextchar %= sizeof(suffixes) - 1;
351  }
352 
353  /* add trailing $ */
354  appendPQExpBufferChar(delimBuf, '$');
355 
356  /* quote it and we are all done */
357  appendPQExpBufferStr(buf, delimBuf->data);
/* NOTE(review): one line is absent from this listing here; as shown, str itself is never appended between the opening and closing delimiters. */
359  appendPQExpBufferStr(buf, delimBuf->data);
360 
361  destroyPQExpBuffer(delimBuf);
362 }
363 
364 
365 /*
366  * Convert a bytea value (presented as raw bytes) to an SQL string literal
367  * and append it to the given buffer. We assume the specified
368  * standard_conforming_strings setting.
369  *
370  * This is needed in situations where we do not have a PGconn available.
371  * Where we do, PQescapeByteaConn is a better choice.
372  */
373 void
374 appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
375  bool std_strings)
376 {
377  const unsigned char *source = str;
378  char *target;
379 
380  static const char hextbl[] = "0123456789abcdef";
381 
382  /*
383  * This implementation is hard-wired to produce hex-format output. We do
384  * not know the server version the output will be loaded into, so making
385  * an intelligent format choice is impossible. It might be better to
386  * always use the old escaped format.
387  */
388  if (!enlargePQExpBuffer(buf, 2 * length + 5))
389  return;
390 
391  target = buf->data + buf->len;
392  *target++ = '\'';
393  if (!std_strings)
394  *target++ = '\\';
395  *target++ = '\\';
396  *target++ = 'x';
397 
398  while (length-- > 0)
399  {
400  unsigned char c = *source++;
401 
402  *target++ = hextbl[(c >> 4) & 0xF];
403  *target++ = hextbl[c & 0xF];
404  }
405 
406  /* Write the terminating quote and NUL character. */
407  *target++ = '\'';
408  *target = '\0';
409 
410  buf->len = target - buf->data;
411 }
412 
413 
414 /*
415  * Append the given string to the shell command being built in the buffer,
416  * with shell-style quoting as needed to create exactly one argument.
417  *
418  * Forbid LF or CR characters, which have scant practical use beyond designing
419  * security breaches. The Windows command shell is unusable as a conduit for
420  * arguments containing LF or CR characters. A future major release should
421  * reject those characters in CREATE ROLE and CREATE DATABASE, because use
422  * there eventually leads to errors here.
423  *
424  * appendShellString() simply prints an error and dies if LF or CR appears.
425  * appendShellStringNoError() omits those characters from the result, and
426  * returns false if there were any.
427  */
/*
 * NOTE(review): this listing lacks the line with the function's name and
 * parameters, the condition that guards the inner block, and the statement
 * following the fprintf() call.
 */
428 void
430 {
432  {
433  fprintf(stderr,
434  _("shell command argument contains a newline or carriage return: \"%s\"\n"),
435  str);
437  }
438 }
439 
/* NOTE(review): the line carrying this function's name and parameter list is absent from this listing. */
440 bool
442 {
443 #ifdef WIN32
444  int backslash_run_length = 0;
445 #endif
/* Becomes false once an LF or CR has been dropped from the input. */
446  bool ok = true;
447  const char *p;
448 
449  /*
450  * Don't bother with adding quotes if the string is nonempty and clearly
451  * contains only safe characters.
452  */
453  if (*str != '\0' &&
454  strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
455  {
/* NOTE(review): the statement preceding this return is absent from this listing. */
457  return ok;
458  }
459 
460 #ifndef WIN32
461  appendPQExpBufferChar(buf, '\'');
462  for (p = str; *p; p++)
463  {
464  if (*p == '\n' || *p == '\r')
465  {
466  ok = false;
467  continue;
468  }
469 
/* Single quote: close the quoted run, emit the quote inside double quotes, reopen. */
470  if (*p == '\'')
471  appendPQExpBufferStr(buf, "'\"'\"'");
472  else
/* NOTE(review): the body of this else branch is absent from this listing. */
474  }
475  appendPQExpBufferChar(buf, '\'');
476 #else /* WIN32 */
477 
478  /*
479  * A Windows system() argument experiences two layers of interpretation.
480  * First, cmd.exe interprets the string. Its behavior is undocumented,
481  * but a caret escapes any byte except LF or CR that would otherwise have
482  * special meaning. Handling of a caret before LF or CR differs between
483  * "cmd.exe /c" and other modes, and it is unusable here.
484  *
485  * Second, the new process parses its command line to construct argv (see
486  * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). This treats
487  * backslash-double quote sequences specially.
488  */
489  appendPQExpBufferStr(buf, "^\"");
490  for (p = str; *p; p++)
491  {
492  if (*p == '\n' || *p == '\r')
493  {
494  ok = false;
495  continue;
496  }
497 
498  /* Change N backslashes before a double quote to 2N+1 backslashes. */
499  if (*p == '"')
500  {
501  while (backslash_run_length)
502  {
503  appendPQExpBufferStr(buf, "^\\");
504  backslash_run_length--;
505  }
506  appendPQExpBufferStr(buf, "^\\");
507  }
508  else if (*p == '\\')
509  backslash_run_length++;
510  else
511  backslash_run_length = 0;
512 
513  /*
514  * Decline to caret-escape the most mundane characters, to ease
515  * debugging and lest we approach the command length limit.
516  */
517  if (!((*p >= 'a' && *p <= 'z') ||
518  (*p >= 'A' && *p <= 'Z') ||
519  (*p >= '0' && *p <= '9')))
/* NOTE(review): two lines are absent from this listing here (the body of the `if` above and what follows it in the loop). */
522  }
523 
524  /*
525  * Change N backslashes at end of argument to 2N backslashes, because they
526  * precede the double quote that terminates the argument.
527  */
528  while (backslash_run_length)
529  {
530  appendPQExpBufferStr(buf, "^\\");
531  backslash_run_length--;
532  }
533  appendPQExpBufferStr(buf, "^\"");
534 #endif /* WIN32 */
535 
536  return ok;
537 }
538 
539 
540 /*
541  * Append the given string to the buffer, with suitable quoting for passing
542  * the string as a value in a keyword/value pair in a libpq connection string.
543  */
/* NOTE(review): the line carrying this function's name and parameter list is absent from this listing. */
544 void
546 {
547  const char *s;
548  bool needquotes;
549 
550  /*
551  * If the string is one or more plain ASCII characters, no need to quote
552  * it. This is quite conservative, but better safe than sorry.
553  */
/* Starts out true so that an empty string (loop body never runs) is quoted. */
554  needquotes = true;
555  for (s = str; *s; s++)
556  {
557  if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
558  (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
559  {
560  needquotes = true;
561  break;
562  }
563  needquotes = false;
564  }
565 
566  if (needquotes)
567  {
568  appendPQExpBufferChar(buf, '\'');
569  while (*str)
570  {
571  /* ' and \ must be escaped as \' and \\ */
572  if (*str == '\'' || *str == '\\')
573  appendPQExpBufferChar(buf, '\\');
574 
/* NOTE(review): the statement between the escape check and the increment is absent from this listing. */
576  str++;
577  }
578  appendPQExpBufferChar(buf, '\'');
579  }
580  else
/* NOTE(review): the body of this else branch is absent from this listing. */
582 }
583 
584 
585 /*
586  * Append a psql meta-command that connects to the given database with the
587  * then-current connection's user, host and port.
588  */
/* NOTE(review): the line carrying this function's name and parameter list is absent from this listing. */
589 void
591 {
592  const char *s;
593  bool complex;
594 
595  /*
596  * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
597  * For other names, even many not technically requiring it, skip to the
598  * general case. No database has a zero-length name.
599  */
600  complex = false;
601 
602  for (s = dbname; *s; s++)
603  {
604  if (*s == '\n' || *s == '\r')
605  {
606  fprintf(stderr,
607  _("database name contains a newline or carriage return: \"%s\"\n"),
608  dbname);
/* NOTE(review): the statement following the fprintf() call is absent from this listing. */
610  }
611 
/* Any character outside letters, digits, '_' and '.' selects the general case. */
612  if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
613  (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
614  {
615  complex = true;
616  }
617  }
618 
619  appendPQExpBufferStr(buf, "\\connect ");
620  if (complex)
621  {
/* NOTE(review): several lines of this branch are absent from this listing, including the declaration and setup of `connstr` and the statements that emit and release it. */
623 
625  appendPQExpBufferStr(&connstr, "dbname=");
627 
628  appendPQExpBufferStr(buf, "-reuse-previous=on ");
629 
630  /*
631  * As long as the name does not contain a newline, SQL identifier
632  * quoting satisfies the psql meta-command parser. Prefer not to
633  * involve psql-interpreted single quotes, which behaved differently
634  * before PostgreSQL 9.2.
635  */
637 
639  }
640  else
/* NOTE(review): the body of this else branch is absent from this listing. */
642  appendPQExpBufferChar(buf, '\n');
643 }
644 
645 
/*
 * Deconstruct the text representation of a 1-dimensional Postgres array
 * into individual items.
 *
 * atext: NUL-terminated array text of the form "{item,item,item}".
 * itemarray: receives a malloc'd array of pointers to the item strings.
 * nitems: receives the number of items found.
 *
 * On success, returns true and sets *itemarray and *nitems to describe
 * an array of individual strings.  On failure, returns false with *nitems
 * set to 0; *itemarray is NULL if the input was rejected before allocation
 * or memory ran out, and otherwise points to storage the caller should free.
 *
 * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
 */
bool
parsePGArray(const char *atext, char ***itemarray, int *nitems)
{
	size_t		inputlen;
	char	  **items;
	char	   *strings;
	int			curitem;

	/*
	 * We expect input in the form of "{item,item,item}" where any item is
	 * either raw data, or surrounded by double quotes (in which case embedded
	 * characters including backslashes and quotes are backslashed).
	 *
	 * We build the result as an array of pointers followed by the actual
	 * string data, all in one malloc block for convenience of deallocation.
	 * The worst-case storage need is not more than one pointer and one
	 * character for each input character (consider "{,,,,,,,,,,}").
	 */
	*itemarray = NULL;
	*nitems = 0;

	/* Keep the length as size_t so very long inputs are not truncated */
	inputlen = strlen(atext);
	if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}')
		return false;			/* bad input */

	/*
	 * The item count is reported through an int and the allocation size is a
	 * product; refuse inputs too long for either to be represented.
	 */
	if (inputlen > (size_t) INT_MAX ||
		inputlen > SIZE_MAX / (sizeof(char *) + sizeof(char)))
		return false;

	items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char)));
	if (items == NULL)
		return false;			/* out of memory */
	*itemarray = items;

	/* String data starts right after the worst-case pointer array */
	strings = (char *) (items + inputlen);

	atext++;					/* advance over initial '{' */
	curitem = 0;
	while (*atext != '}')
	{
		if (*atext == '\0')
			return false;		/* premature end of string */
		items[curitem] = strings;
		while (*atext != '}' && *atext != ',')
		{
			if (*atext == '\0')
				return false;	/* premature end of string */
			if (*atext != '"')
				*strings++ = *atext++;	/* copy unquoted data */
			else
			{
				/* process quoted substring */
				atext++;
				while (*atext != '"')
				{
					if (*atext == '\0')
						return false;	/* premature end of string */
					if (*atext == '\\')
					{
						/* backslash: take the next character literally */
						atext++;
						if (*atext == '\0')
							return false;	/* premature end of string */
					}
					*strings++ = *atext++;	/* copy quoted data */
				}
				atext++;
			}
		}
		*strings++ = '\0';
		if (*atext == ',')
			atext++;
		curitem++;
	}

	/* The '}' that ended the scan must be the last character of the input */
	if (atext[1] != '\0')
		return false;			/* bogus syntax (embedded '}') */
	*nitems = curitem;
	return true;
}
727 
728 
729 /*
730  * Append one element to the text representation of a 1-dimensional Postgres
731  * array.
732  *
733  * The caller must provide the initial '{' and closing '}' of the array.
734  * This function handles all else, including insertion of commas and
735  * quoting of values.
736  *
737  * We assume that typdelim is ','.
738  */
739 void
740 appendPGArray(PQExpBuffer buffer, const char *value)
741 {
742  bool needquote;
743  const char *tmp;
744 
745  if (buffer->data[buffer->len - 1] != '{')
746  appendPQExpBufferChar(buffer, ',');
747 
748  /* Decide if we need quotes; this should match array_out()'s choices. */
749  if (value[0] == '\0')
750  needquote = true; /* force quotes for empty string */
751  else if (pg_strcasecmp(value, "NULL") == 0)
752  needquote = true; /* force quotes for literal NULL */
753  else
754  needquote = false;
755 
756  if (!needquote)
757  {
758  for (tmp = value; *tmp; tmp++)
759  {
760  char ch = *tmp;
761 
762  if (ch == '"' || ch == '\\' ||
763  ch == '{' || ch == '}' || ch == ',' ||
764  /* these match scanner_isspace(): */
765  ch == ' ' || ch == '\t' || ch == '\n' ||
766  ch == '\r' || ch == '\v' || ch == '\f')
767  {
768  needquote = true;
769  break;
770  }
771  }
772  }
773 
774  if (needquote)
775  {
776  appendPQExpBufferChar(buffer, '"');
777  for (tmp = value; *tmp; tmp++)
778  {
779  char ch = *tmp;
780 
781  if (ch == '"' || ch == '\\')
782  appendPQExpBufferChar(buffer, '\\');
783  appendPQExpBufferChar(buffer, ch);
784  }
785  appendPQExpBufferChar(buffer, '"');
786  }
787  else
788  appendPQExpBufferStr(buffer, value);
789 }
790 
791 
792 /*
793  * Format a reloptions array and append it to the given buffer.
794  *
795  * "prefix" is prepended to the option names; typically it's "" or "toast.".
796  *
797  * Returns false if the reloptions array could not be parsed (in which case
798  * nothing will have been appended to the buffer), or true on success.
799  *
800  * Note: this logic should generally match the backend's flatten_reloptions()
801  * (in adt/ruleutils.c).
802  */
803 bool
804 appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
805  const char *prefix, int encoding, bool std_strings)
806 {
807  char **options;
808  int noptions;
809  int i;
810 
811  if (!parsePGArray(reloptions, &options, &noptions))
812  {
813  free(options);
814  return false;
815  }
816 
817  for (i = 0; i < noptions; i++)
818  {
819  char *option = options[i];
820  char *name;
821  char *separator;
822  char *value;
823 
824  /*
825  * Each array element should have the form name=value. If the "=" is
826  * missing for some reason, treat it like an empty value.
827  */
828  name = option;
829  separator = strchr(option, '=');
830  if (separator)
831  {
832  *separator = '\0';
833  value = separator + 1;
834  }
835  else
836  value = "";
837 
838  if (i > 0)
839  appendPQExpBufferStr(buffer, ", ");
840  appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));
841 
842  /*
843  * In general we need to quote the value; but to avoid unnecessary
844  * clutter, do not quote if it is an identifier that would not need
845  * quoting. (We could also allow numbers, but that is a bit trickier
846  * than it looks --- for example, are leading zeroes significant? We
847  * don't want to assume very much here about what custom reloptions
848  * might mean.)
849  */
850  if (strcmp(fmtId(value), value) == 0)
851  appendPQExpBufferStr(buffer, value);
852  else
853  appendStringLiteral(buffer, value, encoding, std_strings);
854  }
855 
856  free(options);
857 
858  return true;
859 }
860 
861 
862 /*
863  * processSQLNamePattern
864  *
865  * Scan a wildcard-pattern string and generate appropriate WHERE clauses
866  * to limit the set of objects returned. The WHERE clauses are appended
867  * to the already-partially-constructed query in buf. Returns whether
868  * any clause was added.
869  *
870  * conn: connection query will be sent to (consulted for escaping rules).
871  * buf: output parameter.
872  * pattern: user-specified pattern option, or NULL if none ("*" is implied).
873  * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
874  * onto the existing WHERE clause).
875  * force_escape: always quote regexp special characters, even outside
876  * double quotes (else they are quoted only between double quotes).
877  * schemavar: name of query variable to match against a schema-name pattern.
878  * Can be NULL if no schema.
879  * namevar: name of query variable to match against an object-name pattern.
880  * altnamevar: NULL, or name of an alternative variable to match against name.
881  * visibilityrule: clause to use if we want to restrict to visible objects
882  * (for example, "pg_catalog.pg_table_is_visible(p.oid)"). Can be NULL.
883  * dbnamebuf: output parameter receiving the database name portion of the
884  * pattern, if any. Can be NULL.
885  * dotcnt: how many separators were parsed from the pattern, by reference.
886  *
887  * Formatting note: the text already present in buf should end with a newline.
888  * The appended text, if any, will end with one too.
889  */
/* NOTE(review): the first line of the parameter list (function name and leading parameters) is absent from this listing. */
890 bool
892  bool have_where, bool force_escape,
893  const char *schemavar, const char *namevar,
894  const char *altnamevar, const char *visibilityrule,
895  PQExpBuffer dbnamebuf, int *dotcnt)
896 {
897  PQExpBufferData schemabuf;
898  PQExpBufferData namebuf;
899  bool added_clause = false;
900  int dcnt;
901 
/* Emits "WHERE " for the first clause and " AND " afterwards, and records that a clause was added. */
902 #define WHEREAND() \
903  (appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \
904  have_where = true, added_clause = true)
905 
/* Callers may pass NULL for dotcnt; count into a local in that case. */
906  if (dotcnt == NULL)
907  dotcnt = &dcnt;
908  *dotcnt = 0;
909  if (pattern == NULL)
910  {
911  /* Default: select all visible objects */
912  if (visibilityrule)
913  {
914  WHEREAND();
915  appendPQExpBuffer(buf, "%s\n", visibilityrule);
916  }
917  return added_clause;
918  }
919 
920  initPQExpBuffer(&schemabuf);
921  initPQExpBuffer(&namebuf);
922 
923  /*
924  * Convert shell-style 'pattern' into the regular expression(s) we want to
925  * execute. Quoting/escaping into SQL literal format will be done below
926  * using appendStringLiteralConn().
927  *
928  * If the caller provided a schemavar, we want to split the pattern on
929  * ".", otherwise not.
930  */
/* NOTE(review): the line that opens this call (callee name and first argument) is absent from this listing. */
932  (schemavar ? dbnamebuf : NULL),
933  (schemavar ? &schemabuf : NULL),
934  &namebuf,
935  pattern, force_escape, true, dotcnt);
936 
937  /*
938  * Now decide what we need to emit. We may run under a hostile
939  * search_path, so qualify EVERY name. Note there will be a leading "^("
940  * in the patterns in any case.
941  *
942  * We want the regex matches to use the database's default collation where
943  * collation-sensitive behavior is required (for example, which characters
944  * match '\w'). That happened by default before PG v12, but if the server
945  * is >= v12 then we need to force it through explicit COLLATE clauses,
946  * otherwise the "C" collation attached to "name" catalog columns wins.
947  */
948  if (namevar && namebuf.len > 2)
949  {
950  /* We have a name pattern, so constrain the namevar(s) */
951 
952  /* Optimize away a "*" pattern */
953  if (strcmp(namebuf.data, "^(.*)$") != 0)
954  {
955  WHEREAND();
956  if (altnamevar)
957  {
/* NOTE(review): in this branch and the else branch below, the lines that open the formatted appends and the lines between each append and its COLLATE check are absent from this listing. */
959  "(%s OPERATOR(pg_catalog.~) ", namevar);
961  if (PQserverVersion(conn) >= 120000)
962  appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
964  "\n OR %s OPERATOR(pg_catalog.~) ",
965  altnamevar);
967  if (PQserverVersion(conn) >= 120000)
968  appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
969  appendPQExpBufferStr(buf, ")\n");
970  }
971  else
972  {
973  appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar);
975  if (PQserverVersion(conn) >= 120000)
976  appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
977  appendPQExpBufferChar(buf, '\n');
978  }
979  }
980  }
981 
982  if (schemavar && schemabuf.len > 2)
983  {
984  /* We have a schema pattern, so constrain the schemavar */
985 
986  /* Optimize away a "*" pattern */
/* The trailing "&& schemavar" repeats the test already made by the enclosing if. */
987  if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
988  {
989  WHEREAND();
990  appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar);
991  appendStringLiteralConn(buf, schemabuf.data, conn);
992  if (PQserverVersion(conn) >= 120000)
993  appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
994  appendPQExpBufferChar(buf, '\n');
995  }
996  }
997  else
998  {
999  /* No schema pattern given, so select only visible objects */
1000  if (visibilityrule)
1001  {
1002  WHEREAND();
1003  appendPQExpBuffer(buf, "%s\n", visibilityrule);
1004  }
1005  }
1006 
1007  termPQExpBuffer(&schemabuf);
1008  termPQExpBuffer(&namebuf);
1009 
1010  return added_clause;
1011 #undef WHEREAND
1012 }
1013 
1014 /*
1015  * Transform a possibly qualified shell-style object name pattern into up to
1016  * three SQL-style regular expressions, converting quotes, lower-casing
1017  * unquoted letters, and adjusting shell-style wildcard characters into regexp
1018  * notation.
1019  *
1020  * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
1021  * contains two or more dbname/schema/name separators, we parse the portions of
1022  * the pattern prior to the first and second separators into dbnamebuf and
1023  * schemabuf, and the rest into namebuf.
1024  *
1025  * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
1026  * least one separator, we parse the first portion into schemabuf and the rest
1027  * into namebuf.
1028  *
1029  * Otherwise, we parse all the pattern into namebuf.
1030  *
1031  * If the pattern contains more dotted parts than buffers to parse into, the
1032  * extra dots will be treated as literal characters and written into the
1033  * namebuf, though they will be counted. Callers should always check the value
1034  * returned by reference in dotcnt and handle this error case appropriately.
1035  *
1036  * We surround the regexps with "^(...)$" to force them to match whole strings,
1037  * as per SQL practice. We have to have parens in case strings contain "|",
1038  * else the "^" and "$" will be bound into the first and last alternatives
1039  * which is not what we want. Whether this is done for dbnamebuf is controlled
1040  * by the want_literal_dbname parameter.
1041  *
1042  * The regexps we parse into the buffers are appended to the data (if any)
1043  * already present. If we parse fewer fields than the number of buffers we
1044  * were given, the extra buffers are unaltered.
1045  *
1046  * encoding: the character encoding for the given pattern
1047  * dbnamebuf: output parameter receiving the database name portion of the
1048  * pattern, if any. Can be NULL.
1049  * schemabuf: output parameter receiving the schema name portion of the
1050  * pattern, if any. Can be NULL.
1051  * namebuf: output parameter receiving the object name portion of the
1052  * pattern. Must not be NULL.
1053  * pattern: user-specified pattern option, or NULL if none ("*" is implied).
1054  * force_escape: always quote regexp special characters, even outside
1055  * double quotes (else they are quoted only between double quotes).
1056  * want_literal_dbname: if true, regexp special characters within the database
1057  * name portion of the pattern will not be escaped, nor will the dbname be
1058  * converted into a regular expression.
1059  * dotcnt: output parameter receiving the number of separators parsed from the
1060  * pattern.
1061  */
/* NOTE(review): the first line of the parameter list (function name and leading parameters) is absent from this listing. */
1062 void
1064  PQExpBuffer namebuf, const char *pattern, bool force_escape,
1065  bool want_literal_dbname, int *dotcnt)
1066 {
/* Working buffers, one per dotted part, filled left to right as separators are seen. */
1067  PQExpBufferData buf[3];
/* Literal (non-regexp) copy of the leftmost part; maintained only while `left` is true. */
1068  PQExpBufferData left_literal;
1069  PQExpBuffer curbuf;
1070  PQExpBuffer maxbuf;
1071  int i;
1072  bool inquotes;
1073  bool left;
1074  const char *cp;
1075 
1076  Assert(pattern != NULL);
1077  Assert(namebuf != NULL);
1078 
1079  /* callers should never expect "dbname.relname" format */
1080  Assert(dbnamebuf == NULL || schemabuf != NULL);
1081  Assert(dotcnt != NULL);
1082 
1083  *dotcnt = 0;
1084  inquotes = false;
1085  cp = pattern;
1086 
/* maxbuf marks the last working buffer we may advance into, based on how many output buffers were supplied. */
1087  if (dbnamebuf != NULL)
1088  maxbuf = &buf[2];
1089  else if (schemabuf != NULL)
1090  maxbuf = &buf[1];
1091  else
1092  maxbuf = &buf[0];
1093 
1094  curbuf = &buf[0];
1095  if (want_literal_dbname)
1096  {
1097  left = true;
1098  initPQExpBuffer(&left_literal);
1099  }
1100  else
1101  left = false;
1102  initPQExpBuffer(curbuf);
1103  appendPQExpBufferStr(curbuf, "^(");
1104  while (*cp)
1105  {
1106  char ch = *cp;
1107 
1108  if (ch == '"')
1109  {
1110  if (inquotes && cp[1] == '"')
1111  {
1112  /* emit one quote, stay in inquotes mode */
1113  appendPQExpBufferChar(curbuf, '"');
1114  if (left)
1115  appendPQExpBufferChar(&left_literal, '"');
1116  cp++;
1117  }
1118  else
1119  inquotes = !inquotes;
1120  cp++;
1121  }
1122  else if (!inquotes && isupper((unsigned char) ch))
1123  {
1124  appendPQExpBufferChar(curbuf,
1125  pg_tolower((unsigned char) ch));
1126  if (left)
1127  appendPQExpBufferChar(&left_literal,
1128  pg_tolower((unsigned char) ch));
1129  cp++;
1130  }
1131  else if (!inquotes && ch == '*')
1132  {
1133  appendPQExpBufferStr(curbuf, ".*");
1134  if (left)
1135  appendPQExpBufferChar(&left_literal, '*');
1136  cp++;
1137  }
1138  else if (!inquotes && ch == '?')
1139  {
1140  appendPQExpBufferChar(curbuf, '.');
1141  if (left)
1142  appendPQExpBufferChar(&left_literal, '?');
1143  cp++;
1144  }
1145  else if (!inquotes && ch == '.')
1146  {
/* The first unquoted dot ends the leftmost part, so stop collecting its literal copy. */
1147  left = false;
/* dotcnt was already dereferenced above, so this test always succeeds. */
1148  if (dotcnt)
1149  (*dotcnt)++;
1150  if (curbuf < maxbuf)
1151  {
1152  appendPQExpBufferStr(curbuf, ")$");
1153  curbuf++;
1154  initPQExpBuffer(curbuf);
1155  appendPQExpBufferStr(curbuf, "^(");
1156  cp++;
1157  }
1158  else
1159  appendPQExpBufferChar(curbuf, *cp++);
1160  }
1161  else if (ch == '$')
1162  {
1163  /*
1164  * Dollar is always quoted, whether inside quotes or not. The
1165  * reason is that it's allowed in SQL identifiers, so there's a
1166  * significant use-case for treating it literally, while because
1167  * we anchor the pattern automatically there is no use-case for
1168  * having it possess its regexp meaning.
1169  */
1170  appendPQExpBufferStr(curbuf, "\\$");
1171  if (left)
1172  appendPQExpBufferChar(&left_literal, '$');
1173  cp++;
1174  }
1175  else
1176  {
1177  /*
1178  * Ordinary data character, transfer to pattern
1179  *
1180  * Inside double quotes, or at all times if force_escape is true,
1181  * quote regexp special characters with a backslash to avoid
1182  * regexp errors. Outside quotes, however, let them pass through
1183  * as-is; this lets knowledgeable users build regexp expressions
1184  * that are more powerful than shell-style patterns.
1185  *
1186  * As an exception to that, though, always quote "[]", as that's
1187  * much more likely to be an attempt to write an array type name
1188  * than it is to be the start of a regexp bracket expression.
1189  */
1190  if ((inquotes || force_escape) &&
1191  strchr("|*+?()[]{}.^$\\", ch))
1192  appendPQExpBufferChar(curbuf, '\\');
1193  else if (ch == '[' && cp[1] == ']')
1194  appendPQExpBufferChar(curbuf, '\\');
/* Copy the whole (possibly multibyte) character, as measured by PQmblenBounded(). */
1195  i = PQmblenBounded(cp, encoding);
1196  while (i--)
1197  {
1198  if (left)
1199  appendPQExpBufferChar(&left_literal, *cp);
1200  appendPQExpBufferChar(curbuf, *cp++);
1201  }
1202  }
1203  }
1204  appendPQExpBufferStr(curbuf, ")$");
1205 
/* Hand the working buffers out from right to left: name first, then schema, then dbname. */
1206  if (namebuf)
1207  {
1208  appendPQExpBufferStr(namebuf, curbuf->data);
1209  termPQExpBuffer(curbuf);
1210  curbuf--;
1211  }
1212 
1213  if (schemabuf && curbuf >= buf)
1214  {
1215  appendPQExpBufferStr(schemabuf, curbuf->data);
1216  termPQExpBuffer(curbuf);
1217  curbuf--;
1218  }
1219 
1220  if (dbnamebuf && curbuf >= buf)
1221  {
1222  if (want_literal_dbname)
1223  appendPQExpBufferStr(dbnamebuf, left_literal.data);
1224  else
1225  appendPQExpBufferStr(dbnamebuf, curbuf->data);
1226  termPQExpBuffer(curbuf);
1227  }
1228 
1229  if (want_literal_dbname)
1230  termPQExpBuffer(&left_literal);
1231 }
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1158
#define Assert(condition)
Definition: c.h:861
#define ESCAPE_STRING_SYNTAX
Definition: c.h:1169
#define SQL_STR_DOUBLE(ch, escape_backslash)
Definition: c.h:1166
const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS]
Definition: keywords.c:29
#define _(x)
Definition: elog.c:90
int PQserverVersion(const PGconn *conn)
Definition: fe-connect.c:7202
int PQclientEncoding(const PGconn *conn)
Definition: fe-connect.c:7300
size_t PQescapeStringConn(PGconn *conn, char *to, const char *from, size_t length, int *error)
Definition: fe-exec.c:4145
int PQmblen(const char *s, int encoding)
Definition: fe-misc.c:1224
int PQmblenBounded(const char *s, int encoding)
Definition: fe-misc.c:1234
const char * str
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
#define nitems(x)
Definition: indent.h:31
static struct @157 value
int i
Definition: isn.c:73
PGDLLIMPORT const ScanKeywordList ScanKeywords
#define UNRESERVED_KEYWORD
Definition: keywords.h:20
int ScanKeywordLookup(const char *str, const ScanKeywordList *keywords)
Definition: kwlookup.c:38
exit(1)
const void size_t len
int32 encoding
Definition: pg_database.h:41
static char * connstr
Definition: pg_dumpall.c:88
static size_t noptions
static char ** options
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:73
static const char hextbl[]
Definition: pgp-info.c:87
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define snprintf
Definition: port.h:238
#define fprintf
Definition: port.h:242
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
PQExpBuffer createPQExpBuffer(void)
Definition: pqexpbuffer.c:72
void initPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:90
int enlargePQExpBuffer(PQExpBuffer str, size_t needed)
Definition: pqexpbuffer.c:172
void resetPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:146
void appendPQExpBuffer(PQExpBuffer str, const char *fmt,...)
Definition: pqexpbuffer.c:265
void destroyPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:114
void appendPQExpBufferChar(PQExpBuffer str, char ch)
Definition: pqexpbuffer.c:378
void appendPQExpBufferStr(PQExpBuffer str, const char *data)
Definition: pqexpbuffer.c:367
void termPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:129
PQExpBufferData * PQExpBuffer
Definition: pqexpbuffer.h:51
char * c
#define EXIT_FAILURE
Definition: settings.h:178
char * dbname
Definition: streamutil.c:52
PGconn * conn
Definition: streamutil.c:55
bool appendShellStringNoError(PQExpBuffer buf, const char *str)
Definition: string_utils.c:441
void appendShellString(PQExpBuffer buf, const char *str)
Definition: string_utils.c:429
void appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
Definition: string_utils.c:293
void appendPGArray(PQExpBuffer buffer, const char *value)
Definition: string_utils.c:740
bool processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern, bool have_where, bool force_escape, const char *schemavar, const char *namevar, const char *altnamevar, const char *visibilityrule, PQExpBuffer dbnamebuf, int *dotcnt)
Definition: string_utils.c:891
void appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
Definition: string_utils.c:590
void appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length, bool std_strings)
Definition: string_utils.c:374
const char * fmtId(const char *rawid)
Definition: string_utils.c:64
bool parsePGArray(const char *atext, char ***itemarray, int *nitems)
Definition: string_utils.c:657
void appendStringLiteral(PQExpBuffer buf, const char *str, int encoding, bool std_strings)
Definition: string_utils.c:215
void patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf, PQExpBuffer namebuf, const char *pattern, bool force_escape, bool want_literal_dbname, int *dotcnt)
bool appendReloptionsArray(PQExpBuffer buffer, const char *reloptions, const char *prefix, int encoding, bool std_strings)
Definition: string_utils.c:804
static PQExpBuffer defaultGetLocalPQExpBuffer(void)
#define WHEREAND()
const char * fmtQualifiedId(const char *schema, const char *id)
Definition: string_utils.c:145
PQExpBuffer(* getLocalPQExpBuffer)(void)
Definition: string_utils.c:27
void appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
Definition: string_utils.c:331
void appendConnStrVal(PQExpBuffer buf, const char *str)
Definition: string_utils.c:545
char * formatPGVersionNumber(int version_number, bool include_minor, char *buf, size_t buflen)
Definition: string_utils.c:177
int quote_all_identifiers
Definition: string_utils.c:26
static ItemArray items
Definition: test_tidstore.c:49
const char * name