PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
encode.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * encode.c
4  * Various data encoding/decoding things.
5  *
6  * Copyright (c) 2001-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/encode.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <ctype.h>
17 
18 #include "utils/builtins.h"
19 
20 
/*
 * Per-encoding dispatch table.
 *
 * encode_len/decode_len return the number of bytes the caller should
 * allocate for the result; encode/decode write into "res" and return the
 * number of bytes actually produced.  Callers treat the *_len value as an
 * upper bound on what encode/decode will write.
 */
struct pg_encoding
{
    unsigned    (*encode_len) (const char *data, unsigned dlen);
    unsigned    (*decode_len) (const char *data, unsigned dlen);
    unsigned    (*encode) (const char *data, unsigned dlen, char *res);
    unsigned    (*decode) (const char *data, unsigned dlen, char *res);
};
28 
29 static const struct pg_encoding *pg_find_encoding(const char *name);
30 
31 /*
32  * SQL functions.
33  */
34 
35 Datum
37 {
38  bytea *data = PG_GETARG_BYTEA_PP(0);
40  text *result;
41  char *namebuf;
42  int datalen,
43  resultlen,
44  res;
45  const struct pg_encoding *enc;
46 
47  datalen = VARSIZE_ANY_EXHDR(data);
48 
49  namebuf = TextDatumGetCString(name);
50 
51  enc = pg_find_encoding(namebuf);
52  if (enc == NULL)
53  ereport(ERROR,
54  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
55  errmsg("unrecognized encoding: \"%s\"", namebuf)));
56 
57  resultlen = enc->encode_len(VARDATA_ANY(data), datalen);
58  result = palloc(VARHDRSZ + resultlen);
59 
60  res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result));
61 
62  /* Make this FATAL 'cause we've trodden on memory ... */
63  if (res > resultlen)
64  elog(FATAL, "overflow - encode estimate too small");
65 
66  SET_VARSIZE(result, VARHDRSZ + res);
67 
68  PG_RETURN_TEXT_P(result);
69 }
70 
71 Datum
73 {
74  text *data = PG_GETARG_TEXT_PP(0);
76  bytea *result;
77  char *namebuf;
78  int datalen,
79  resultlen,
80  res;
81  const struct pg_encoding *enc;
82 
83  datalen = VARSIZE_ANY_EXHDR(data);
84 
85  namebuf = TextDatumGetCString(name);
86 
87  enc = pg_find_encoding(namebuf);
88  if (enc == NULL)
89  ereport(ERROR,
90  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
91  errmsg("unrecognized encoding: \"%s\"", namebuf)));
92 
93  resultlen = enc->decode_len(VARDATA_ANY(data), datalen);
94  result = palloc(VARHDRSZ + resultlen);
95 
96  res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result));
97 
98  /* Make this FATAL 'cause we've trodden on memory ... */
99  if (res > resultlen)
100  elog(FATAL, "overflow - decode estimate too small");
101 
102  SET_VARSIZE(result, VARHDRSZ + res);
103 
104  PG_RETURN_BYTEA_P(result);
105 }
106 
107 
108 /*
109  * HEX
110  */
111 
/* Output digit for each 4-bit value; hex encoding emits lower case. */
static const char hextbl[] = "0123456789abcdef";
113 
114 static const int8 hexlookup[128] = {
115  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
116  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
117  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
118  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
119  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123 };
124 
125 unsigned
126 hex_encode(const char *src, unsigned len, char *dst)
127 {
128  const char *end = src + len;
129 
130  while (src < end)
131  {
132  *dst++ = hextbl[(*src >> 4) & 0xF];
133  *dst++ = hextbl[*src & 0xF];
134  src++;
135  }
136  return len * 2;
137 }
138 
139 static inline char
140 get_hex(char c)
141 {
142  int res = -1;
143 
144  if (c > 0 && c < 127)
145  res = hexlookup[(unsigned char) c];
146 
147  if (res < 0)
148  ereport(ERROR,
149  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
150  errmsg("invalid hexadecimal digit: \"%c\"", c)));
151 
152  return (char) res;
153 }
154 
155 unsigned
156 hex_decode(const char *src, unsigned len, char *dst)
157 {
158  const char *s,
159  *srcend;
160  char v1,
161  v2,
162  *p;
163 
164  srcend = src + len;
165  s = src;
166  p = dst;
167  while (s < srcend)
168  {
169  if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
170  {
171  s++;
172  continue;
173  }
174  v1 = get_hex(*s++) << 4;
175  if (s >= srcend)
176  ereport(ERROR,
177  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
178  errmsg("invalid hexadecimal data: odd number of digits")));
179 
180  v2 = get_hex(*s++);
181  *p++ = v1 | v2;
182  }
183 
184  return p - dst;
185 }
186 
/*
 * hex_enc_len
 *
 * Number of output characters produced by hex-encoding "srclen" bytes:
 * two per input byte.  "src" is not examined.
 */
static unsigned
hex_enc_len(const char *src, unsigned srclen)
{
    return srclen << 1;
}
192 
/*
 * hex_dec_len
 *
 * Upper bound on the number of bytes produced by hex-decoding "srclen"
 * characters: one byte per two characters, rounded down.  "src" is not
 * examined.
 */
static unsigned
hex_dec_len(const char *src, unsigned srclen)
{
    return srclen >> 1;
}
198 
199 /*
200  * BASE64
201  */
202 
/* Output character for each 6-bit value. */
static const char _base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
205 
206 static const int8 b64lookup[128] = {
207  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
208  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
209  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
210  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
211  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
212  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
213  -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
214  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
215 };
216 
217 static unsigned
218 b64_encode(const char *src, unsigned len, char *dst)
219 {
220  char *p,
221  *lend = dst + 76;
222  const char *s,
223  *end = src + len;
224  int pos = 2;
225  uint32 buf = 0;
226 
227  s = src;
228  p = dst;
229 
230  while (s < end)
231  {
232  buf |= (unsigned char) *s << (pos << 3);
233  pos--;
234  s++;
235 
236  /* write it out */
237  if (pos < 0)
238  {
239  *p++ = _base64[(buf >> 18) & 0x3f];
240  *p++ = _base64[(buf >> 12) & 0x3f];
241  *p++ = _base64[(buf >> 6) & 0x3f];
242  *p++ = _base64[buf & 0x3f];
243 
244  pos = 2;
245  buf = 0;
246  }
247  if (p >= lend)
248  {
249  *p++ = '\n';
250  lend = p + 76;
251  }
252  }
253  if (pos != 2)
254  {
255  *p++ = _base64[(buf >> 18) & 0x3f];
256  *p++ = _base64[(buf >> 12) & 0x3f];
257  *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
258  *p++ = '=';
259  }
260 
261  return p - dst;
262 }
263 
264 static unsigned
265 b64_decode(const char *src, unsigned len, char *dst)
266 {
267  const char *srcend = src + len,
268  *s = src;
269  char *p = dst;
270  char c;
271  int b = 0;
272  uint32 buf = 0;
273  int pos = 0,
274  end = 0;
275 
276  while (s < srcend)
277  {
278  c = *s++;
279 
280  if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
281  continue;
282 
283  if (c == '=')
284  {
285  /* end sequence */
286  if (!end)
287  {
288  if (pos == 2)
289  end = 1;
290  else if (pos == 3)
291  end = 2;
292  else
293  ereport(ERROR,
294  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
295  errmsg("unexpected \"=\" while decoding base64 sequence")));
296  }
297  b = 0;
298  }
299  else
300  {
301  b = -1;
302  if (c > 0 && c < 127)
303  b = b64lookup[(unsigned char) c];
304  if (b < 0)
305  ereport(ERROR,
306  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
307  errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c)));
308  }
309  /* add it to buffer */
310  buf = (buf << 6) + b;
311  pos++;
312  if (pos == 4)
313  {
314  *p++ = (buf >> 16) & 255;
315  if (end == 0 || end > 1)
316  *p++ = (buf >> 8) & 255;
317  if (end == 0 || end > 2)
318  *p++ = buf & 255;
319  buf = 0;
320  pos = 0;
321  }
322  }
323 
324  if (pos != 0)
325  ereport(ERROR,
326  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
327  errmsg("invalid base64 end sequence"),
328  errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
329 
330  return p - dst;
331 }
332 
333 
/*
 * b64_enc_len
 *
 * Upper bound on the number of characters produced by base64-encoding
 * "srclen" bytes.  "src" is not examined.
 *
 * The value is (srclen + 2) * 4 / 3, plus one linefeed per 76 output
 * characters (i.e. per 76 * 3 / 4 input bytes).  The first term is computed
 * as quotient and remainder of srclen / 3 so that the intermediate
 * multiplication cannot wrap around; the result is identical to the
 * straightforward formula whenever that one does not overflow.
 */
static unsigned
b64_enc_len(const char *src, unsigned srclen)
{
    unsigned    whole = srclen / 3;
    unsigned    rest = srclen % 3;

    /* 3 bytes will be converted to 4, linefeed after 76 chars */
    return whole * 4 + (rest * 4 + 8) / 3 + srclen / (76 * 3 / 4);
}
340 
/*
 * b64_dec_len
 *
 * Upper bound on the number of bytes produced by base64-decoding "srclen"
 * characters: three bytes per four characters, rounded down.  "src" is not
 * examined.
 *
 * Computed per group of four so that the multiplication by 3 cannot wrap
 * around; the result equals (srclen * 3) >> 2 whenever that expression does
 * not overflow.
 */
static unsigned
b64_dec_len(const char *src, unsigned srclen)
{
    return (srclen >> 2) * 3 + (((srclen & 3) * 3) >> 2);
}
346 
347 /*
348  * Escape
349  * Minimally escape bytea to text.
350  * De-escape text to bytea.
351  *
352  * We must escape zero bytes and high-bit-set bytes to avoid generating
353  * text that might be invalid in the current encoding, or that might
354  * change to something else if passed through an encoding conversion
355  * (leading to failing to de-escape to the original bytea value).
356  * Also of course backslash itself has to be escaped.
357  *
358  * De-escaping processes \\ and any \### octal
359  */
360 
/* Convert between an ASCII digit character and its numeric value. */
#define VAL(CH)         ((CH) - '0')
#define DIG(N)          ((N) + '0')
363 
/*
 * esc_encode
 *
 * Write the "escape" representation of the "srclen" bytes at "src" into
 * "dst" and return the number of characters written.  No terminator is
 * written.
 *
 * Zero bytes and bytes with the high bit set become a backslash followed by
 * three octal digits, a backslash becomes two backslashes, and every other
 * byte is copied unchanged.
 */
static unsigned
esc_encode(const char *src, unsigned srclen, char *dst)
{
    const char *end = src + srclen;
    char       *rp = dst;
    int         len = 0;

    while (src < end)
    {
        unsigned char c = (unsigned char) *src;

        if (c == '\0' || IS_HIGHBIT_SET(c))
        {
            /* \ooo: 2 + 3 + 3 bits, most significant digit first */
            rp[0] = '\\';
            rp[1] = DIG(c >> 6);
            rp[2] = DIG((c >> 3) & 7);
            rp[3] = DIG(c & 7);
            rp += 4;
            len += 4;
        }
        else if (c == '\\')
        {
            rp[0] = '\\';
            rp[1] = '\\';
            rp += 2;
            len += 2;
        }
        else
        {
            *rp++ = c;
            len++;
        }

        src++;
    }

    return len;
}
402 
403 static unsigned
404 esc_decode(const char *src, unsigned srclen, char *dst)
405 {
406  const char *end = src + srclen;
407  char *rp = dst;
408  int len = 0;
409 
410  while (src < end)
411  {
412  if (src[0] != '\\')
413  *rp++ = *src++;
414  else if (src + 3 < end &&
415  (src[1] >= '0' && src[1] <= '3') &&
416  (src[2] >= '0' && src[2] <= '7') &&
417  (src[3] >= '0' && src[3] <= '7'))
418  {
419  int val;
420 
421  val = VAL(src[1]);
422  val <<= 3;
423  val += VAL(src[2]);
424  val <<= 3;
425  *rp++ = val + VAL(src[3]);
426  src += 4;
427  }
428  else if (src + 1 < end &&
429  (src[1] == '\\'))
430  {
431  *rp++ = '\\';
432  src += 2;
433  }
434  else
435  {
436  /*
437  * One backslash, not followed by ### valid octal. Should never
438  * get here, since esc_dec_len does same check.
439  */
440  ereport(ERROR,
441  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
442  errmsg("invalid input syntax for type %s", "bytea")));
443  }
444 
445  len++;
446  }
447 
448  return len;
449 }
450 
/*
 * esc_enc_len
 *
 * Number of characters the "escape" representation of the "srclen" bytes at
 * "src" occupies: four for a zero or high-bit-set byte, two for a
 * backslash, one for anything else.
 */
static unsigned
esc_enc_len(const char *src, unsigned srclen)
{
    const char *end = src + srclen;
    int         len = 0;

    while (src < end)
    {
        if (*src == '\0' || IS_HIGHBIT_SET(*src))
            len += 4;
        else if (*src == '\\')
            len += 2;
        else
            len++;

        src++;
    }

    return len;
}
471 
472 static unsigned
473 esc_dec_len(const char *src, unsigned srclen)
474 {
475  const char *end = src + srclen;
476  int len = 0;
477 
478  while (src < end)
479  {
480  if (src[0] != '\\')
481  src++;
482  else if (src + 3 < end &&
483  (src[1] >= '0' && src[1] <= '3') &&
484  (src[2] >= '0' && src[2] <= '7') &&
485  (src[3] >= '0' && src[3] <= '7'))
486  {
487  /*
488  * backslash + valid octal
489  */
490  src += 4;
491  }
492  else if (src + 1 < end &&
493  (src[1] == '\\'))
494  {
495  /*
496  * two backslashes = backslash
497  */
498  src += 2;
499  }
500  else
501  {
502  /*
503  * one backslash, not followed by ### valid octal
504  */
505  ereport(ERROR,
506  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
507  errmsg("invalid input syntax for type %s", "bytea")));
508  }
509 
510  len++;
511  }
512  return len;
513 }
514 
515 /*
516  * Common
517  */
518 
519 static const struct
520 {
521  const char *name;
522  struct pg_encoding enc;
523 } enclist[] =
524 
525 {
526  {
527  "hex",
528  {
530  }
531  },
532  {
533  "base64",
534  {
536  }
537  },
538  {
539  "escape",
540  {
542  }
543  },
544  {
545  NULL,
546  {
547  NULL, NULL, NULL, NULL
548  }
549  }
550 };
551 
552 static const struct pg_encoding *
553 pg_find_encoding(const char *name)
554 {
555  int i;
556 
557  for (i = 0; enclist[i].name; i++)
558  if (pg_strcasecmp(enclist[i].name, name) == 0)
559  return &enclist[i].enc;
560 
561  return NULL;
562 }
int errhint(const char *fmt,...)
Definition: elog.c:987
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define VARDATA(PTR)
Definition: postgres.h:303
static unsigned b64_enc_len(const char *src, unsigned srclen)
Definition: encode.c:335
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:225
#define VARHDRSZ
Definition: c.h:445
static unsigned b64_dec_len(const char *src, unsigned srclen)
Definition: encode.c:342
static unsigned esc_enc_len(const char *src, unsigned srclen)
Definition: encode.c:452
static unsigned b64_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:218
int errcode(int sqlerrcode)
Definition: elog.c:575
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:321
static const char _base64[]
Definition: encode.c:203
static const int8 hexlookup[128]
Definition: encode.c:114
unsigned(* encode_len)(const char *data, unsigned dlen)
Definition: encode.c:23
unsigned(* decode)(const char *data, unsigned dlen, char *res)
Definition: encode.c:26
static unsigned esc_encode(const char *src, unsigned srclen, char *dst)
Definition: encode.c:365
unsigned hex_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:156
Datum binary_encode(PG_FUNCTION_ARGS)
Definition: encode.c:36
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:265
unsigned hex_encode(const char *src, unsigned len, char *dst)
Definition: encode.c:126
#define IS_HIGHBIT_SET(ch)
Definition: c.h:973
#define ERROR
Definition: elog.h:43
unsigned(* decode_len)(const char *data, unsigned dlen)
Definition: encode.c:24
struct pg_encoding enc
Definition: encode.c:522
#define FATAL
Definition: elog.h:52
unsigned(* encode)(const char *data, unsigned dlen, char *res)
Definition: encode.c:25
static unsigned b64_decode(const char *src, unsigned len, char *dst)
Definition: encode.c:265
static unsigned hex_dec_len(const char *src, unsigned srclen)
Definition: encode.c:194
char * c
static char * buf
Definition: pg_test_fsync.c:65
static const char hextbl[]
Definition: encode.c:112
unsigned int uint32
Definition: c.h:268
static const struct @26 enclist[]
static const struct pg_encoding * pg_find_encoding(const char *name)
Definition: encode.c:553
#define ereport(elevel, rest)
Definition: elog.h:122
signed char int8
Definition: c.h:254
#define TextDatumGetCString(d)
Definition: builtins.h:92
uintptr_t Datum
Definition: postgres.h:372
static char get_hex(char c)
Definition: encode.c:140
#define VAL(CH)
Definition: encode.c:361
static unsigned hex_enc_len(const char *src, unsigned srclen)
Definition: encode.c:188
static unsigned esc_decode(const char *src, unsigned srclen, char *dst)
Definition: encode.c:404
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:322
#define NULL
Definition: c.h:229
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:264
const char * name
Definition: encode.c:521
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
void * palloc(Size size)
Definition: mcxt.c:849
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define DIG(VAL)
Definition: encode.c:362
static unsigned esc_dec_len(const char *src, unsigned srclen)
Definition: encode.c:473
int i
Definition: c.h:439
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
Datum binary_decode(PG_FUNCTION_ARGS)
Definition: encode.c:72
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
#define elog
Definition: elog.h:219
static const int8 b64lookup[128]
Definition: encode.c:206
long val
Definition: informix.c:689