PostgreSQL Source Code  git master
saslprep.c File Reference
#include "postgres.h"
#include "common/saslprep.h"
#include "common/string.h"
#include "common/unicode_norm.h"
#include "mb/pg_wchar.h"
Include dependency graph for saslprep.c:

Go to the source code of this file.

Macros

#define STRDUP(s)   pstrdup(s)
 
#define ALLOC(size)   palloc(size)
 
#define FREE(size)   pfree(size)
 
#define IS_CODE_IN_TABLE(code, map)   is_code_in_table(code, map, lengthof(map))
 

Functions

static int codepoint_range_cmp (const void *a, const void *b)
 
static bool is_code_in_table (pg_wchar code, const pg_wchar *map, int mapsize)
 
static int pg_utf8_string_len (const char *source)
 
pg_saslprep_rc pg_saslprep (const char *input, char **output)
 

Variables

static const pg_wchar non_ascii_space_ranges []
 
static const pg_wchar commonly_mapped_to_nothing_ranges []
 
static const pg_wchar prohibited_output_ranges []
 
static const pg_wchar unassigned_codepoint_ranges []
 
static const pg_wchar RandALCat_codepoint_ranges []
 
static const pg_wchar LCat_codepoint_ranges []
 

Macro Definition Documentation

◆ ALLOC

#define ALLOC (   size)    palloc(size)

Definition at line 39 of file saslprep.c.

◆ FREE

#define FREE (   size)    pfree(size)

Definition at line 40 of file saslprep.c.

◆ IS_CODE_IN_TABLE

#define IS_CODE_IN_TABLE (   code,
  map 
)    is_code_in_table(code, map, lengthof(map))

Definition at line 965 of file saslprep.c.

◆ STRDUP

#define STRDUP (   s)    pstrdup(s)

Definition at line 38 of file saslprep.c.

Function Documentation

◆ codepoint_range_cmp()

static int codepoint_range_cmp ( const void *  a,
const void *  b 
)
static

Definition at line 968 of file saslprep.c.

969 {
970  const pg_wchar *key = (const pg_wchar *) a;
971  const pg_wchar *range = (const pg_wchar *) b;
972 
973  if (*key < range[0])
974  return -1; /* less than lower bound */
975  if (*key > range[1])
976  return 1; /* greater than upper bound */
977 
978  return 0; /* within range */
979 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69
unsigned int pg_wchar
Definition: mbprint.c:31
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412

References a, b, sort-test::key, and range().

Referenced by is_code_in_table().

◆ is_code_in_table()

static bool is_code_in_table ( pg_wchar  code,
const pg_wchar *  map,
int  mapsize 
)
static

Definition at line 982 of file saslprep.c.

983 {
984  Assert(mapsize % 2 == 0);
985 
986  if (code < map[0] || code > map[mapsize - 1])
987  return false;
988 
989  if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
990  codepoint_range_cmp))
991  return true;
992  else
993  return false;
994 }
#define Assert(condition)
Definition: c.h:858
static int codepoint_range_cmp(const void *a, const void *b)
Definition: saslprep.c:968

References Assert, and codepoint_range_cmp().

◆ pg_saslprep()

pg_saslprep_rc pg_saslprep ( const char *  input,
char **  output 
)

Definition at line 1044 of file saslprep.c.

1045 {
1046  pg_wchar *input_chars = NULL;
1047  pg_wchar *output_chars = NULL;
1048  int input_size;
1049  char *result;
1050  int result_size;
1051  int count;
1052  int i;
1053  bool contains_RandALCat;
1054  unsigned char *p;
1055  pg_wchar *wp;
1056 
1057  /* Ensure we return *output as NULL on failure */
1058  *output = NULL;
1059 
1060  /*
1061  * Quick check if the input is pure ASCII. An ASCII string requires no
1062  * further processing.
1063  */
1064  if (pg_is_ascii(input))
1065  {
1066  *output = STRDUP(input);
1067  if (!(*output))
1068  goto oom;
1069  return SASLPREP_SUCCESS;
1070  }
1071 
1072  /*
1073  * Convert the input from UTF-8 to an array of Unicode codepoints.
1074  *
1075  * This also checks that the input is a legal UTF-8 string.
1076  */
1077  input_size = pg_utf8_string_len(input);
1078  if (input_size < 0)
1079  return SASLPREP_INVALID_UTF8;
1080 
1081  input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
1082  if (!input_chars)
1083  goto oom;
1084 
1085  p = (unsigned char *) input;
1086  for (i = 0; i < input_size; i++)
1087  {
1088  input_chars[i] = utf8_to_unicode(p);
1089  p += pg_utf_mblen(p);
1090  }
1091  input_chars[i] = (pg_wchar) '\0';
1092 
1093  /*
1094  * The steps below correspond to the steps listed in [RFC3454], Section
1095  * "2. Preparation Overview"
1096  */
1097 
1098  /*
1099  * 1) Map -- For each character in the input, check if it has a mapping
1100  * and, if so, replace it with its mapping.
1101  */
1102  count = 0;
1103  for (i = 0; i < input_size; i++)
1104  {
1105  pg_wchar code = input_chars[i];
1106 
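1107  if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
1108  input_chars[count++] = 0x0020;
1109  else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
1110  {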
1111  /* map to nothing */
1112  }
1113  else
1114  input_chars[count++] = code;
1115  }
1116  input_chars[count] = (pg_wchar) '\0';
1117  input_size = count;
1118 
1119  if (input_size == 0)
1120  goto prohibited; /* don't allow empty password */
1121 
1122  /*
1123  * 2) Normalize -- Normalize the result of step 1 using Unicode
1124  * normalization.
1125  */
1126  output_chars = unicode_normalize(UNICODE_NFKC, input_chars);
1127  if (!output_chars)
1128  goto oom;
1129 
1130  /*
1131  * 3) Prohibit -- Check for any characters that are not allowed in the
1132  * output. If any are found, return an error.
1133  */
1134  for (i = 0; i < input_size; i++)
1135  {
1136  pg_wchar code = input_chars[i];
1137 
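1138  if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
1139  goto prohibited;
1140  if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
1141  goto prohibited;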
1142  }
1143 
1144  /*
1145  * 4) Check bidi -- Possibly check for right-to-left characters, and if
1146  * any are found, make sure that the whole string satisfies the
1147  * requirements for bidirectional strings. If the string does not satisfy
1148  * the requirements for bidirectional strings, return an error.
1149  *
1150  * [RFC3454], Section "6. Bidirectional Characters" explains in more
1151  * detail what that means:
1152  *
1153  * "In any profile that specifies bidirectional character handling, all
1154  * three of the following requirements MUST be met:
1155  *
1156  * 1) The characters in section 5.8 MUST be prohibited.
1157  *
1158  * 2) If a string contains any RandALCat character, the string MUST NOT
1159  * contain any LCat character.
1160  *
1161  * 3) If a string contains any RandALCat character, a RandALCat character
1162  * MUST be the first character of the string, and a RandALCat character
1163  * MUST be the last character of the string."
1164  */
1165  contains_RandALCat = false;
1166  for (i = 0; i < input_size; i++)
1167  {
1168  pg_wchar code = input_chars[i];
1169 
1170  if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
1171  {
1172  contains_RandALCat = true;
1173  break;
1174  }
1175  }
1176 
1177  if (contains_RandALCat)
1178  {
1179  pg_wchar first = input_chars[0];
1180  pg_wchar last = input_chars[input_size - 1];
1181 
1182  for (i = 0; i < input_size; i++)
1183  {
1184  pg_wchar code = input_chars[i];
1185 
1186  if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
1187  goto prohibited;
1188  }
1189 
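1190  if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) ||
1191  !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges))
1192  goto prohibited;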
1193  }
1194 
1195  /*
1196  * Finally, convert the result back to UTF-8.
1197  */
1198  result_size = 0;
1199  for (wp = output_chars; *wp; wp++)
1200  {
1201  unsigned char buf[4];
1202 
1203  unicode_to_utf8(*wp, buf);
1204  result_size += pg_utf_mblen(buf);
1205  }
1206 
1207  result = ALLOC(result_size + 1);
1208  if (!result)
1209  goto oom;
1210 
1211  /*
1212  * There are no error exits below here, so the error exit paths don't need
1213  * to worry about possibly freeing "result".
1214  */
1215  p = (unsigned char *) result;
1216  for (wp = output_chars; *wp; wp++)
1217  {
1218  unicode_to_utf8(*wp, p);
1219  p += pg_utf_mblen(p);
1220  }
1221  Assert((char *) p == result + result_size);
1222  *p = '\0';
1223 
1224  FREE(input_chars);
1225  FREE(output_chars);
1226 
1227  *output = result;
1228  return SASLPREP_SUCCESS;
1229 
1230 prohibited:
1231  if (input_chars)
1232  FREE(input_chars);
1233  if (output_chars)
1234  FREE(output_chars);
1235 
1236  return SASLPREP_PROHIBITED;
1237 
1238 oom:
1239  if (input_chars)
1240  FREE(input_chars);
1241  if (output_chars)
1242  FREE(output_chars);
1243 
1244  return SASLPREP_OOM;
1245 }
FILE * input
FILE * output
int i
Definition: isn.c:73
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
static char * buf
Definition: pg_test_fsync.c:73
#define pg_utf_mblen
Definition: pg_wchar.h:633
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
static const pg_wchar unassigned_codepoint_ranges[]
Definition: saslprep.c:157
static const pg_wchar non_ascii_space_ranges[]
Definition: saslprep.c:66
static const pg_wchar RandALCat_codepoint_ranges[]
Definition: saslprep.c:558
#define STRDUP(s)
Definition: saslprep.c:38
#define IS_CODE_IN_TABLE(code, map)
Definition: saslprep.c:965
#define ALLOC(size)
Definition: saslprep.c:39
#define FREE(size)
Definition: saslprep.c:40
static const pg_wchar LCat_codepoint_ranges[]
Definition: saslprep.c:597
static const pg_wchar commonly_mapped_to_nothing_ranges[]
Definition: saslprep.c:81
static const pg_wchar prohibited_output_ranges[]
Definition: saslprep.c:116
static int pg_utf8_string_len(const char *source)
Definition: saslprep.c:1002
@ SASLPREP_INVALID_UTF8
Definition: saslprep.h:24
@ SASLPREP_PROHIBITED
Definition: saslprep.h:25
@ SASLPREP_OOM
Definition: saslprep.h:23
@ SASLPREP_SUCCESS
Definition: saslprep.h:22
bool pg_is_ascii(const char *str)
Definition: string.c:133
pg_wchar * unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
Definition: unicode_norm.c:402
@ UNICODE_NFKC
Definition: unicode_norm.h:23

References ALLOC, Assert, buf, commonly_mapped_to_nothing_ranges, FREE, i, input, IS_CODE_IN_TABLE, LCat_codepoint_ranges, non_ascii_space_ranges, output, pg_is_ascii(), pg_utf8_string_len(), pg_utf_mblen, prohibited_output_ranges, RandALCat_codepoint_ranges, SASLPREP_INVALID_UTF8, SASLPREP_OOM, SASLPREP_PROHIBITED, SASLPREP_SUCCESS, STRDUP, unassigned_codepoint_ranges, UNICODE_NFKC, unicode_normalize(), unicode_to_utf8(), and utf8_to_unicode().

Referenced by pg_be_scram_build_secret(), pg_fe_scram_build_secret(), scram_init(), and scram_verify_plain_password().

◆ pg_utf8_string_len()

static int pg_utf8_string_len ( const char *  source)
static

Definition at line 1002 of file saslprep.c.

1003 {
1004  const unsigned char *p = (const unsigned char *) source;
1005  int l;
1006  int num_chars = 0;
1007 
1008  while (*p)
1009  {
1010  l = pg_utf_mblen(p);
1011 
1012  if (!pg_utf8_islegal(p, l))
1013  return -1;
1014 
1015  p += l;
1016  num_chars++;
1017  }
1018 
1019  return num_chars;
1020 }
static rewind_source * source
Definition: pg_rewind.c:89
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1953

References pg_utf8_islegal(), pg_utf_mblen, and source.

Referenced by pg_saslprep().

Variable Documentation

◆ commonly_mapped_to_nothing_ranges

const pg_wchar commonly_mapped_to_nothing_ranges[]
static
Initial value:
=
{
0x00AD, 0x00AD,
0x034F, 0x034F,
0x1806, 0x1806,
0x180B, 0x180D,
0x200B, 0x200D,
0x2060, 0x2060,
0xFE00, 0xFE0F,
0xFEFF, 0xFEFF
}

Definition at line 81 of file saslprep.c.

Referenced by pg_saslprep().

◆ LCat_codepoint_ranges

const pg_wchar LCat_codepoint_ranges[]
static

Definition at line 597 of file saslprep.c.

Referenced by pg_saslprep().

◆ non_ascii_space_ranges

const pg_wchar non_ascii_space_ranges[]
static
Initial value:
=
{
0x00A0, 0x00A0,
0x1680, 0x1680,
0x2000, 0x200B,
0x202F, 0x202F,
0x205F, 0x205F,
0x3000, 0x3000
}

Definition at line 66 of file saslprep.c.

Referenced by pg_saslprep().

◆ prohibited_output_ranges

const pg_wchar prohibited_output_ranges[]
static

Definition at line 116 of file saslprep.c.

Referenced by pg_saslprep().

◆ RandALCat_codepoint_ranges

const pg_wchar RandALCat_codepoint_ranges[]
static

Definition at line 558 of file saslprep.c.

Referenced by pg_saslprep().

◆ unassigned_codepoint_ranges

const pg_wchar unassigned_codepoint_ranges[]
static

Definition at line 157 of file saslprep.c.

Referenced by pg_saslprep().