PostgreSQL Source Code git master
saslprep.c File Reference
#include "postgres.h"
#include "utils/memutils.h"
#include "common/saslprep.h"
#include "common/string.h"
#include "common/unicode_norm.h"
#include "mb/pg_wchar.h"
Include dependency graph for saslprep.c:

Go to the source code of this file.

Macros

#define STRDUP(s)   pstrdup(s)
 
#define ALLOC(size)   palloc(size)
 
#define FREE(size)   pfree(size)
 
#define IS_CODE_IN_TABLE(code, map)   is_code_in_table(code, map, lengthof(map))
 

Functions

static int codepoint_range_cmp (const void *a, const void *b)
 
static bool is_code_in_table (char32_t code, const char32_t *map, int mapsize)
 
static int pg_utf8_string_len (const char *source)
 
pg_saslprep_rc pg_saslprep (const char *input, char **output)
 

Variables

static const char32_t non_ascii_space_ranges []
 
static const char32_t commonly_mapped_to_nothing_ranges []
 
static const char32_t prohibited_output_ranges []
 
static const char32_t unassigned_codepoint_ranges []
 
static const char32_t RandALCat_codepoint_ranges []
 
static const char32_t LCat_codepoint_ranges []
 

Macro Definition Documentation

◆ ALLOC

#define ALLOC (   size)    palloc(size)

Definition at line 40 of file saslprep.c.

◆ FREE

#define FREE (   size)    pfree(size)

Definition at line 41 of file saslprep.c.

◆ IS_CODE_IN_TABLE

#define IS_CODE_IN_TABLE (   code,
  map 
)    is_code_in_table(code, map, lengthof(map))

Definition at line 966 of file saslprep.c.

◆ STRDUP

#define STRDUP (   s)    pstrdup(s)

Definition at line 39 of file saslprep.c.

Function Documentation

◆ codepoint_range_cmp()

static int codepoint_range_cmp ( const void *  a,
const void *  b 
)
static

Definition at line 969 of file saslprep.c.

970{
971 const char32_t *key = (const char32_t *) a;
972 const char32_t *range = (const char32_t *) b;
973
974 if (*key < range[0])
975 return -1; /* less than lower bound */
976 if (*key > range[1])
977 return 1; /* greater than upper bound */
978
979 return 0; /* within range */
980}
int b
Definition: isn.c:74
int a
Definition: isn.c:73
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412

References a, b, sort-test::key, and range().

Referenced by is_code_in_table().

◆ is_code_in_table()

static bool is_code_in_table ( char32_t  code,
const char32_t *  map,
int  mapsize 
)
static

Definition at line 983 of file saslprep.c.

984{
985 Assert(mapsize % 2 == 0);
986
987 if (code < map[0] || code > map[mapsize - 1])
988 return false;
989
990 if (bsearch(&code, map, mapsize / 2, sizeof(char32_t) * 2,
991 codepoint_range_cmp))
992 return true;
993 else
994 return false;
995}
Assert(PointerIsAligned(start, uint64))
static int codepoint_range_cmp(const void *a, const void *b)
Definition: saslprep.c:969

References Assert(), and codepoint_range_cmp().

◆ pg_saslprep()

pg_saslprep_rc pg_saslprep ( const char *  input,
char **  output 
)

Definition at line 1047 of file saslprep.c.

1048{
1049 char32_t *input_chars = NULL;
1050 char32_t *output_chars = NULL;
1051 int input_size;
1052 char *result;
1053 int result_size;
1054 int count;
1055 int i;
1056 bool contains_RandALCat;
1057 const unsigned char *p;
1058 unsigned char *outp;
1059 char32_t *wp;
1060
1061 /* Ensure we return *output as NULL on failure */
1062 *output = NULL;
1063
1064 /*
1065 * Quick check if the input is pure ASCII. An ASCII string requires no
1066 * further processing.
1067 */
1068 if (pg_is_ascii(input))
1069 {
1070 *output = STRDUP(input);
1071 if (!(*output))
1072 goto oom;
1073 return SASLPREP_SUCCESS;
1074 }
1075
1076 /*
1077 * Convert the input from UTF-8 to an array of Unicode codepoints.
1078 *
1079 * This also checks that the input is a legal UTF-8 string.
1080 */
1081 input_size = pg_utf8_string_len(input);
1082 if (input_size < 0)
1083 return SASLPREP_INVALID_UTF8;
1084 if (input_size >= MaxAllocSize / sizeof(char32_t))
1085 goto oom;
1086
1087 input_chars = ALLOC((input_size + 1) * sizeof(char32_t));
1088 if (!input_chars)
1089 goto oom;
1090
1091 p = (const unsigned char *) input;
1092 for (i = 0; i < input_size; i++)
1093 {
1094 input_chars[i] = utf8_to_unicode(p);
1095 p += pg_utf_mblen(p);
1096 }
1097 input_chars[i] = (char32_t) '\0';
1098
1099 /*
1100 * The steps below correspond to the steps listed in [RFC3454], Section
1101 * "2. Preparation Overview"
1102 */
1103
1104 /*
1105 * 1) Map -- For each character in the input, check if it has a mapping
1106 * and, if so, replace it with its mapping.
1107 */
1108 count = 0;
1109 for (i = 0; i < input_size; i++)
1110 {
1111 char32_t code = input_chars[i];
1112
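1113 if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
1114 input_chars[count++] = 0x0020;
1115 else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
1116 {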
1117 /* map to nothing */
1118 }
1119 else
1120 input_chars[count++] = code;
1121 }
1122 input_chars[count] = (char32_t) '\0';
1123 input_size = count;
1124
1125 if (input_size == 0)
1126 goto prohibited; /* don't allow empty password */
1127
1128 /*
1129 * 2) Normalize -- Normalize the result of step 1 using Unicode
1130 * normalization.
1131 */
1132 output_chars = unicode_normalize(UNICODE_NFKC, input_chars);
1133 if (!output_chars)
1134 goto oom;
1135
1136 /*
1137 * 3) Prohibit -- Check for any characters that are not allowed in the
1138 * output. If any are found, return an error.
1139 */
1140 for (i = 0; i < input_size; i++)
1141 {
1142 char32_t code = input_chars[i];
1143
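1144 if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
1145 goto prohibited;
1146 if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
1147 goto prohibited;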
1148 }
1149
1150 /*
1151 * 4) Check bidi -- Possibly check for right-to-left characters, and if
1152 * any are found, make sure that the whole string satisfies the
1153 * requirements for bidirectional strings. If the string does not satisfy
1154 * the requirements for bidirectional strings, return an error.
1155 *
1156 * [RFC3454], Section "6. Bidirectional Characters" explains in more
1157 * detail what that means:
1158 *
1159 * "In any profile that specifies bidirectional character handling, all
1160 * three of the following requirements MUST be met:
1161 *
1162 * 1) The characters in section 5.8 MUST be prohibited.
1163 *
1164 * 2) If a string contains any RandALCat character, the string MUST NOT
1165 * contain any LCat character.
1166 *
1167 * 3) If a string contains any RandALCat character, a RandALCat character
1168 * MUST be the first character of the string, and a RandALCat character
1169 * MUST be the last character of the string."
1170 */
1171 contains_RandALCat = false;
1172 for (i = 0; i < input_size; i++)
1173 {
1174 char32_t code = input_chars[i];
1175
1176 if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
1177 {
1178 contains_RandALCat = true;
1179 break;
1180 }
1181 }
1182
1183 if (contains_RandALCat)
1184 {
1185 char32_t first = input_chars[0];
1186 char32_t last = input_chars[input_size - 1];
1187
1188 for (i = 0; i < input_size; i++)
1189 {
1190 char32_t code = input_chars[i];
1191
1192 if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
1193 goto prohibited;
1194 }
1195
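1196 if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) ||
1197 !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges))
1198 goto prohibited;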
1199 }
1200
1201 /*
1202 * Finally, convert the result back to UTF-8.
1203 */
1204 result_size = 0;
1205 for (wp = output_chars; *wp; wp++)
1206 {
1207 unsigned char buf[4];
1208
1209 unicode_to_utf8(*wp, buf);
1210 result_size += pg_utf_mblen(buf);
1211 }
1212
1213 result = ALLOC(result_size + 1);
1214 if (!result)
1215 goto oom;
1216
1217 /*
1218 * There are no error exits below here, so the error exit paths don't need
1219 * to worry about possibly freeing "result".
1220 */
1221 outp = (unsigned char *) result;
1222 for (wp = output_chars; *wp; wp++)
1223 {
1224 unicode_to_utf8(*wp, outp);
1225 outp += pg_utf_mblen(outp);
1226 }
1227 Assert((char *) outp == result + result_size);
1228 *outp = '\0';
1229
1230 FREE(input_chars);
1231 FREE(output_chars);
1232
1233 *output = result;
1234 return SASLPREP_SUCCESS;
1235
1236prohibited:
1237 if (input_chars)
1238 FREE(input_chars);
1239 if (output_chars)
1240 FREE(output_chars);
1241
1242 return SASLPREP_PROHIBITED;
1243
1244oom:
1245 if (input_chars)
1246 FREE(input_chars);
1247 if (output_chars)
1248 FREE(output_chars);
1249
1250 return SASLPREP_OOM;
1251}
uint32_t char32_t
Definition: c.h:1377
#define MaxAllocSize
Definition: fe_memutils.h:22
FILE * input
FILE * output
int i
Definition: isn.c:77
static char32_t utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
#define pg_utf_mblen
Definition: pg_wchar.h:633
static unsigned char * unicode_to_utf8(char32_t c, unsigned char *utf8string)
Definition: pg_wchar.h:575
static const char32_t LCat_codepoint_ranges[]
Definition: saslprep.c:598
static const char32_t prohibited_output_ranges[]
Definition: saslprep.c:117
static const char32_t commonly_mapped_to_nothing_ranges[]
Definition: saslprep.c:82
#define STRDUP(s)
Definition: saslprep.c:39
#define IS_CODE_IN_TABLE(code, map)
Definition: saslprep.c:966
static const char32_t unassigned_codepoint_ranges[]
Definition: saslprep.c:158
#define ALLOC(size)
Definition: saslprep.c:40
#define FREE(size)
Definition: saslprep.c:41
static const char32_t RandALCat_codepoint_ranges[]
Definition: saslprep.c:559
static int pg_utf8_string_len(const char *source)
Definition: saslprep.c:1003
static const char32_t non_ascii_space_ranges[]
Definition: saslprep.c:67
@ SASLPREP_INVALID_UTF8
Definition: saslprep.h:24
@ SASLPREP_PROHIBITED
Definition: saslprep.h:25
@ SASLPREP_OOM
Definition: saslprep.h:23
@ SASLPREP_SUCCESS
Definition: saslprep.h:22
bool pg_is_ascii(const char *str)
Definition: string.c:132
char32_t * unicode_normalize(UnicodeNormalizationForm form, const char32_t *input)
Definition: unicode_norm.c:402
@ UNICODE_NFKC
Definition: unicode_norm.h:21

References ALLOC, Assert(), buf, commonly_mapped_to_nothing_ranges, FREE, i, input, IS_CODE_IN_TABLE, LCat_codepoint_ranges, MaxAllocSize, non_ascii_space_ranges, output, pg_is_ascii(), pg_utf8_string_len(), pg_utf_mblen, prohibited_output_ranges, RandALCat_codepoint_ranges, SASLPREP_INVALID_UTF8, SASLPREP_OOM, SASLPREP_PROHIBITED, SASLPREP_SUCCESS, STRDUP, unassigned_codepoint_ranges, UNICODE_NFKC, unicode_normalize(), unicode_to_utf8(), and utf8_to_unicode().

Referenced by pg_be_scram_build_secret(), pg_fe_scram_build_secret(), scram_init(), and scram_verify_plain_password().

◆ pg_utf8_string_len()

static int pg_utf8_string_len ( const char *  source)
static

Definition at line 1003 of file saslprep.c.

1004{
1005 const unsigned char *p = (const unsigned char *) source;
1006 int l;
1007 int num_chars = 0;
1008 size_t len = strlen(source);
1009
1010 while (len)
1011 {
1012 l = pg_utf_mblen(p);
1013
1014 if (len < l || !pg_utf8_islegal(p, l))
1015 return -1;
1016
1017 p += l;
1018 len -= l;
1019 num_chars++;
1020 }
1021
1022 return num_chars;
1023}
const void size_t len
static rewind_source * source
Definition: pg_rewind.c:89
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:1989

References len, pg_utf8_islegal(), pg_utf_mblen, and source.

Referenced by pg_saslprep().

Variable Documentation

◆ commonly_mapped_to_nothing_ranges

const char32_t commonly_mapped_to_nothing_ranges[]
static
Initial value:
=
{
0x00AD, 0x00AD,
0x034F, 0x034F,
0x1806, 0x1806,
0x180B, 0x180D,
0x200B, 0x200D,
0x2060, 0x2060,
0xFE00, 0xFE0F,
0xFEFF, 0xFEFF
}

Definition at line 82 of file saslprep.c.

Referenced by pg_saslprep().

◆ LCat_codepoint_ranges

const char32_t LCat_codepoint_ranges[]
static

Definition at line 598 of file saslprep.c.

Referenced by pg_saslprep().

◆ non_ascii_space_ranges

const char32_t non_ascii_space_ranges[]
static
Initial value:
=
{
0x00A0, 0x00A0,
0x1680, 0x1680,
0x2000, 0x200B,
0x202F, 0x202F,
0x205F, 0x205F,
0x3000, 0x3000
}

Definition at line 67 of file saslprep.c.

Referenced by pg_saslprep().

◆ prohibited_output_ranges

const char32_t prohibited_output_ranges[]
static

Definition at line 117 of file saslprep.c.

Referenced by pg_saslprep().

◆ RandALCat_codepoint_ranges

const char32_t RandALCat_codepoint_ranges[]
static

Definition at line 559 of file saslprep.c.

Referenced by pg_saslprep().

◆ unassigned_codepoint_ranges

const char32_t unassigned_codepoint_ranges[]
static

Definition at line 158 of file saslprep.c.

Referenced by pg_saslprep().