PostgreSQL Source Code  git master
saslprep.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  * saslprep.c
3  * SASLprep normalization, for SCRAM authentication
4  *
5  * The SASLprep algorithm is used to process a user-supplied password into
6  * canonical form. For more details, see:
7  *
8  * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
9  * http://www.ietf.org/rfc/rfc3454.txt
10  *
11  * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
12  * http://www.ietf.org/rfc/rfc4013.txt
13  *
14  *
15  * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
16  *
17  * IDENTIFICATION
18  * src/common/saslprep.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 #ifndef FRONTEND
23 #include "postgres.h"
24 #else
25 #include "postgres_fe.h"
26 #endif
27 
28 #include "common/saslprep.h"
29 #include "common/string.h"
30 #include "common/unicode_norm.h"
31 #include "mb/pg_wchar.h"
32 
33 /*
34  * In backend, we will use palloc/pfree. In frontend, use malloc, and
35  * return SASLPREP_OOM on out-of-memory.
36  */
37 #ifndef FRONTEND
38 #define STRDUP(s) pstrdup(s)
39 #define ALLOC(size) palloc(size)
40 #define FREE(size) pfree(size)
41 #else
42 #define STRDUP(s) strdup(s)
43 #define ALLOC(size) malloc(size)
44 #define FREE(size) free(size)
45 #endif
46 
47 /* Prototypes for local functions */
48 static int codepoint_range_cmp(const void *a, const void *b);
49 static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
50 static int pg_utf8_string_len(const char *source);
51 
52 /*
53  * Stringprep Mapping Tables.
54  *
55  * The stringprep specification includes a number of tables of Unicode
56  * codepoints, used in different parts of the algorithm. They are below,
57  * as arrays of codepoint ranges. Each range is a pair of codepoints,
58  * for the first and last codepoint included in the range (inclusive!).
59  */
60 
61 /*
62  * C.1.2 Non-ASCII space characters
63  *
64  * These are all mapped to the ASCII space character (U+0020).
65  */
67 {
68  0x00A0, 0x00A0,
69  0x1680, 0x1680,
70  0x2000, 0x200B,
71  0x202F, 0x202F,
72  0x205F, 0x205F,
73  0x3000, 0x3000
74 };
75 
76 /*
77  * B.1 Commonly mapped to nothing
78  *
79  * If any of these appear in the input, they are removed.
80  */
82 {
83  0x00AD, 0x00AD,
84  0x034F, 0x034F,
85  0x1806, 0x1806,
86  0x180B, 0x180D,
87  0x200B, 0x200D,
88  0x2060, 0x2060,
89  0xFE00, 0xFE0F,
90  0xFEFF, 0xFEFF
91 };
92 
93 /*
94  * prohibited_output_ranges is a union of all the characters from
95  * the following tables:
96  *
97  * C.1.2 Non-ASCII space characters
98  * C.2.1 ASCII control characters
99  * C.2.2 Non-ASCII control characters
100  * C.3 Private Use characters
101  * C.4 Non-character code points
102  * C.5 Surrogate code points
103  * C.6 Inappropriate for plain text characters
104  * C.7 Inappropriate for canonical representation characters
105  * C.8 Change display properties or deprecated characters
106  * C.9 Tagging characters
107  *
108  * These are the tables that are listed as "prohibited output"
109  * characters in the SASLprep profile.
110  *
111  * The comment after each code range indicates which source table
112  * the code came from. Note that there is some overlap in the source
113  * tables, so one code might originate from multiple source tables.
114  * Adjacent ranges have also been merged together, to save space.
115  */
117 {
118  0x0000, 0x001F, /* C.2.1 */
119  0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */
120  0x0340, 0x0341, /* C.8 */
121  0x06DD, 0x06DD, /* C.2.2 */
122  0x070F, 0x070F, /* C.2.2 */
123  0x1680, 0x1680, /* C.1.2 */
124  0x180E, 0x180E, /* C.2.2 */
125  0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */
126  0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */
127  0x205F, 0x2063, /* C.1.2, C.2.2 */
128  0x206A, 0x206F, /* C.2.2, C.8 */
129  0x2FF0, 0x2FFB, /* C.7 */
130  0x3000, 0x3000, /* C.1.2 */
131  0xD800, 0xF8FF, /* C.3, C.5 */
132  0xFDD0, 0xFDEF, /* C.4 */
133  0xFEFF, 0xFEFF, /* C.2.2 */
134  0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */
135  0x1D173, 0x1D17A, /* C.2.2 */
136  0x1FFFE, 0x1FFFF, /* C.4 */
137  0x2FFFE, 0x2FFFF, /* C.4 */
138  0x3FFFE, 0x3FFFF, /* C.4 */
139  0x4FFFE, 0x4FFFF, /* C.4 */
140  0x5FFFE, 0x5FFFF, /* C.4 */
141  0x6FFFE, 0x6FFFF, /* C.4 */
142  0x7FFFE, 0x7FFFF, /* C.4 */
143  0x8FFFE, 0x8FFFF, /* C.4 */
144  0x9FFFE, 0x9FFFF, /* C.4 */
145  0xAFFFE, 0xAFFFF, /* C.4 */
146  0xBFFFE, 0xBFFFF, /* C.4 */
147  0xCFFFE, 0xCFFFF, /* C.4 */
148  0xDFFFE, 0xDFFFF, /* C.4 */
149  0xE0001, 0xE0001, /* C.9 */
150  0xE0020, 0xE007F, /* C.9 */
151  0xEFFFE, 0xEFFFF, /* C.4 */
152  0xF0000, 0xFFFFF, /* C.3, C.4 */
153  0x100000, 0x10FFFF /* C.3, C.4 */
154 };
155 
156 /* A.1 Unassigned code points in Unicode 3.2 */
158 {
159  0x0221, 0x0221,
160  0x0234, 0x024F,
161  0x02AE, 0x02AF,
162  0x02EF, 0x02FF,
163  0x0350, 0x035F,
164  0x0370, 0x0373,
165  0x0376, 0x0379,
166  0x037B, 0x037D,
167  0x037F, 0x0383,
168  0x038B, 0x038B,
169  0x038D, 0x038D,
170  0x03A2, 0x03A2,
171  0x03CF, 0x03CF,
172  0x03F7, 0x03FF,
173  0x0487, 0x0487,
174  0x04CF, 0x04CF,
175  0x04F6, 0x04F7,
176  0x04FA, 0x04FF,
177  0x0510, 0x0530,
178  0x0557, 0x0558,
179  0x0560, 0x0560,
180  0x0588, 0x0588,
181  0x058B, 0x0590,
182  0x05A2, 0x05A2,
183  0x05BA, 0x05BA,
184  0x05C5, 0x05CF,
185  0x05EB, 0x05EF,
186  0x05F5, 0x060B,
187  0x060D, 0x061A,
188  0x061C, 0x061E,
189  0x0620, 0x0620,
190  0x063B, 0x063F,
191  0x0656, 0x065F,
192  0x06EE, 0x06EF,
193  0x06FF, 0x06FF,
194  0x070E, 0x070E,
195  0x072D, 0x072F,
196  0x074B, 0x077F,
197  0x07B2, 0x0900,
198  0x0904, 0x0904,
199  0x093A, 0x093B,
200  0x094E, 0x094F,
201  0x0955, 0x0957,
202  0x0971, 0x0980,
203  0x0984, 0x0984,
204  0x098D, 0x098E,
205  0x0991, 0x0992,
206  0x09A9, 0x09A9,
207  0x09B1, 0x09B1,
208  0x09B3, 0x09B5,
209  0x09BA, 0x09BB,
210  0x09BD, 0x09BD,
211  0x09C5, 0x09C6,
212  0x09C9, 0x09CA,
213  0x09CE, 0x09D6,
214  0x09D8, 0x09DB,
215  0x09DE, 0x09DE,
216  0x09E4, 0x09E5,
217  0x09FB, 0x0A01,
218  0x0A03, 0x0A04,
219  0x0A0B, 0x0A0E,
220  0x0A11, 0x0A12,
221  0x0A29, 0x0A29,
222  0x0A31, 0x0A31,
223  0x0A34, 0x0A34,
224  0x0A37, 0x0A37,
225  0x0A3A, 0x0A3B,
226  0x0A3D, 0x0A3D,
227  0x0A43, 0x0A46,
228  0x0A49, 0x0A4A,
229  0x0A4E, 0x0A58,
230  0x0A5D, 0x0A5D,
231  0x0A5F, 0x0A65,
232  0x0A75, 0x0A80,
233  0x0A84, 0x0A84,
234  0x0A8C, 0x0A8C,
235  0x0A8E, 0x0A8E,
236  0x0A92, 0x0A92,
237  0x0AA9, 0x0AA9,
238  0x0AB1, 0x0AB1,
239  0x0AB4, 0x0AB4,
240  0x0ABA, 0x0ABB,
241  0x0AC6, 0x0AC6,
242  0x0ACA, 0x0ACA,
243  0x0ACE, 0x0ACF,
244  0x0AD1, 0x0ADF,
245  0x0AE1, 0x0AE5,
246  0x0AF0, 0x0B00,
247  0x0B04, 0x0B04,
248  0x0B0D, 0x0B0E,
249  0x0B11, 0x0B12,
250  0x0B29, 0x0B29,
251  0x0B31, 0x0B31,
252  0x0B34, 0x0B35,
253  0x0B3A, 0x0B3B,
254  0x0B44, 0x0B46,
255  0x0B49, 0x0B4A,
256  0x0B4E, 0x0B55,
257  0x0B58, 0x0B5B,
258  0x0B5E, 0x0B5E,
259  0x0B62, 0x0B65,
260  0x0B71, 0x0B81,
261  0x0B84, 0x0B84,
262  0x0B8B, 0x0B8D,
263  0x0B91, 0x0B91,
264  0x0B96, 0x0B98,
265  0x0B9B, 0x0B9B,
266  0x0B9D, 0x0B9D,
267  0x0BA0, 0x0BA2,
268  0x0BA5, 0x0BA7,
269  0x0BAB, 0x0BAD,
270  0x0BB6, 0x0BB6,
271  0x0BBA, 0x0BBD,
272  0x0BC3, 0x0BC5,
273  0x0BC9, 0x0BC9,
274  0x0BCE, 0x0BD6,
275  0x0BD8, 0x0BE6,
276  0x0BF3, 0x0C00,
277  0x0C04, 0x0C04,
278  0x0C0D, 0x0C0D,
279  0x0C11, 0x0C11,
280  0x0C29, 0x0C29,
281  0x0C34, 0x0C34,
282  0x0C3A, 0x0C3D,
283  0x0C45, 0x0C45,
284  0x0C49, 0x0C49,
285  0x0C4E, 0x0C54,
286  0x0C57, 0x0C5F,
287  0x0C62, 0x0C65,
288  0x0C70, 0x0C81,
289  0x0C84, 0x0C84,
290  0x0C8D, 0x0C8D,
291  0x0C91, 0x0C91,
292  0x0CA9, 0x0CA9,
293  0x0CB4, 0x0CB4,
294  0x0CBA, 0x0CBD,
295  0x0CC5, 0x0CC5,
296  0x0CC9, 0x0CC9,
297  0x0CCE, 0x0CD4,
298  0x0CD7, 0x0CDD,
299  0x0CDF, 0x0CDF,
300  0x0CE2, 0x0CE5,
301  0x0CF0, 0x0D01,
302  0x0D04, 0x0D04,
303  0x0D0D, 0x0D0D,
304  0x0D11, 0x0D11,
305  0x0D29, 0x0D29,
306  0x0D3A, 0x0D3D,
307  0x0D44, 0x0D45,
308  0x0D49, 0x0D49,
309  0x0D4E, 0x0D56,
310  0x0D58, 0x0D5F,
311  0x0D62, 0x0D65,
312  0x0D70, 0x0D81,
313  0x0D84, 0x0D84,
314  0x0D97, 0x0D99,
315  0x0DB2, 0x0DB2,
316  0x0DBC, 0x0DBC,
317  0x0DBE, 0x0DBF,
318  0x0DC7, 0x0DC9,
319  0x0DCB, 0x0DCE,
320  0x0DD5, 0x0DD5,
321  0x0DD7, 0x0DD7,
322  0x0DE0, 0x0DF1,
323  0x0DF5, 0x0E00,
324  0x0E3B, 0x0E3E,
325  0x0E5C, 0x0E80,
326  0x0E83, 0x0E83,
327  0x0E85, 0x0E86,
328  0x0E89, 0x0E89,
329  0x0E8B, 0x0E8C,
330  0x0E8E, 0x0E93,
331  0x0E98, 0x0E98,
332  0x0EA0, 0x0EA0,
333  0x0EA4, 0x0EA4,
334  0x0EA6, 0x0EA6,
335  0x0EA8, 0x0EA9,
336  0x0EAC, 0x0EAC,
337  0x0EBA, 0x0EBA,
338  0x0EBE, 0x0EBF,
339  0x0EC5, 0x0EC5,
340  0x0EC7, 0x0EC7,
341  0x0ECE, 0x0ECF,
342  0x0EDA, 0x0EDB,
343  0x0EDE, 0x0EFF,
344  0x0F48, 0x0F48,
345  0x0F6B, 0x0F70,
346  0x0F8C, 0x0F8F,
347  0x0F98, 0x0F98,
348  0x0FBD, 0x0FBD,
349  0x0FCD, 0x0FCE,
350  0x0FD0, 0x0FFF,
351  0x1022, 0x1022,
352  0x1028, 0x1028,
353  0x102B, 0x102B,
354  0x1033, 0x1035,
355  0x103A, 0x103F,
356  0x105A, 0x109F,
357  0x10C6, 0x10CF,
358  0x10F9, 0x10FA,
359  0x10FC, 0x10FF,
360  0x115A, 0x115E,
361  0x11A3, 0x11A7,
362  0x11FA, 0x11FF,
363  0x1207, 0x1207,
364  0x1247, 0x1247,
365  0x1249, 0x1249,
366  0x124E, 0x124F,
367  0x1257, 0x1257,
368  0x1259, 0x1259,
369  0x125E, 0x125F,
370  0x1287, 0x1287,
371  0x1289, 0x1289,
372  0x128E, 0x128F,
373  0x12AF, 0x12AF,
374  0x12B1, 0x12B1,
375  0x12B6, 0x12B7,
376  0x12BF, 0x12BF,
377  0x12C1, 0x12C1,
378  0x12C6, 0x12C7,
379  0x12CF, 0x12CF,
380  0x12D7, 0x12D7,
381  0x12EF, 0x12EF,
382  0x130F, 0x130F,
383  0x1311, 0x1311,
384  0x1316, 0x1317,
385  0x131F, 0x131F,
386  0x1347, 0x1347,
387  0x135B, 0x1360,
388  0x137D, 0x139F,
389  0x13F5, 0x1400,
390  0x1677, 0x167F,
391  0x169D, 0x169F,
392  0x16F1, 0x16FF,
393  0x170D, 0x170D,
394  0x1715, 0x171F,
395  0x1737, 0x173F,
396  0x1754, 0x175F,
397  0x176D, 0x176D,
398  0x1771, 0x1771,
399  0x1774, 0x177F,
400  0x17DD, 0x17DF,
401  0x17EA, 0x17FF,
402  0x180F, 0x180F,
403  0x181A, 0x181F,
404  0x1878, 0x187F,
405  0x18AA, 0x1DFF,
406  0x1E9C, 0x1E9F,
407  0x1EFA, 0x1EFF,
408  0x1F16, 0x1F17,
409  0x1F1E, 0x1F1F,
410  0x1F46, 0x1F47,
411  0x1F4E, 0x1F4F,
412  0x1F58, 0x1F58,
413  0x1F5A, 0x1F5A,
414  0x1F5C, 0x1F5C,
415  0x1F5E, 0x1F5E,
416  0x1F7E, 0x1F7F,
417  0x1FB5, 0x1FB5,
418  0x1FC5, 0x1FC5,
419  0x1FD4, 0x1FD5,
420  0x1FDC, 0x1FDC,
421  0x1FF0, 0x1FF1,
422  0x1FF5, 0x1FF5,
423  0x1FFF, 0x1FFF,
424  0x2053, 0x2056,
425  0x2058, 0x205E,
426  0x2064, 0x2069,
427  0x2072, 0x2073,
428  0x208F, 0x209F,
429  0x20B2, 0x20CF,
430  0x20EB, 0x20FF,
431  0x213B, 0x213C,
432  0x214C, 0x2152,
433  0x2184, 0x218F,
434  0x23CF, 0x23FF,
435  0x2427, 0x243F,
436  0x244B, 0x245F,
437  0x24FF, 0x24FF,
438  0x2614, 0x2615,
439  0x2618, 0x2618,
440  0x267E, 0x267F,
441  0x268A, 0x2700,
442  0x2705, 0x2705,
443  0x270A, 0x270B,
444  0x2728, 0x2728,
445  0x274C, 0x274C,
446  0x274E, 0x274E,
447  0x2753, 0x2755,
448  0x2757, 0x2757,
449  0x275F, 0x2760,
450  0x2795, 0x2797,
451  0x27B0, 0x27B0,
452  0x27BF, 0x27CF,
453  0x27EC, 0x27EF,
454  0x2B00, 0x2E7F,
455  0x2E9A, 0x2E9A,
456  0x2EF4, 0x2EFF,
457  0x2FD6, 0x2FEF,
458  0x2FFC, 0x2FFF,
459  0x3040, 0x3040,
460  0x3097, 0x3098,
461  0x3100, 0x3104,
462  0x312D, 0x3130,
463  0x318F, 0x318F,
464  0x31B8, 0x31EF,
465  0x321D, 0x321F,
466  0x3244, 0x3250,
467  0x327C, 0x327E,
468  0x32CC, 0x32CF,
469  0x32FF, 0x32FF,
470  0x3377, 0x337A,
471  0x33DE, 0x33DF,
472  0x33FF, 0x33FF,
473  0x4DB6, 0x4DFF,
474  0x9FA6, 0x9FFF,
475  0xA48D, 0xA48F,
476  0xA4C7, 0xABFF,
477  0xD7A4, 0xD7FF,
478  0xFA2E, 0xFA2F,
479  0xFA6B, 0xFAFF,
480  0xFB07, 0xFB12,
481  0xFB18, 0xFB1C,
482  0xFB37, 0xFB37,
483  0xFB3D, 0xFB3D,
484  0xFB3F, 0xFB3F,
485  0xFB42, 0xFB42,
486  0xFB45, 0xFB45,
487  0xFBB2, 0xFBD2,
488  0xFD40, 0xFD4F,
489  0xFD90, 0xFD91,
490  0xFDC8, 0xFDCF,
491  0xFDFD, 0xFDFF,
492  0xFE10, 0xFE1F,
493  0xFE24, 0xFE2F,
494  0xFE47, 0xFE48,
495  0xFE53, 0xFE53,
496  0xFE67, 0xFE67,
497  0xFE6C, 0xFE6F,
498  0xFE75, 0xFE75,
499  0xFEFD, 0xFEFE,
500  0xFF00, 0xFF00,
501  0xFFBF, 0xFFC1,
502  0xFFC8, 0xFFC9,
503  0xFFD0, 0xFFD1,
504  0xFFD8, 0xFFD9,
505  0xFFDD, 0xFFDF,
506  0xFFE7, 0xFFE7,
507  0xFFEF, 0xFFF8,
508  0x10000, 0x102FF,
509  0x1031F, 0x1031F,
510  0x10324, 0x1032F,
511  0x1034B, 0x103FF,
512  0x10426, 0x10427,
513  0x1044E, 0x1CFFF,
514  0x1D0F6, 0x1D0FF,
515  0x1D127, 0x1D129,
516  0x1D1DE, 0x1D3FF,
517  0x1D455, 0x1D455,
518  0x1D49D, 0x1D49D,
519  0x1D4A0, 0x1D4A1,
520  0x1D4A3, 0x1D4A4,
521  0x1D4A7, 0x1D4A8,
522  0x1D4AD, 0x1D4AD,
523  0x1D4BA, 0x1D4BA,
524  0x1D4BC, 0x1D4BC,
525  0x1D4C1, 0x1D4C1,
526  0x1D4C4, 0x1D4C4,
527  0x1D506, 0x1D506,
528  0x1D50B, 0x1D50C,
529  0x1D515, 0x1D515,
530  0x1D51D, 0x1D51D,
531  0x1D53A, 0x1D53A,
532  0x1D53F, 0x1D53F,
533  0x1D545, 0x1D545,
534  0x1D547, 0x1D549,
535  0x1D551, 0x1D551,
536  0x1D6A4, 0x1D6A7,
537  0x1D7CA, 0x1D7CD,
538  0x1D800, 0x1FFFD,
539  0x2A6D7, 0x2F7FF,
540  0x2FA1E, 0x2FFFD,
541  0x30000, 0x3FFFD,
542  0x40000, 0x4FFFD,
543  0x50000, 0x5FFFD,
544  0x60000, 0x6FFFD,
545  0x70000, 0x7FFFD,
546  0x80000, 0x8FFFD,
547  0x90000, 0x9FFFD,
548  0xA0000, 0xAFFFD,
549  0xB0000, 0xBFFFD,
550  0xC0000, 0xCFFFD,
551  0xD0000, 0xDFFFD,
552  0xE0000, 0xE0000,
553  0xE0002, 0xE001F,
554  0xE0080, 0xEFFFD
555 };
556 
557 /* D.1 Characters with bidirectional property "R" or "AL" */
559 {
560  0x05BE, 0x05BE,
561  0x05C0, 0x05C0,
562  0x05C3, 0x05C3,
563  0x05D0, 0x05EA,
564  0x05F0, 0x05F4,
565  0x061B, 0x061B,
566  0x061F, 0x061F,
567  0x0621, 0x063A,
568  0x0640, 0x064A,
569  0x066D, 0x066F,
570  0x0671, 0x06D5,
571  0x06DD, 0x06DD,
572  0x06E5, 0x06E6,
573  0x06FA, 0x06FE,
574  0x0700, 0x070D,
575  0x0710, 0x0710,
576  0x0712, 0x072C,
577  0x0780, 0x07A5,
578  0x07B1, 0x07B1,
579  0x200F, 0x200F,
580  0xFB1D, 0xFB1D,
581  0xFB1F, 0xFB28,
582  0xFB2A, 0xFB36,
583  0xFB38, 0xFB3C,
584  0xFB3E, 0xFB3E,
585  0xFB40, 0xFB41,
586  0xFB43, 0xFB44,
587  0xFB46, 0xFBB1,
588  0xFBD3, 0xFD3D,
589  0xFD50, 0xFD8F,
590  0xFD92, 0xFDC7,
591  0xFDF0, 0xFDFC,
592  0xFE70, 0xFE74,
593  0xFE76, 0xFEFC
594 };
595 
596 /* D.2 Characters with bidirectional property "L" */
598 {
599  0x0041, 0x005A,
600  0x0061, 0x007A,
601  0x00AA, 0x00AA,
602  0x00B5, 0x00B5,
603  0x00BA, 0x00BA,
604  0x00C0, 0x00D6,
605  0x00D8, 0x00F6,
606  0x00F8, 0x0220,
607  0x0222, 0x0233,
608  0x0250, 0x02AD,
609  0x02B0, 0x02B8,
610  0x02BB, 0x02C1,
611  0x02D0, 0x02D1,
612  0x02E0, 0x02E4,
613  0x02EE, 0x02EE,
614  0x037A, 0x037A,
615  0x0386, 0x0386,
616  0x0388, 0x038A,
617  0x038C, 0x038C,
618  0x038E, 0x03A1,
619  0x03A3, 0x03CE,
620  0x03D0, 0x03F5,
621  0x0400, 0x0482,
622  0x048A, 0x04CE,
623  0x04D0, 0x04F5,
624  0x04F8, 0x04F9,
625  0x0500, 0x050F,
626  0x0531, 0x0556,
627  0x0559, 0x055F,
628  0x0561, 0x0587,
629  0x0589, 0x0589,
630  0x0903, 0x0903,
631  0x0905, 0x0939,
632  0x093D, 0x0940,
633  0x0949, 0x094C,
634  0x0950, 0x0950,
635  0x0958, 0x0961,
636  0x0964, 0x0970,
637  0x0982, 0x0983,
638  0x0985, 0x098C,
639  0x098F, 0x0990,
640  0x0993, 0x09A8,
641  0x09AA, 0x09B0,
642  0x09B2, 0x09B2,
643  0x09B6, 0x09B9,
644  0x09BE, 0x09C0,
645  0x09C7, 0x09C8,
646  0x09CB, 0x09CC,
647  0x09D7, 0x09D7,
648  0x09DC, 0x09DD,
649  0x09DF, 0x09E1,
650  0x09E6, 0x09F1,
651  0x09F4, 0x09FA,
652  0x0A05, 0x0A0A,
653  0x0A0F, 0x0A10,
654  0x0A13, 0x0A28,
655  0x0A2A, 0x0A30,
656  0x0A32, 0x0A33,
657  0x0A35, 0x0A36,
658  0x0A38, 0x0A39,
659  0x0A3E, 0x0A40,
660  0x0A59, 0x0A5C,
661  0x0A5E, 0x0A5E,
662  0x0A66, 0x0A6F,
663  0x0A72, 0x0A74,
664  0x0A83, 0x0A83,
665  0x0A85, 0x0A8B,
666  0x0A8D, 0x0A8D,
667  0x0A8F, 0x0A91,
668  0x0A93, 0x0AA8,
669  0x0AAA, 0x0AB0,
670  0x0AB2, 0x0AB3,
671  0x0AB5, 0x0AB9,
672  0x0ABD, 0x0AC0,
673  0x0AC9, 0x0AC9,
674  0x0ACB, 0x0ACC,
675  0x0AD0, 0x0AD0,
676  0x0AE0, 0x0AE0,
677  0x0AE6, 0x0AEF,
678  0x0B02, 0x0B03,
679  0x0B05, 0x0B0C,
680  0x0B0F, 0x0B10,
681  0x0B13, 0x0B28,
682  0x0B2A, 0x0B30,
683  0x0B32, 0x0B33,
684  0x0B36, 0x0B39,
685  0x0B3D, 0x0B3E,
686  0x0B40, 0x0B40,
687  0x0B47, 0x0B48,
688  0x0B4B, 0x0B4C,
689  0x0B57, 0x0B57,
690  0x0B5C, 0x0B5D,
691  0x0B5F, 0x0B61,
692  0x0B66, 0x0B70,
693  0x0B83, 0x0B83,
694  0x0B85, 0x0B8A,
695  0x0B8E, 0x0B90,
696  0x0B92, 0x0B95,
697  0x0B99, 0x0B9A,
698  0x0B9C, 0x0B9C,
699  0x0B9E, 0x0B9F,
700  0x0BA3, 0x0BA4,
701  0x0BA8, 0x0BAA,
702  0x0BAE, 0x0BB5,
703  0x0BB7, 0x0BB9,
704  0x0BBE, 0x0BBF,
705  0x0BC1, 0x0BC2,
706  0x0BC6, 0x0BC8,
707  0x0BCA, 0x0BCC,
708  0x0BD7, 0x0BD7,
709  0x0BE7, 0x0BF2,
710  0x0C01, 0x0C03,
711  0x0C05, 0x0C0C,
712  0x0C0E, 0x0C10,
713  0x0C12, 0x0C28,
714  0x0C2A, 0x0C33,
715  0x0C35, 0x0C39,
716  0x0C41, 0x0C44,
717  0x0C60, 0x0C61,
718  0x0C66, 0x0C6F,
719  0x0C82, 0x0C83,
720  0x0C85, 0x0C8C,
721  0x0C8E, 0x0C90,
722  0x0C92, 0x0CA8,
723  0x0CAA, 0x0CB3,
724  0x0CB5, 0x0CB9,
725  0x0CBE, 0x0CBE,
726  0x0CC0, 0x0CC4,
727  0x0CC7, 0x0CC8,
728  0x0CCA, 0x0CCB,
729  0x0CD5, 0x0CD6,
730  0x0CDE, 0x0CDE,
731  0x0CE0, 0x0CE1,
732  0x0CE6, 0x0CEF,
733  0x0D02, 0x0D03,
734  0x0D05, 0x0D0C,
735  0x0D0E, 0x0D10,
736  0x0D12, 0x0D28,
737  0x0D2A, 0x0D39,
738  0x0D3E, 0x0D40,
739  0x0D46, 0x0D48,
740  0x0D4A, 0x0D4C,
741  0x0D57, 0x0D57,
742  0x0D60, 0x0D61,
743  0x0D66, 0x0D6F,
744  0x0D82, 0x0D83,
745  0x0D85, 0x0D96,
746  0x0D9A, 0x0DB1,
747  0x0DB3, 0x0DBB,
748  0x0DBD, 0x0DBD,
749  0x0DC0, 0x0DC6,
750  0x0DCF, 0x0DD1,
751  0x0DD8, 0x0DDF,
752  0x0DF2, 0x0DF4,
753  0x0E01, 0x0E30,
754  0x0E32, 0x0E33,
755  0x0E40, 0x0E46,
756  0x0E4F, 0x0E5B,
757  0x0E81, 0x0E82,
758  0x0E84, 0x0E84,
759  0x0E87, 0x0E88,
760  0x0E8A, 0x0E8A,
761  0x0E8D, 0x0E8D,
762  0x0E94, 0x0E97,
763  0x0E99, 0x0E9F,
764  0x0EA1, 0x0EA3,
765  0x0EA5, 0x0EA5,
766  0x0EA7, 0x0EA7,
767  0x0EAA, 0x0EAB,
768  0x0EAD, 0x0EB0,
769  0x0EB2, 0x0EB3,
770  0x0EBD, 0x0EBD,
771  0x0EC0, 0x0EC4,
772  0x0EC6, 0x0EC6,
773  0x0ED0, 0x0ED9,
774  0x0EDC, 0x0EDD,
775  0x0F00, 0x0F17,
776  0x0F1A, 0x0F34,
777  0x0F36, 0x0F36,
778  0x0F38, 0x0F38,
779  0x0F3E, 0x0F47,
780  0x0F49, 0x0F6A,
781  0x0F7F, 0x0F7F,
782  0x0F85, 0x0F85,
783  0x0F88, 0x0F8B,
784  0x0FBE, 0x0FC5,
785  0x0FC7, 0x0FCC,
786  0x0FCF, 0x0FCF,
787  0x1000, 0x1021,
788  0x1023, 0x1027,
789  0x1029, 0x102A,
790  0x102C, 0x102C,
791  0x1031, 0x1031,
792  0x1038, 0x1038,
793  0x1040, 0x1057,
794  0x10A0, 0x10C5,
795  0x10D0, 0x10F8,
796  0x10FB, 0x10FB,
797  0x1100, 0x1159,
798  0x115F, 0x11A2,
799  0x11A8, 0x11F9,
800  0x1200, 0x1206,
801  0x1208, 0x1246,
802  0x1248, 0x1248,
803  0x124A, 0x124D,
804  0x1250, 0x1256,
805  0x1258, 0x1258,
806  0x125A, 0x125D,
807  0x1260, 0x1286,
808  0x1288, 0x1288,
809  0x128A, 0x128D,
810  0x1290, 0x12AE,
811  0x12B0, 0x12B0,
812  0x12B2, 0x12B5,
813  0x12B8, 0x12BE,
814  0x12C0, 0x12C0,
815  0x12C2, 0x12C5,
816  0x12C8, 0x12CE,
817  0x12D0, 0x12D6,
818  0x12D8, 0x12EE,
819  0x12F0, 0x130E,
820  0x1310, 0x1310,
821  0x1312, 0x1315,
822  0x1318, 0x131E,
823  0x1320, 0x1346,
824  0x1348, 0x135A,
825  0x1361, 0x137C,
826  0x13A0, 0x13F4,
827  0x1401, 0x1676,
828  0x1681, 0x169A,
829  0x16A0, 0x16F0,
830  0x1700, 0x170C,
831  0x170E, 0x1711,
832  0x1720, 0x1731,
833  0x1735, 0x1736,
834  0x1740, 0x1751,
835  0x1760, 0x176C,
836  0x176E, 0x1770,
837  0x1780, 0x17B6,
838  0x17BE, 0x17C5,
839  0x17C7, 0x17C8,
840  0x17D4, 0x17DA,
841  0x17DC, 0x17DC,
842  0x17E0, 0x17E9,
843  0x1810, 0x1819,
844  0x1820, 0x1877,
845  0x1880, 0x18A8,
846  0x1E00, 0x1E9B,
847  0x1EA0, 0x1EF9,
848  0x1F00, 0x1F15,
849  0x1F18, 0x1F1D,
850  0x1F20, 0x1F45,
851  0x1F48, 0x1F4D,
852  0x1F50, 0x1F57,
853  0x1F59, 0x1F59,
854  0x1F5B, 0x1F5B,
855  0x1F5D, 0x1F5D,
856  0x1F5F, 0x1F7D,
857  0x1F80, 0x1FB4,
858  0x1FB6, 0x1FBC,
859  0x1FBE, 0x1FBE,
860  0x1FC2, 0x1FC4,
861  0x1FC6, 0x1FCC,
862  0x1FD0, 0x1FD3,
863  0x1FD6, 0x1FDB,
864  0x1FE0, 0x1FEC,
865  0x1FF2, 0x1FF4,
866  0x1FF6, 0x1FFC,
867  0x200E, 0x200E,
868  0x2071, 0x2071,
869  0x207F, 0x207F,
870  0x2102, 0x2102,
871  0x2107, 0x2107,
872  0x210A, 0x2113,
873  0x2115, 0x2115,
874  0x2119, 0x211D,
875  0x2124, 0x2124,
876  0x2126, 0x2126,
877  0x2128, 0x2128,
878  0x212A, 0x212D,
879  0x212F, 0x2131,
880  0x2133, 0x2139,
881  0x213D, 0x213F,
882  0x2145, 0x2149,
883  0x2160, 0x2183,
884  0x2336, 0x237A,
885  0x2395, 0x2395,
886  0x249C, 0x24E9,
887  0x3005, 0x3007,
888  0x3021, 0x3029,
889  0x3031, 0x3035,
890  0x3038, 0x303C,
891  0x3041, 0x3096,
892  0x309D, 0x309F,
893  0x30A1, 0x30FA,
894  0x30FC, 0x30FF,
895  0x3105, 0x312C,
896  0x3131, 0x318E,
897  0x3190, 0x31B7,
898  0x31F0, 0x321C,
899  0x3220, 0x3243,
900  0x3260, 0x327B,
901  0x327F, 0x32B0,
902  0x32C0, 0x32CB,
903  0x32D0, 0x32FE,
904  0x3300, 0x3376,
905  0x337B, 0x33DD,
906  0x33E0, 0x33FE,
907  0x3400, 0x4DB5,
908  0x4E00, 0x9FA5,
909  0xA000, 0xA48C,
910  0xAC00, 0xD7A3,
911  0xD800, 0xFA2D,
912  0xFA30, 0xFA6A,
913  0xFB00, 0xFB06,
914  0xFB13, 0xFB17,
915  0xFF21, 0xFF3A,
916  0xFF41, 0xFF5A,
917  0xFF66, 0xFFBE,
918  0xFFC2, 0xFFC7,
919  0xFFCA, 0xFFCF,
920  0xFFD2, 0xFFD7,
921  0xFFDA, 0xFFDC,
922  0x10300, 0x1031E,
923  0x10320, 0x10323,
924  0x10330, 0x1034A,
925  0x10400, 0x10425,
926  0x10428, 0x1044D,
927  0x1D000, 0x1D0F5,
928  0x1D100, 0x1D126,
929  0x1D12A, 0x1D166,
930  0x1D16A, 0x1D172,
931  0x1D183, 0x1D184,
932  0x1D18C, 0x1D1A9,
933  0x1D1AE, 0x1D1DD,
934  0x1D400, 0x1D454,
935  0x1D456, 0x1D49C,
936  0x1D49E, 0x1D49F,
937  0x1D4A2, 0x1D4A2,
938  0x1D4A5, 0x1D4A6,
939  0x1D4A9, 0x1D4AC,
940  0x1D4AE, 0x1D4B9,
941  0x1D4BB, 0x1D4BB,
942  0x1D4BD, 0x1D4C0,
943  0x1D4C2, 0x1D4C3,
944  0x1D4C5, 0x1D505,
945  0x1D507, 0x1D50A,
946  0x1D50D, 0x1D514,
947  0x1D516, 0x1D51C,
948  0x1D51E, 0x1D539,
949  0x1D53B, 0x1D53E,
950  0x1D540, 0x1D544,
951  0x1D546, 0x1D546,
952  0x1D54A, 0x1D550,
953  0x1D552, 0x1D6A3,
954  0x1D6A8, 0x1D7C9,
955  0x20000, 0x2A6D6,
956  0x2F800, 0x2FA1D,
957  0xF0000, 0xFFFFD,
958  0x100000, 0x10FFFD
959 };
960 
961 /* End of stringprep tables */
962 
963 
964 /* Is the given Unicode codepoint in the given table of ranges? */
965 #define IS_CODE_IN_TABLE(code, map) is_code_in_table(code, map, lengthof(map))
966 
/*
 * Three-way comparison of a single codepoint against one inclusive range.
 *
 * The signature (two const void pointers, key first, array element second)
 * is the shape expected of a bsearch() comparison callback.
 *
 * a: points to the codepoint being looked up (one pg_wchar).
 * b: points to a table entry, read as two consecutive pg_wchar values:
 *    range[0] is the lower bound, range[1] the upper bound.
 *
 * Returns -1 if the codepoint is below the range, 1 if it is above it,
 * and 0 if it lies within [range[0], range[1]].
 */
967 static int
968 codepoint_range_cmp(const void *a, const void *b)
969 {
970  const pg_wchar *key = (const pg_wchar *) a;
971  const pg_wchar *range = (const pg_wchar *) b;
972 
973  if (*key < range[0])
974  return -1; /* less than lower bound */
975  if (*key > range[1])
976  return 1; /* greater than upper bound */
977 
978  return 0; /* within range */
979 }
980 
/*
 * Report whether 'code' falls inside any range stored in 'map'.
 *
 * map:     flat array of pg_wchar laid out as (first, last) pairs, both
 *          bounds inclusive.
 * mapsize: number of pg_wchar values in 'map' (not the number of pairs);
 *          must be even, which is asserted.
 *
 * The lookup uses bsearch(), so the pairs have to be in ascending order;
 * the range tables defined in this file are written that way.
 *
 * Returns true if a containing range is found, false otherwise.
 */
981 static bool
982 is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
983 {
984  Assert(mapsize % 2 == 0);
985 
 /*
  * Cheap rejection: anything below the first lower bound or above the last
  * upper bound cannot be in the table.
  */
986  if (code < map[0] || code > map[mapsize - 1])
987  return false;
988 
 /*
  * Each bsearch element is one (first, last) pair: mapsize / 2 elements,
  * each two pg_wchar wide.
  */
989  if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
991  return true;
992  else
993  return false;
994 }
995 
996 /*
997  * Calculate the length in characters of a null-terminated UTF-8 string.
998  *
999  * Returns -1 if the input is not valid UTF-8.
1000  */
1001 static int
1003 {
1004  const unsigned char *p = (const unsigned char *) source;
1005  int l;
1006  int num_chars = 0;
1007 
 /* Walk the string one encoded character at a time until the terminator. */
1008  while (*p)
1009  {
 /* l is the number of bytes p is advanced by for this character */
1010  l = pg_utf_mblen(p);
1011 
 /* Validate before advancing; one bad sequence rejects the whole string. */
1012  if (!pg_utf8_islegal(p, l))
1013  return -1;
1014 
1015  p += l;
1016  num_chars++;
1017  }
1018 
1019  return num_chars;
1020 }
1021 
1022 
1023 /*
1024  * pg_saslprep - Normalize a password with SASLprep.
1025  *
1026  * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL
1027  * supports many encodings, so we don't blindly assume that. pg_saslprep
1028  * will check if the input looks like valid UTF-8, and returns
1029  * SASLPREP_INVALID_UTF8 if not.
1030  *
1031  * If the string contains prohibited characters (or more precisely, if the
1032  * output string would contain prohibited characters after normalization),
1033  * returns SASLPREP_PROHIBITED.
1034  *
1035  * On success, returns SASLPREP_SUCCESS, and the normalized string in
1036  * *output.
1037  *
1038  * In frontend, the normalized string is malloc'd, and the caller is
1039  * responsible for freeing it. If an allocation fails, returns
1040  * SASLPREP_OOM. In backend, the normalized string is palloc'd instead,
1041  * and a failed allocation leads to ereport(ERROR).
1042  */
1044 pg_saslprep(const char *input, char **output)
1045 {
1046  pg_wchar *input_chars = NULL;
1047  pg_wchar *output_chars = NULL;
1048  int input_size;
1049  char *result;
1050  int result_size;
1051  int count;
1052  int i;
1053  bool contains_RandALCat;
1054  unsigned char *p;
1055  pg_wchar *wp;
1056 
1057  /* Ensure we return *output as NULL on failure */
1058  *output = NULL;
1059 
1060  /*
1061  * Quick check if the input is pure ASCII. An ASCII string requires no
1062  * further processing.
1063  */
1064  if (pg_is_ascii(input))
1065  {
1066  *output = STRDUP(input);
1067  if (!(*output))
1068  goto oom;
1069  return SASLPREP_SUCCESS;
1070  }
1071 
1072  /*
1073  * Convert the input from UTF-8 to an array of Unicode codepoints.
1074  *
1075  * This also checks that the input is a legal UTF-8 string.
1076  */
1077  input_size = pg_utf8_string_len(input);
1078  if (input_size < 0)
1079  return SASLPREP_INVALID_UTF8;
1080 
 /* One extra slot for the zero terminator written after the decode loop. */
1081  input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
1082  if (!input_chars)
1083  goto oom;
1084 
1085  p = (unsigned char *) input;
1086  for (i = 0; i < input_size; i++)
1087  {
1088  input_chars[i] = utf8_to_unicode(p);
1089  p += pg_utf_mblen(p);
1090  }
1091  input_chars[i] = (pg_wchar) '\0';
1092 
1093  /*
1094  * The steps below correspond to the steps listed in [RFC3454], Section
1095  * "2. Preparation Overview"
1096  */
1097 
1098  /*
1099  * 1) Map -- For each character in the input, check if it has a mapping
1100  * and, if so, replace it with its mapping.
1101  */
 /*
  * The mapping is done in place: 'count' is the write position and never
  * runs ahead of the read position 'i', so input_chars can only shrink.
  */
1102  count = 0;
1103  for (i = 0; i < input_size; i++)
1104  {
1105  pg_wchar code = input_chars[i];
1106 
1108  input_chars[count++] = 0x0020;
1110  {
1111  /* map to nothing */
1112  }
1113  else
1114  input_chars[count++] = code;
1115  }
1116  input_chars[count] = (pg_wchar) '\0';
1117  input_size = count;
1118 
1119  if (input_size == 0)
1120  goto prohibited; /* don't allow empty password */
1121 
1122  /*
1123  * 2) Normalize -- Normalize the result of step 1 using Unicode
1124  * normalization.
1125  */
1126  output_chars = unicode_normalize(UNICODE_NFKC, input_chars);
1127  if (!output_chars)
1128  goto oom;
1129 
1130  /*
1131  * 3) Prohibit -- Check for any characters that are not allowed in the
1132  * output. If any are found, return an error.
1133  */
 /*
  * NOTE(review): this loop reads input_chars[] (the mapped but not yet
  * normalized array, bounded by input_size), not the output_chars[] array
  * produced by step 2. Confirm whether checking the pre-normalization
  * text is intended.
  */
1134  for (i = 0; i < input_size; i++)
1135  {
1136  pg_wchar code = input_chars[i];
1137 
1139  goto prohibited;
1141  goto prohibited;
1142  }
1143 
1144  /*
1145  * 4) Check bidi -- Possibly check for right-to-left characters, and if
1146  * any are found, make sure that the whole string satisfies the
1147  * requirements for bidirectional strings. If the string does not satisfy
1148  * the requirements for bidirectional strings, return an error.
1149  *
1150  * [RFC3454], Section "6. Bidirectional Characters" explains in more
1151  * detail what that means:
1152  *
1153  * "In any profile that specifies bidirectional character handling, all
1154  * three of the following requirements MUST be met:
1155  *
1156  * 1) The characters in section 5.8 MUST be prohibited.
1157  *
1158  * 2) If a string contains any RandALCat character, the string MUST NOT
1159  * contain any LCat character.
1160  *
1161  * 3) If a string contains any RandALCat character, a RandALCat character
1162  * MUST be the first character of the string, and a RandALCat character
1163  * MUST be the last character of the string."
1164  */
 /*
  * NOTE(review): the bidi scans below likewise examine input_chars[], not
  * output_chars[] -- confirm this is intended.
  */
1165  contains_RandALCat = false;
1166  for (i = 0; i < input_size; i++)
1167  {
1168  pg_wchar code = input_chars[i];
1169 
1171  {
1172  contains_RandALCat = true;
1173  break;
1174  }
1175  }
1176 
1177  if (contains_RandALCat)
1178  {
 /* input_size > 0 is guaranteed by the empty-string check after step 1 */
1179  pg_wchar first = input_chars[0];
1180  pg_wchar last = input_chars[input_size - 1];
1181 
1182  for (i = 0; i < input_size; i++)
1183  {
1184  pg_wchar code = input_chars[i];
1185 
1187  goto prohibited;
1188  }
1189 
1192  goto prohibited;
1193  }
1194 
1195  /*
1196  * Finally, convert the result back to UTF-8.
1197  */
 /* First pass: encode into a scratch buffer just to total the byte length. */
1198  result_size = 0;
1199  for (wp = output_chars; *wp; wp++)
1200  {
1201  unsigned char buf[4];
1202 
1203  unicode_to_utf8(*wp, buf);
1204  result_size += pg_utf_mblen(buf);
1205  }
1206 
1207  result = ALLOC(result_size + 1);
1208  if (!result)
1209  goto oom;
1210 
1211  /*
1212  * There are no error exits below here, so the error exit paths don't need
1213  * to worry about possibly freeing "result".
1214  */
 /* Second pass: encode for real, directly into the result buffer. */
1215  p = (unsigned char *) result;
1216  for (wp = output_chars; *wp; wp++)
1217  {
1218  unicode_to_utf8(*wp, p);
1219  p += pg_utf_mblen(p);
1220  }
1221  Assert((char *) p == result + result_size);
1222  *p = '\0';
1223 
1224  FREE(input_chars);
1225  FREE(output_chars);
1226 
1227  *output = result;
1228  return SASLPREP_SUCCESS;
1229 
 /*
  * Both failure exits release whichever work arrays were allocated; they
  * differ only in the status code returned.
  */
1230 prohibited:
1231  if (input_chars)
1232  FREE(input_chars);
1233  if (output_chars)
1234  FREE(output_chars);
1235 
1236  return SASLPREP_PROHIBITED;
1237 
1238 oom:
1239  if (input_chars)
1240  FREE(input_chars);
1241  if (output_chars)
1242  FREE(output_chars);
1243 
1244  return SASLPREP_OOM;
1245 }
FILE * input
FILE * output
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
unsigned int pg_wchar
Definition: mbprint.c:31
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:73
#define pg_utf_mblen
Definition: pg_wchar.h:572
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
static int codepoint_range_cmp(const void *a, const void *b)
Definition: saslprep.c:968
static const pg_wchar unassigned_codepoint_ranges[]
Definition: saslprep.c:157
static const pg_wchar non_ascii_space_ranges[]
Definition: saslprep.c:66
static const pg_wchar RandALCat_codepoint_ranges[]
Definition: saslprep.c:558
#define STRDUP(s)
Definition: saslprep.c:38
#define IS_CODE_IN_TABLE(code, map)
Definition: saslprep.c:965
pg_saslprep_rc pg_saslprep(const char *input, char **output)
Definition: saslprep.c:1044
#define ALLOC(size)
Definition: saslprep.c:39
#define FREE(size)
Definition: saslprep.c:40
static const pg_wchar LCat_codepoint_ranges[]
Definition: saslprep.c:597
static const pg_wchar commonly_mapped_to_nothing_ranges[]
Definition: saslprep.c:81
static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
Definition: saslprep.c:982
static const pg_wchar prohibited_output_ranges[]
Definition: saslprep.c:116
static int pg_utf8_string_len(const char *source)
Definition: saslprep.c:1002
pg_saslprep_rc
Definition: saslprep.h:21
@ SASLPREP_INVALID_UTF8
Definition: saslprep.h:24
@ SASLPREP_PROHIBITED
Definition: saslprep.h:25
@ SASLPREP_OOM
Definition: saslprep.h:23
@ SASLPREP_SUCCESS
Definition: saslprep.h:22
bool pg_is_ascii(const char *str)
Definition: string.c:133
pg_wchar * unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
Definition: unicode_norm.c:402
@ UNICODE_NFKC
Definition: unicode_norm.h:23
bool pg_utf8_islegal(const unsigned char *source, int length)
Definition: wchar.c:2014
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:484
pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: wchar.c:680