PostgreSQL Source Code  git master
unicode_category.c File Reference
Include dependency graph for unicode_category.c:

Go to the source code of this file.

Functions

pg_unicode_category unicode_category (pg_wchar ucs)
 
const char * unicode_category_string (pg_unicode_category category)
 
const char * unicode_category_abbrev (pg_unicode_category category)
 

Function Documentation

◆ unicode_category()

pg_unicode_category unicode_category ( pg_wchar  ucs)

Definition at line 25 of file unicode_category.c.

26 {
27  int min = 0;
28  int mid;
29  int max = lengthof(unicode_categories) - 1;
30 
31  Assert(ucs <= 0x10ffff);
32 
33  while (max >= min)
34  {
35  mid = (min + max) / 2;
36  if (ucs > unicode_categories[mid].last)
37  min = mid + 1;
38  else if (ucs < unicode_categories[mid].first)
39  max = mid - 1;
40  else
41  return unicode_categories[mid].category;
42  }
43 
44  return PG_U_UNASSIGNED;
45 }
#define lengthof(array)
Definition: c.h:777
Assert(fmt[strlen(fmt) - 1] !='\n')
@ PG_U_UNASSIGNED
static const pg_category_range unicode_categories[3302]

References Assert(), pg_category_range::category, lengthof, PG_U_UNASSIGNED, and unicode_categories.

Referenced by main(), and unicode_assigned().

◆ unicode_category_abbrev()

const char* unicode_category_abbrev ( pg_unicode_category  category)

Definition at line 125 of file unicode_category.c.

126 {
127  switch (category)
128  {
129  case PG_U_UNASSIGNED:
130  return "Cn";
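131  case PG_U_UPPERCASE_LETTER:
132  return "Lu";
133  case PG_U_LOWERCASE_LETTER:
134  return "Ll";
135  case PG_U_TITLECASE_LETTER:
136  return "Lt";
137  case PG_U_MODIFIER_LETTER:
138  return "Lm";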
139  case PG_U_OTHER_LETTER:
140  return "Lo";
141  case PG_U_NONSPACING_MARK:
142  return "Mn";
143  case PG_U_ENCLOSING_MARK:
144  return "Me";
145  case PG_U_SPACING_MARK:
146  return "Mc";
147  case PG_U_DECIMAL_NUMBER:
148  return "Nd";
149  case PG_U_LETTER_NUMBER:
150  return "Nl";
151  case PG_U_OTHER_NUMBER:
152  return "No";
153  case PG_U_SPACE_SEPARATOR:
154  return "Zs";
155  case PG_U_LINE_SEPARATOR:
156  return "Zl";
157  case PG_U_PARAGRAPH_SEPARATOR:
158  return "Zp";
159  case PG_U_CONTROL:
160  return "Cc";
161  case PG_U_FORMAT:
162  return "Cf";
163  case PG_U_PRIVATE_USE:
164  return "Co";
165  case PG_U_SURROGATE:
166  return "Cs";
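167  case PG_U_DASH_PUNCTUATION:
168  return "Pd";
169  case PG_U_OPEN_PUNCTUATION:
170  return "Ps";
171  case PG_U_CLOSE_PUNCTUATION:
172  return "Pe";
173  case PG_U_CONNECTOR_PUNCTUATION:
174  return "Pc";
175  case PG_U_OTHER_PUNCTUATION:
176  return "Po";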
177  case PG_U_MATH_SYMBOL:
178  return "Sm";
179  case PG_U_CURRENCY_SYMBOL:
180  return "Sc";
181  case PG_U_MODIFIER_SYMBOL:
182  return "Sk";
183  case PG_U_OTHER_SYMBOL:
184  return "So";
185  case PG_U_INITIAL_PUNCTUATION:
186  return "Pi";
187  case PG_U_FINAL_PUNCTUATION:
188  return "Pf";
189  }
190 
191  Assert(false);
192  return "??"; /* keep compiler quiet */
193 }
@ PG_U_CONNECTOR_PUNCTUATION
@ PG_U_OTHER_SYMBOL
@ PG_U_DASH_PUNCTUATION
@ PG_U_UPPERCASE_LETTER
@ PG_U_DECIMAL_NUMBER
@ PG_U_CLOSE_PUNCTUATION
@ PG_U_NONSPACING_MARK
@ PG_U_INITIAL_PUNCTUATION
@ PG_U_CURRENCY_SYMBOL
@ PG_U_LETTER_NUMBER
@ PG_U_MODIFIER_SYMBOL
@ PG_U_SPACE_SEPARATOR
@ PG_U_OPEN_PUNCTUATION
@ PG_U_FORMAT
@ PG_U_PRIVATE_USE
@ PG_U_OTHER_LETTER
@ PG_U_PARAGRAPH_SEPARATOR
@ PG_U_CONTROL
@ PG_U_SPACING_MARK
@ PG_U_TITLECASE_LETTER
@ PG_U_OTHER_NUMBER
@ PG_U_MATH_SYMBOL
@ PG_U_LOWERCASE_LETTER
@ PG_U_LINE_SEPARATOR
@ PG_U_SURROGATE
@ PG_U_FINAL_PUNCTUATION
@ PG_U_MODIFIER_LETTER
@ PG_U_OTHER_PUNCTUATION
@ PG_U_ENCLOSING_MARK

References Assert(), PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.

Referenced by main().

◆ unicode_category_string()

const char* unicode_category_string ( pg_unicode_category  category)

Definition at line 51 of file unicode_category.c.

52 {
53  switch (category)
54  {
55  case PG_U_UNASSIGNED:
56  return "Unassigned";
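57  case PG_U_UPPERCASE_LETTER:
58  return "Uppercase_Letter";
59  case PG_U_LOWERCASE_LETTER:
60  return "Lowercase_Letter";
61  case PG_U_TITLECASE_LETTER:
62  return "Titlecase_Letter";
63  case PG_U_MODIFIER_LETTER:
64  return "Modifier_Letter";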
65  case PG_U_OTHER_LETTER:
66  return "Other_Letter";
67  case PG_U_NONSPACING_MARK:
68  return "Nonspacing_Mark";
69  case PG_U_ENCLOSING_MARK:
70  return "Enclosing_Mark";
71  case PG_U_SPACING_MARK:
72  return "Spacing_Mark";
73  case PG_U_DECIMAL_NUMBER:
74  return "Decimal_Number";
75  case PG_U_LETTER_NUMBER:
76  return "Letter_Number";
77  case PG_U_OTHER_NUMBER:
78  return "Other_Number";
79  case PG_U_SPACE_SEPARATOR:
80  return "Space_Separator";
81  case PG_U_LINE_SEPARATOR:
82  return "Line_Separator";
83  case PG_U_PARAGRAPH_SEPARATOR:
84  return "Paragraph_Separator";
85  case PG_U_CONTROL:
86  return "Control";
87  case PG_U_FORMAT:
88  return "Format";
89  case PG_U_PRIVATE_USE:
90  return "Private_Use";
91  case PG_U_SURROGATE:
92  return "Surrogate";
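93  case PG_U_DASH_PUNCTUATION:
94  return "Dash_Punctuation";
95  case PG_U_OPEN_PUNCTUATION:
96  return "Open_Punctuation";
97  case PG_U_CLOSE_PUNCTUATION:
98  return "Close_Punctuation";
99  case PG_U_CONNECTOR_PUNCTUATION:
100  return "Connector_Punctuation";
101  case PG_U_OTHER_PUNCTUATION:
102  return "Other_Punctuation";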
103  case PG_U_MATH_SYMBOL:
104  return "Math_Symbol";
105  case PG_U_CURRENCY_SYMBOL:
106  return "Currency_Symbol";
107  case PG_U_MODIFIER_SYMBOL:
108  return "Modifier_Symbol";
109  case PG_U_OTHER_SYMBOL:
110  return "Other_Symbol";
111  case PG_U_INITIAL_PUNCTUATION:
112  return "Initial_Punctuation";
113  case PG_U_FINAL_PUNCTUATION:
114  return "Final_Punctuation";
115  }
116 
117  Assert(false);
118  return "Unrecognized"; /* keep compiler quiet */
119 }

References Assert(), PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.

Referenced by main().