PostgreSQL Source Code  git master
unicode_category.h File Reference
#include "mb/pg_wchar.h"
Include dependency graph for unicode_category.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Typedefs

typedef enum pg_unicode_category pg_unicode_category
 

Enumerations

enum  pg_unicode_category {
  PG_U_UNASSIGNED = 0 , PG_U_UPPERCASE_LETTER = 1 , PG_U_LOWERCASE_LETTER = 2 , PG_U_TITLECASE_LETTER = 3 ,
  PG_U_MODIFIER_LETTER = 4 , PG_U_OTHER_LETTER = 5 , PG_U_NONSPACING_MARK = 6 , PG_U_ENCLOSING_MARK = 7 ,
  PG_U_SPACING_MARK = 8 , PG_U_DECIMAL_NUMBER = 9 , PG_U_LETTER_NUMBER = 10 , PG_U_OTHER_NUMBER = 11 ,
  PG_U_SPACE_SEPARATOR = 12 , PG_U_LINE_SEPARATOR = 13 , PG_U_PARAGRAPH_SEPARATOR = 14 , PG_U_CONTROL = 15 ,
  PG_U_FORMAT = 16 , PG_U_PRIVATE_USE = 17 , PG_U_SURROGATE = 18 , PG_U_DASH_PUNCTUATION = 19 ,
  PG_U_OPEN_PUNCTUATION = 20 , PG_U_CLOSE_PUNCTUATION = 21 , PG_U_CONNECTOR_PUNCTUATION = 22 , PG_U_OTHER_PUNCTUATION = 23 ,
  PG_U_MATH_SYMBOL = 24 , PG_U_CURRENCY_SYMBOL = 25 , PG_U_MODIFIER_SYMBOL = 26 , PG_U_OTHER_SYMBOL = 27 ,
  PG_U_INITIAL_PUNCTUATION = 28 , PG_U_FINAL_PUNCTUATION = 29
}
 

Functions

pg_unicode_category unicode_category (pg_wchar ucs)
 
const char * unicode_category_string (pg_unicode_category category)
 
const char * unicode_category_abbrev (pg_unicode_category category)
 

Typedef Documentation

◆ pg_unicode_category

typedef enum pg_unicode_category pg_unicode_category

Enumeration Type Documentation

◆ pg_unicode_category

Enumerator
PG_U_UNASSIGNED 
PG_U_UPPERCASE_LETTER 
PG_U_LOWERCASE_LETTER 
PG_U_TITLECASE_LETTER 
PG_U_MODIFIER_LETTER 
PG_U_OTHER_LETTER 
PG_U_NONSPACING_MARK 
PG_U_ENCLOSING_MARK 
PG_U_SPACING_MARK 
PG_U_DECIMAL_NUMBER 
PG_U_LETTER_NUMBER 
PG_U_OTHER_NUMBER 
PG_U_SPACE_SEPARATOR 
PG_U_LINE_SEPARATOR 
PG_U_PARAGRAPH_SEPARATOR 
PG_U_CONTROL 
PG_U_FORMAT 
PG_U_PRIVATE_USE 
PG_U_SURROGATE 
PG_U_DASH_PUNCTUATION 
PG_U_OPEN_PUNCTUATION 
PG_U_CLOSE_PUNCTUATION 
PG_U_CONNECTOR_PUNCTUATION 
PG_U_OTHER_PUNCTUATION 
PG_U_MATH_SYMBOL 
PG_U_CURRENCY_SYMBOL 
PG_U_MODIFIER_SYMBOL 
PG_U_OTHER_SYMBOL 
PG_U_INITIAL_PUNCTUATION 
PG_U_FINAL_PUNCTUATION 

Definition at line 30 of file unicode_category.h.

31 {
32  PG_U_UNASSIGNED = 0, /* Cn */
33  PG_U_UPPERCASE_LETTER = 1, /* Lu */
34  PG_U_LOWERCASE_LETTER = 2, /* Ll */
35  PG_U_TITLECASE_LETTER = 3, /* Lt */
36  PG_U_MODIFIER_LETTER = 4, /* Lm */
37  PG_U_OTHER_LETTER = 5, /* Lo */
38  PG_U_NONSPACING_MARK = 6, /* Mn */
39  PG_U_ENCLOSING_MARK = 7, /* Me */
40  PG_U_SPACING_MARK = 8, /* Mc */
41  PG_U_DECIMAL_NUMBER = 9, /* Nd */
42  PG_U_LETTER_NUMBER = 10, /* Nl */
43  PG_U_OTHER_NUMBER = 11, /* No */
44  PG_U_SPACE_SEPARATOR = 12, /* Zs */
45  PG_U_LINE_SEPARATOR = 13, /* Zl */
46  PG_U_PARAGRAPH_SEPARATOR = 14, /* Zp */
47  PG_U_CONTROL = 15, /* Cc */
48  PG_U_FORMAT = 16, /* Cf */
49  PG_U_PRIVATE_USE = 17, /* Co */
50  PG_U_SURROGATE = 18, /* Cs */
51  PG_U_DASH_PUNCTUATION = 19, /* Pd */
52  PG_U_OPEN_PUNCTUATION = 20, /* Ps */
53  PG_U_CLOSE_PUNCTUATION = 21, /* Pe */
54  PG_U_CONNECTOR_PUNCTUATION = 22, /* Pc */
55  PG_U_OTHER_PUNCTUATION = 23, /* Po */
56  PG_U_MATH_SYMBOL = 24, /* Sm */
57  PG_U_CURRENCY_SYMBOL = 25, /* Sc */
58  PG_U_MODIFIER_SYMBOL = 26, /* Sk */
59  PG_U_OTHER_SYMBOL = 27, /* So */
60  PG_U_INITIAL_PUNCTUATION = 28, /* Pi */
61  PG_U_FINAL_PUNCTUATION = 29 /* Pf */
62 } pg_unicode_category;
pg_unicode_category
@ PG_U_CONNECTOR_PUNCTUATION
@ PG_U_OTHER_SYMBOL
@ PG_U_DASH_PUNCTUATION
@ PG_U_UPPERCASE_LETTER
@ PG_U_DECIMAL_NUMBER
@ PG_U_CLOSE_PUNCTUATION
@ PG_U_NONSPACING_MARK
@ PG_U_INITIAL_PUNCTUATION
@ PG_U_CURRENCY_SYMBOL
@ PG_U_LETTER_NUMBER
@ PG_U_MODIFIER_SYMBOL
@ PG_U_SPACE_SEPARATOR
@ PG_U_OPEN_PUNCTUATION
@ PG_U_FORMAT
@ PG_U_PRIVATE_USE
@ PG_U_OTHER_LETTER
@ PG_U_PARAGRAPH_SEPARATOR
@ PG_U_CONTROL
@ PG_U_SPACING_MARK
@ PG_U_TITLECASE_LETTER
@ PG_U_OTHER_NUMBER
@ PG_U_MATH_SYMBOL
@ PG_U_LOWERCASE_LETTER
@ PG_U_LINE_SEPARATOR
@ PG_U_UNASSIGNED
@ PG_U_SURROGATE
@ PG_U_FINAL_PUNCTUATION
@ PG_U_MODIFIER_LETTER
@ PG_U_OTHER_PUNCTUATION
@ PG_U_ENCLOSING_MARK

Function Documentation

◆ unicode_category()

pg_unicode_category unicode_category ( pg_wchar  ucs)

Definition at line 25 of file unicode_category.c.

26 {
27  int min = 0;
28  int mid;
29  int max = lengthof(unicode_categories) - 1;
30 
31  Assert(ucs <= 0x10ffff);
32 
33  while (max >= min)
34  {
35  mid = (min + max) / 2;
36  if (ucs > unicode_categories[mid].last)
37  min = mid + 1;
38  else if (ucs < unicode_categories[mid].first)
39  max = mid - 1;
40  else
41  return unicode_categories[mid].category;
42  }
43 
44  return PG_U_UNASSIGNED;
45 }
#define lengthof(array)
Definition: c.h:775
Assert(fmt[strlen(fmt) - 1] !='\n')
static const pg_category_range unicode_categories[3302]

References Assert(), pg_category_range::category, lengthof, PG_U_UNASSIGNED, and unicode_categories.

Referenced by main(), and unicode_assigned().

◆ unicode_category_abbrev()

const char* unicode_category_abbrev ( pg_unicode_category  category)

Definition at line 125 of file unicode_category.c.

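126 {
127  switch (category)
128  {
129  case PG_U_UNASSIGNED:
130  return "Cn";
131  case PG_U_UPPERCASE_LETTER:
132  return "Lu";
133  case PG_U_LOWERCASE_LETTER:
134  return "Ll";
135  case PG_U_TITLECASE_LETTER:
136  return "Lt";
137  case PG_U_MODIFIER_LETTER:
138  return "Lm";
139  case PG_U_OTHER_LETTER:
140  return "Lo";
141  case PG_U_NONSPACING_MARK:
142  return "Mn";
143  case PG_U_ENCLOSING_MARK:
144  return "Me";
145  case PG_U_SPACING_MARK:
146  return "Mc";
147  case PG_U_DECIMAL_NUMBER:
148  return "Nd";
149  case PG_U_LETTER_NUMBER:
150  return "Nl";
151  case PG_U_OTHER_NUMBER:
152  return "No";
153  case PG_U_SPACE_SEPARATOR:
154  return "Zs";
155  case PG_U_LINE_SEPARATOR:
156  return "Zl";
157  case PG_U_PARAGRAPH_SEPARATOR:
158  return "Zp";
159  case PG_U_CONTROL:
160  return "Cc";
161  case PG_U_FORMAT:
162  return "Cf";
163  case PG_U_PRIVATE_USE:
164  return "Co";
165  case PG_U_SURROGATE:
166  return "Cs";
167  case PG_U_DASH_PUNCTUATION:
168  return "Pd";
169  case PG_U_OPEN_PUNCTUATION:
170  return "Ps";
171  case PG_U_CLOSE_PUNCTUATION:
172  return "Pe";
173  case PG_U_CONNECTOR_PUNCTUATION:
174  return "Pc";
175  case PG_U_OTHER_PUNCTUATION:
176  return "Po";
177  case PG_U_MATH_SYMBOL:
178  return "Sm";
179  case PG_U_CURRENCY_SYMBOL:
180  return "Sc";
181  case PG_U_MODIFIER_SYMBOL:
182  return "Sk";
183  case PG_U_OTHER_SYMBOL:
184  return "So";
185  case PG_U_INITIAL_PUNCTUATION:
186  return "Pi";
187  case PG_U_FINAL_PUNCTUATION:
188  return "Pf";
189  }
190 
191  Assert(false);
192  return "??"; /* keep compiler quiet */
193 }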

References Assert(), PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.

Referenced by main().

◆ unicode_category_string()

const char* unicode_category_string ( pg_unicode_category  category)

Definition at line 51 of file unicode_category.c.

52 {
53  switch (category)
54  {
55  case PG_U_UNASSIGNED:
56  return "Unassigned";
57  case PG_U_UPPERCASE_LETTER:
58  return "Uppercase_Letter";
59  case PG_U_LOWERCASE_LETTER:
60  return "Lowercase_Letter";
61  case PG_U_TITLECASE_LETTER:
62  return "Titlecase_Letter";
63  case PG_U_MODIFIER_LETTER:
64  return "Modifier_Letter";
65  case PG_U_OTHER_LETTER:
66  return "Other_Letter";
67  case PG_U_NONSPACING_MARK:
68  return "Nonspacing_Mark";
69  case PG_U_ENCLOSING_MARK:
70  return "Enclosing_Mark";
71  case PG_U_SPACING_MARK:
72  return "Spacing_Mark";
73  case PG_U_DECIMAL_NUMBER:
74  return "Decimal_Number";
75  case PG_U_LETTER_NUMBER:
76  return "Letter_Number";
77  case PG_U_OTHER_NUMBER:
78  return "Other_Number";
79  case PG_U_SPACE_SEPARATOR:
80  return "Space_Separator";
81  case PG_U_LINE_SEPARATOR:
82  return "Line_Separator";
83  case PG_U_PARAGRAPH_SEPARATOR:
84  return "Paragraph_Separator";
85  case PG_U_CONTROL:
86  return "Control";
87  case PG_U_FORMAT:
88  return "Format";
89  case PG_U_PRIVATE_USE:
90  return "Private_Use";
91  case PG_U_SURROGATE:
92  return "Surrogate";
93  case PG_U_DASH_PUNCTUATION:
94  return "Dash_Punctuation";
95  case PG_U_OPEN_PUNCTUATION:
96  return "Open_Punctuation";
97  case PG_U_CLOSE_PUNCTUATION:
98  return "Close_Punctuation";
99  case PG_U_CONNECTOR_PUNCTUATION:
100  return "Connector_Punctuation";
101  case PG_U_OTHER_PUNCTUATION:
102  return "Other_Punctuation";
103  case PG_U_MATH_SYMBOL:
104  return "Math_Symbol";
105  case PG_U_CURRENCY_SYMBOL:
106  return "Currency_Symbol";
107  case PG_U_MODIFIER_SYMBOL:
108  return "Modifier_Symbol";
109  case PG_U_OTHER_SYMBOL:
110  return "Other_Symbol";
111  case PG_U_INITIAL_PUNCTUATION:
112  return "Initial_Punctuation";
113  case PG_U_FINAL_PUNCTUATION:
114  return "Final_Punctuation";
115  }
116 
117  Assert(false);
118  return "Unrecognized"; /* keep compiler quiet */
119 }

References Assert(), PG_U_CLOSE_PUNCTUATION, PG_U_CONNECTOR_PUNCTUATION, PG_U_CONTROL, PG_U_CURRENCY_SYMBOL, PG_U_DASH_PUNCTUATION, PG_U_DECIMAL_NUMBER, PG_U_ENCLOSING_MARK, PG_U_FINAL_PUNCTUATION, PG_U_FORMAT, PG_U_INITIAL_PUNCTUATION, PG_U_LETTER_NUMBER, PG_U_LINE_SEPARATOR, PG_U_LOWERCASE_LETTER, PG_U_MATH_SYMBOL, PG_U_MODIFIER_LETTER, PG_U_MODIFIER_SYMBOL, PG_U_NONSPACING_MARK, PG_U_OPEN_PUNCTUATION, PG_U_OTHER_LETTER, PG_U_OTHER_NUMBER, PG_U_OTHER_PUNCTUATION, PG_U_OTHER_SYMBOL, PG_U_PARAGRAPH_SEPARATOR, PG_U_PRIVATE_USE, PG_U_SPACE_SEPARATOR, PG_U_SPACING_MARK, PG_U_SURROGATE, PG_U_TITLECASE_LETTER, PG_U_UNASSIGNED, and PG_U_UPPERCASE_LETTER.

Referenced by main().