PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_locale_builtin.c
Go to the documentation of this file.
1/*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities for builtin provider
4 *
5 * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale_builtin.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12#include "postgres.h"
13
14#include "catalog/pg_database.h"
16#include "common/unicode_case.h"
18#include "miscadmin.h"
19#include "utils/builtins.h"
20#include "utils/pg_locale.h"
21#include "utils/syscache.h"
22
24 MemoryContext context);
26
/*
 * Iteration state for the word boundary callback used by strtitle_builtin().
 * The fields are filled in by strtitle_builtin() and advanced by
 * initcap_wbnext().
 */
struct WordBoundaryState
{
	const char *str;			/* string being scanned */
	size_t		len;			/* length of str, in bytes */
	size_t		offset;			/* byte offset of next character to examine */
	bool		posix;			/* "posix" flag handed to pg_u_isalnum() */
	bool		init;			/* set once the first boundary was returned */
	bool		prev_alnum;		/* pg_u_isalnum() result at last boundary */
};
36
37/*
38 * In UTF-8, pg_wchar is guaranteed to be the code point value.
 *
 * to_char32: hand back the pg_wchar argument as a char32_t by means of a
 * plain cast.
 *
 * NOTE(review): this listing lacks the declarator line and one statement at
 * the top of the body (listing lines 41 and 43).  The page's symbol index
 * gives the signature as "static char32_t to_char32(pg_wchar wc)"; the
 * missing statement should be checked against the real file.
39 */
40static inline char32_t
42{
44 return (char32_t) wc;
45}
46
/*
 * to_pg_wchar: hand back the char32_t argument as a pg_wchar by means of a
 * plain cast (the reverse direction of to_char32).
 *
 * NOTE(review): this listing lacks the declarator line and one statement at
 * the top of the body (listing lines 48 and 50).  The page's symbol index
 * gives the signature as "static pg_wchar to_pg_wchar(char32_t c32)"; the
 * missing statement should be checked against the real file.
 */
47static inline pg_wchar
49{
51 return (pg_wchar) c32;
52}
53
54/*
55 * Simple word boundary iterator that draws boundaries each time the result of
56 * pg_u_isalnum() changes.
57 */
58static size_t
60{
62
63 while (wbstate->offset < wbstate->len)
64 {
65 char32_t u = utf8_to_unicode((const unsigned char *) wbstate->str +
66 wbstate->offset);
67 bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
68
69 if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
70 {
71 size_t prev_offset = wbstate->offset;
72
73 wbstate->init = true;
74 wbstate->offset += unicode_utf8len(u);
75 wbstate->prev_alnum = curr_alnum;
76 return prev_offset;
77 }
78
79 wbstate->offset += unicode_utf8len(u);
80 }
81
82 return wbstate->len;
83}
84
85static size_t
86strlower_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
87 pg_locale_t locale)
88{
89 return unicode_strlower(dest, destsize, src, srclen,
90 locale->builtin.casemap_full);
91}
92
93static size_t
94strtitle_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
95 pg_locale_t locale)
96{
97 struct WordBoundaryState wbstate = {
98 .str = src,
99 .len = srclen,
100 .offset = 0,
101 .posix = !locale->builtin.casemap_full,
102 .init = false,
103 .prev_alnum = false,
104 };
105
106 return unicode_strtitle(dest, destsize, src, srclen,
107 locale->builtin.casemap_full,
109}
110
111static size_t
112strupper_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
113 pg_locale_t locale)
114{
115 return unicode_strupper(dest, destsize, src, srclen,
116 locale->builtin.casemap_full);
117}
118
119static size_t
120strfold_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
121 pg_locale_t locale)
122{
123 return unicode_strfold(dest, destsize, src, srclen,
124 locale->builtin.casemap_full);
125}
126
127static bool
129{
130 return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
131}
132
133static bool
135{
136 return pg_u_isalpha(to_char32(wc));
137}
138
139static bool
141{
142 return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
143}
144
145static bool
147{
148 return pg_u_isupper(to_char32(wc));
149}
150
151static bool
153{
154 return pg_u_islower(to_char32(wc));
155}
156
157static bool
159{
160 return pg_u_isgraph(to_char32(wc));
161}
162
163static bool
165{
166 return pg_u_isprint(to_char32(wc));
167}
168
169static bool
171{
172 return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
173}
174
175static bool
177{
178 return pg_u_isspace(to_char32(wc));
179}
180
181static bool
183{
184 return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
185}
186
187static bool
189{
190 return pg_u_prop_cased(to_char32(wc));
191}
192
193static pg_wchar
198
199static pg_wchar
204
207 .strtitle = strtitle_builtin,
208 .strupper = strupper_builtin,
209 .strfold = strfold_builtin,
210 /* uses plain ASCII semantics for historical reasons */
211 .downcase_ident = NULL,
212 .wc_isdigit = wc_isdigit_builtin,
213 .wc_isalpha = wc_isalpha_builtin,
214 .wc_isalnum = wc_isalnum_builtin,
215 .wc_isupper = wc_isupper_builtin,
216 .wc_islower = wc_islower_builtin,
217 .wc_isgraph = wc_isgraph_builtin,
218 .wc_isprint = wc_isprint_builtin,
219 .wc_ispunct = wc_ispunct_builtin,
220 .wc_isspace = wc_isspace_builtin,
221 .wc_isxdigit = wc_isxdigit_builtin,
222 .wc_iscased = wc_iscased_builtin,
223 .wc_tolower = wc_tolower_builtin,
224 .wc_toupper = wc_toupper_builtin,
225};
226
/*
 * create_pg_locale_builtin (signature per the page's symbol index:
 * "pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)").
 *
 * Builds a pg_locale_struct for a builtin-provider locale.  The locale name
 * is obtained from a syscache entry (one branch reports lookup failures for
 * the current database, the other for the given collation), and a
 * zero-initialized result is then allocated in 'context' and filled in.
 *
 * NOTE(review): several statements of this function are absent from this
 * listing (the declarator, the declaration of 'result', the branch
 * condition, the syscache lookups and attribute fetches, one statement before
 * the allocation, and the statement guarded by the final "if").  Consult the
 * real file before relying on the details.
 */
229{
230 const char *locstr;
232
234 {
235 HeapTuple tp;
236 Datum datum;
237
239 if (!HeapTupleIsValid(tp))
240 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
244 ReleaseSysCache(tp);
245 }
246 else
247 {
248 HeapTuple tp;
249 Datum datum;
250
252 if (!HeapTupleIsValid(tp))
253 elog(ERROR, "cache lookup failed for collation %u", collid);
257 ReleaseSysCache(tp);
258 }
259
261
262 result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
263
/* the locale name is copied into 'context' alongside the struct itself */
264 result->builtin.locale = MemoryContextStrdup(context, locstr);
/* only PG_UNICODE_FAST selects full case mapping */
265 result->builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
266 result->deterministic = true;
/* every builtin locale is flagged collate_is_c; only "C" is ctype_is_c */
267 result->collate_is_c = true;
268 result->ctype_is_c = (strcmp(locstr, "C") == 0);
269 if (!result->ctype_is_c)
271
272 return result;
273}
274
/*
 * get_collation_actual_version_builtin (signature per the page's symbol
 * index: "char *get_collation_actual_version_builtin(const char
 * *collcollate)").
 *
 * Returns the collation version string for a builtin-provider locale name:
 * "1" for each of "C", "C.UTF-8" and "PG_UNICODE_FAST".  Any other name
 * takes the error branch, whose message is "invalid locale name ... for
 * builtin provider".
 *
 * NOTE(review): the declarator line and the first two lines of the error
 * report (listing lines 276, 292 and 293) are absent from this listing.
 */
275char *
277{
278 /*
279 * The supported locales (C, C.UTF-8 and PG_UNICODE_FAST) all report the
280 * same fixed version and are not expected to change, but track it anyway.
281 *
282 * Note that the character semantics may change for some locales, but the
283 * collation version only tracks changes to sort order.
284 */
285 if (strcmp(collcollate, "C") == 0)
286 return "1";
287 else if (strcmp(collcollate, "C.UTF-8") == 0)
288 return "1";
289 else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
290 return "1";
291 else
294 errmsg("invalid locale name \"%s\" for builtin provider",
295 collcollate)));
296
297 return NULL; /* keep compiler quiet */
298}
#define TextDatumGetCString(d)
Definition builtins.h:99
#define Assert(condition)
Definition c.h:943
uint32 result
Oid collid
int errcode(int sqlerrcode)
Definition elog.c:875
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
Oid MyDatabaseId
Definition globals.c:96
#define HeapTupleIsValid(tuple)
Definition htup.h:78
#define PG_UTF8
Definition mbprint.c:43
unsigned int pg_wchar
Definition mbprint.c:31
static char32_t utf8_to_unicode(const unsigned char *c)
Definition mbprint.c:53
int GetDatabaseEncoding(void)
Definition mbutils.c:1389
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition mcxt.c:1897
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition mcxt.c:1269
static char * errmsg
const char * builtin_validate_locale(int encoding, const char *locale)
Definition pg_locale.c:1685
static pg_wchar wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
static size_t strtitle_builtin(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
static pg_wchar wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)
static bool wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
static size_t initcap_wbnext(void *state)
static bool wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_iscased_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
static pg_wchar to_pg_wchar(char32_t c32)
static char32_t to_char32(pg_wchar wc)
static size_t strfold_builtin(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
char * get_collation_actual_version_builtin(const char *collcollate)
static bool wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
static size_t strlower_builtin(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
static bool wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
static size_t strupper_builtin(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
static bool wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
static bool wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
static const struct ctype_methods ctype_methods_builtin
static int unicode_utf8len(char32_t c)
Definition pg_wchar.h:460
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
unsigned int Oid
static int fb(int x)
size_t(* strlower)(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
Definition pg_locale.h:101
struct pg_locale_struct::@172::@174 builtin
void ReleaseSysCache(HeapTuple tuple)
Definition syscache.c:265
Datum SysCacheGetAttrNotNull(SysCacheIdentifier cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition syscache.c:626
HeapTuple SearchSysCache1(SysCacheIdentifier cacheId, Datum key1)
Definition syscache.c:221
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, size_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate)
size_t unicode_strfold(char *dst, size_t dstsize, const char *src, size_t srclen, bool full)
size_t unicode_strupper(char *dst, size_t dstsize, const char *src, size_t srclen, bool full)
char32_t unicode_lowercase_simple(char32_t code)
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, size_t srclen, bool full)
char32_t unicode_uppercase_simple(char32_t code)
bool pg_u_isalnum(char32_t code, bool posix)
bool pg_u_prop_cased(char32_t code)
bool pg_u_isprint(char32_t code)
bool pg_u_islower(char32_t code)
bool pg_u_isdigit(char32_t code, bool posix)
bool pg_u_isalpha(char32_t code)
bool pg_u_isxdigit(char32_t code, bool posix)
bool pg_u_ispunct(char32_t code, bool posix)
bool pg_u_isgraph(char32_t code)
bool pg_u_isspace(char32_t code)
bool pg_u_isupper(char32_t code)