PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_locale_builtin.c
Go to the documentation of this file.
/*-----------------------------------------------------------------------
 *
 * PostgreSQL locale utilities for builtin provider
 *
 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
 *
 * src/backend/utils/adt/pg_locale_builtin.c
 *
 *-----------------------------------------------------------------------
 */
11
#include "postgres.h"

#include "catalog/pg_collation.h"
#include "catalog/pg_database.h"
#include "common/unicode_case.h"
#include "common/unicode_category.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/syscache.h"
24
26 MemoryContext context);
27extern char *get_collation_actual_version_builtin(const char *collcollate);
28extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
29 ssize_t srclen, pg_locale_t locale);
30extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
31 ssize_t srclen, pg_locale_t locale);
32extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
33 ssize_t srclen, pg_locale_t locale);
34extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
35 ssize_t srclen, pg_locale_t locale);
36
37
/*
 * Iteration state for initcap_wbnext().  strtitle_builtin() fills one of
 * these in and hands it to unicode_strtitle() as the opaque word-boundary
 * state.
 */
struct WordBoundaryState
{
	const char *str;			/* input string being scanned */
	size_t		len;			/* scan limit in bytes (taken from srclen) */
	size_t		offset;			/* byte offset of next character to examine */
	bool		posix;			/* passed through to pg_u_isalnum() */
	bool		init;			/* false until the first boundary is reported */
	bool		prev_alnum;		/* pg_u_isalnum() result at the most recently
								 * reported boundary */
};
47
48/*
49 * Simple word boundary iterator that draws boundaries each time the result of
50 * pg_u_isalnum() changes.
51 */
52static size_t
54{
55 struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
56
57 while (wbstate->offset < wbstate->len &&
58 wbstate->str[wbstate->offset] != '\0')
59 {
60 pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
61 wbstate->offset);
62 bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
63
64 if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
65 {
66 size_t prev_offset = wbstate->offset;
67
68 wbstate->init = true;
69 wbstate->offset += unicode_utf8len(u);
70 wbstate->prev_alnum = curr_alnum;
71 return prev_offset;
72 }
73
74 wbstate->offset += unicode_utf8len(u);
75 }
76
77 return wbstate->len;
78}
79
80size_t
81strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
83{
84 return unicode_strlower(dest, destsize, src, srclen,
85 locale->info.builtin.casemap_full);
86}
87
88size_t
89strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
91{
92 struct WordBoundaryState wbstate = {
93 .str = src,
94 .len = srclen,
95 .offset = 0,
96 .posix = !locale->info.builtin.casemap_full,
97 .init = false,
98 .prev_alnum = false,
99 };
100
101 return unicode_strtitle(dest, destsize, src, srclen,
102 locale->info.builtin.casemap_full,
103 initcap_wbnext, &wbstate);
104}
105
106size_t
107strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
109{
110 return unicode_strupper(dest, destsize, src, srclen,
111 locale->info.builtin.casemap_full);
112}
113
114size_t
115strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
117{
118 return unicode_strfold(dest, destsize, src, srclen,
119 locale->info.builtin.casemap_full);
120}
121
124{
125 const char *locstr;
126 pg_locale_t result;
127
128 if (collid == DEFAULT_COLLATION_OID)
129 {
130 HeapTuple tp;
131 Datum datum;
132
134 if (!HeapTupleIsValid(tp))
135 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
136 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
137 Anum_pg_database_datlocale);
138 locstr = TextDatumGetCString(datum);
139 ReleaseSysCache(tp);
140 }
141 else
142 {
143 HeapTuple tp;
144 Datum datum;
145
147 if (!HeapTupleIsValid(tp))
148 elog(ERROR, "cache lookup failed for collation %u", collid);
149 datum = SysCacheGetAttrNotNull(COLLOID, tp,
150 Anum_pg_collation_colllocale);
151 locstr = TextDatumGetCString(datum);
152 ReleaseSysCache(tp);
153 }
154
156
157 result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
158
159 result->info.builtin.locale = MemoryContextStrdup(context, locstr);
160 result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
161 result->provider = COLLPROVIDER_BUILTIN;
162 result->deterministic = true;
163 result->collate_is_c = true;
164 result->ctype_is_c = (strcmp(locstr, "C") == 0);
165
166 return result;
167}
168
169char *
171{
172 /*
173 * The only two supported locales (C and C.UTF-8) are both based on memcmp
174 * and are not expected to change, but track the version anyway.
175 *
176 * Note that the character semantics may change for some locales, but the
177 * collation version only tracks changes to sort order.
178 */
179 if (strcmp(collcollate, "C") == 0)
180 return "1";
181 else if (strcmp(collcollate, "C.UTF-8") == 0)
182 return "1";
183 else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
184 return "1";
185 else
187 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
188 errmsg("invalid locale name \"%s\" for builtin provider",
189 collcollate)));
190
191 return NULL; /* keep compiler quiet */
192}
#define TextDatumGetCString(d)
Definition: builtins.h:98
Oid collid
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
Oid MyDatabaseId
Definition: globals.c:95
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static char * locale
Definition: initdb.c:140
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
unsigned int pg_wchar
Definition: mbprint.c:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:2312
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1294
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:1495
size_t strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)
static size_t initcap_wbnext(void *state)
size_t strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * get_collation_actual_version_builtin(const char *collcollate)
size_t strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
static int unicode_utf8len(pg_wchar c)
Definition: pg_wchar.h:607
uintptr_t Datum
Definition: postgres.h:69
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
unsigned int Oid
Definition: postgres_ext.h:30
struct pg_locale_struct::@161::@162 builtin
const char * locale
Definition: pg_locale.h:110
bool deterministic
Definition: pg_locale.h:99
union pg_locale_struct::@161 info
Definition: regguts.h:323
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:221
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:631
size_t unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:165
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:101
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate)
Definition: unicode_case.c:138
size_t unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:189
bool pg_u_isalnum(pg_wchar code, bool posix)