PostgreSQL Source Code git master
pg_locale_builtin.c
Go to the documentation of this file.
/*-----------------------------------------------------------------------
 *
 * PostgreSQL locale utilities for builtin provider
 *
 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
 *
 * src/backend/utils/adt/pg_locale_builtin.c
 *
 *-----------------------------------------------------------------------
 */
11
12#include "postgres.h"
13
14#include "catalog/pg_database.h"
16#include "common/unicode_case.h"
18#include "mb/pg_wchar.h"
19#include "miscadmin.h"
20#include "utils/builtins.h"
21#include "utils/memutils.h"
22#include "utils/pg_locale.h"
23#include "utils/syscache.h"
24
26 MemoryContext context);
27extern char *get_collation_actual_version_builtin(const char *collcollate);
28extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
29 ssize_t srclen, pg_locale_t locale);
30extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
31 ssize_t srclen, pg_locale_t locale);
32extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
33 ssize_t srclen, pg_locale_t locale);
34extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
35 ssize_t srclen, pg_locale_t locale);
36
37
/*
 * Iteration state for initcap_wbnext().
 */
struct WordBoundaryState
{
	const char *str;			/* text being scanned */
	size_t		len;			/* upper bound on offset; scanning also stops
								 * at a NUL byte */
	size_t		offset;			/* byte offset of the next character to
								 * examine */
	bool		init;			/* false until the first boundary has been
								 * reported */
	bool		prev_alnum;		/* pg_u_isalnum() result for the character at
								 * the most recently reported boundary */
};
46
47/*
48 * Simple word boundary iterator that draws boundaries each time the result of
49 * pg_u_isalnum() changes.
50 */
51static size_t
53{
54 struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
55
56 while (wbstate->offset < wbstate->len &&
57 wbstate->str[wbstate->offset] != '\0')
58 {
59 pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
60 wbstate->offset);
61 bool curr_alnum = pg_u_isalnum(u, true);
62
63 if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
64 {
65 size_t prev_offset = wbstate->offset;
66
67 wbstate->init = true;
68 wbstate->offset += unicode_utf8len(u);
69 wbstate->prev_alnum = curr_alnum;
70 return prev_offset;
71 }
72
73 wbstate->offset += unicode_utf8len(u);
74 }
75
76 return wbstate->len;
77}
78
79size_t
80strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
82{
83 return unicode_strlower(dest, destsize, src, srclen,
84 locale->info.builtin.casemap_full);
85}
86
87size_t
88strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
90{
91 struct WordBoundaryState wbstate = {
92 .str = src,
93 .len = srclen,
94 .offset = 0,
95 .init = false,
96 .prev_alnum = false,
97 };
98
99 return unicode_strtitle(dest, destsize, src, srclen,
100 locale->info.builtin.casemap_full,
101 initcap_wbnext, &wbstate);
102}
103
104size_t
105strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
107{
108 return unicode_strupper(dest, destsize, src, srclen,
109 locale->info.builtin.casemap_full);
110}
111
112size_t
113strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
115{
116 return unicode_strfold(dest, destsize, src, srclen,
117 locale->info.builtin.casemap_full);
118}
119
122{
123 const char *locstr;
124 pg_locale_t result;
125
126 if (collid == DEFAULT_COLLATION_OID)
127 {
128 HeapTuple tp;
129 Datum datum;
130
132 if (!HeapTupleIsValid(tp))
133 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
134 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
135 Anum_pg_database_datlocale);
136 locstr = TextDatumGetCString(datum);
137 ReleaseSysCache(tp);
138 }
139 else
140 {
141 HeapTuple tp;
142 Datum datum;
143
145 if (!HeapTupleIsValid(tp))
146 elog(ERROR, "cache lookup failed for collation %u", collid);
147 datum = SysCacheGetAttrNotNull(COLLOID, tp,
148 Anum_pg_collation_colllocale);
149 locstr = TextDatumGetCString(datum);
150 ReleaseSysCache(tp);
151 }
152
154
155 result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
156
157 result->info.builtin.locale = MemoryContextStrdup(context, locstr);
158 result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
159 result->provider = COLLPROVIDER_BUILTIN;
160 result->deterministic = true;
161 result->collate_is_c = true;
162 result->ctype_is_c = (strcmp(locstr, "C") == 0);
163
164 return result;
165}
166
167char *
169{
170 /*
171 * The only two supported locales (C and C.UTF-8) are both based on memcmp
172 * and are not expected to change, but track the version anyway.
173 *
174 * Note that the character semantics may change for some locales, but the
175 * collation version only tracks changes to sort order.
176 */
177 if (strcmp(collcollate, "C") == 0)
178 return "1";
179 else if (strcmp(collcollate, "C.UTF-8") == 0)
180 return "1";
181 else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
182 return "1";
183 else
185 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
186 errmsg("invalid locale name \"%s\" for builtin provider",
187 collcollate)));
188
189 return NULL; /* keep compiler quiet */
190}
#define TextDatumGetCString(d)
Definition: builtins.h:98
Oid collid
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
Oid MyDatabaseId
Definition: globals.c:93
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static char * locale
Definition: initdb.c:140
static pg_wchar utf8_to_unicode(const unsigned char *c)
Definition: mbprint.c:53
unsigned int pg_wchar
Definition: mbprint.c:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1261
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1683
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1215
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:1637
pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)
static size_t initcap_wbnext(void *state)
size_t strupper_builtin(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strlower_builtin(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
char * get_collation_actual_version_builtin(const char *collcollate)
size_t strfold_builtin(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
size_t strtitle_builtin(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
static int unicode_utf8len(pg_wchar c)
Definition: pg_wchar.h:607
uintptr_t Datum
Definition: postgres.h:69
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
unsigned int Oid
Definition: postgres_ext.h:32
struct pg_locale_struct::@158::@159 builtin
union pg_locale_struct::@158 info
const char * locale
Definition: pg_locale.h:110
bool deterministic
Definition: pg_locale.h:99
Definition: regguts.h:323
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:221
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:631
size_t unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:146
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:82
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate)
Definition: unicode_case.c:119
size_t unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
Definition: unicode_case.c:170
bool pg_u_isalnum(pg_wchar code, bool posix)