PostgreSQL Source Code git master
hstore_compat.c
Go to the documentation of this file.
1/*
2 * contrib/hstore/hstore_compat.c
3 *
4 * Notes on old/new hstore format disambiguation.
5 *
6 * There are three formats to consider:
7 * 1) old contrib/hstore (referred to as hstore-old)
8 * 2) prerelease pgfoundry hstore
9 * 3) new contrib/hstore
10 *
11 * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
12 * bit, which is set in (3) but not (2).
13 *
14 * Values that are already in format (3), or which are
15 * unambiguously in format (2), are handled by the first
16 * "return immediately" test in hstoreUpgrade().
17 *
18 * To stress a point: we ONLY get here with possibly-ambiguous
19 * values if we're doing some sort of in-place migration from an
20 * old prerelease pgfoundry hstore-new; and we explicitly don't
21 * support that without fixing up any potentially padded values
22 * first. Most of the code here is serious overkill, but the
23 * performance penalty isn't serious (especially compared to the
24 * palloc() that we have to do anyway) and the belt-and-braces
25 * validity checks provide some reassurance. (If for some reason
26 * we get a value that would have worked on the old code, but
27 * which would be botched by the conversion code, the validity
28 * checks will fail it first so we get an error rather than bad
29 * data.)
30 *
31 * Note also that empty hstores are the same in (2) and (3), so
32 * there are some special-case paths for them.
33 *
34 * We tell the difference between formats (2) and (3) as follows (but
35 * note that there are some edge cases where we can't tell; see
36 * comments in hstoreUpgrade):
37 *
38 * First, since there must be at least one entry, we look at
39 * how the bits line up. The new format looks like:
40 *
41 * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
42 * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
43 *
44 * The old format looks like one of these, depending on endianness
45 * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
46 * n = isnull)
47 *
48 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
49 * nppppppppppppppppppppppppppppppp
50 *
51 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
52 * pppppppppppppppppppppppppppppppn
53 *
54 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
55 * nppppppppppppppppppppppppppppppp
56 *
57 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
58 * pppppppppppppppppppppppppppppppn (usual i386 format)
59 *
60 * If the entry is in old format, for the first entry "pos" must be 0.
61 * We can obviously see that either keylen or vallen must be >32768
62 * for there to be any ambiguity (which is why lengths less than that
63 * are fasttracked in hstore.h). Since "pos"==0, the "v" field in the
64 * new-format interpretation can only be 0 or 1, which constrains all
65 * but three bits of the old-format's k and v fields. But in addition
66 * to all of this, the data length implied by the keylen and vallen
67 * must fit in the varlena size. So the only ambiguous edge case for
68 * hstores with only one entry occurs between a new-format entry with
69 * an excess (~32k) of padding, and an old-format entry. But we know
70 * which format to use in that case based on how we were compiled, so
71 * no actual data corruption can occur.
72 *
73 * If there is more than one entry, the requirement that keys do not
74 * decrease in length, and that positions increase contiguously, and
75 * that the end of the data not be beyond the end of the varlena
76 * itself, disambiguates in almost all other cases. There is a small
77 * set of ambiguous cases which could occur if the old-format value
78 * has a large excess of padding and just the right pattern of key
79 * sizes, but these are also handled based on how we were compiled.
80 *
81 * The otherwise undocumented function hstore_version_diag is provided
82 * for testing purposes.
83 */
84#include "postgres.h"
85
86
87#include "hstore.h"
88
89/*
90 * This is the structure used for entries in the old contrib/hstore
91 * implementation. Notice that this is the same size as the new entry
92 * (two 32-bit words per key/value pair) and that the header is the
93 * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
94 * etc. are compatible.
95 *
96 * If the above statement isn't true on some bizarre platform, we're
97 * a bit hosed.
98 */
99typedef struct
100{
103 uint32
105 pos:31;
106} HOldEntry;
107
108StaticAssertDecl(sizeof(HOldEntry) == 2 * sizeof(HEntry),
109 "old hstore format is not upward-compatible");
110
111static int hstoreValidNewFormat(HStore *hs);
112static int hstoreValidOldFormat(HStore *hs);
113
114
115/*
116 * Validity test for a new-format hstore.
117 * 0 = not valid
118 * 1 = valid but with "slop" in the length
119 * 2 = exactly valid
120 */
121static int
123{
124 int count = HS_COUNT(hs);
125 HEntry *entries = ARRPTR(hs);
126 int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
127 int vsize = CALCDATASIZE(count, buflen);
128 int i;
129
130 if (hs->size_ & HS_FLAG_NEWVERSION)
131 return 2;
132
133 if (count == 0)
134 return 2;
135
136 if (!HSE_ISFIRST(entries[0]))
137 return 0;
138
139 if (vsize > VARSIZE(hs))
140 return 0;
141
142 /* entry position must be nondecreasing */
143
144 for (i = 1; i < 2 * count; ++i)
145 {
146 if (HSE_ISFIRST(entries[i])
147 || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
148 return 0;
149 }
150
151 /* key length must be nondecreasing and keys must not be null */
152
153 for (i = 1; i < count; ++i)
154 {
155 if (HSTORE_KEYLEN(entries, i) < HSTORE_KEYLEN(entries, i - 1))
156 return 0;
157 if (HSE_ISNULL(entries[2 * i]))
158 return 0;
159 }
160
161 if (vsize != VARSIZE(hs))
162 return 1;
163
164 return 2;
165}
166
167/*
168 * Validity test for an old-format hstore.
169 * 0 = not valid
170 * 1 = valid but with "slop" in the length
171 * 2 = exactly valid
172 */
173static int
175{
176 int count = hs->size_;
177 HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
178 int vsize;
179 int lastpos = 0;
180 int i;
181
182 if (hs->size_ & HS_FLAG_NEWVERSION)
183 return 0;
184
185 if (count == 0)
186 return 2;
187
188 if (count > 0xFFFFFFF)
189 return 0;
190
191 if (CALCDATASIZE(count, 0) > VARSIZE(hs))
192 return 0;
193
194 if (entries[0].pos != 0)
195 return 0;
196
197 /* key length must be nondecreasing */
198
199 for (i = 1; i < count; ++i)
200 {
201 if (entries[i].keylen < entries[i - 1].keylen)
202 return 0;
203 }
204
205 /*
206 * entry position must be strictly increasing, except for the first entry
207 * (which can be ""=>"" and thus zero-length); and all entries must be
208 * properly contiguous
209 */
210
211 for (i = 0; i < count; ++i)
212 {
213 if (entries[i].pos != lastpos)
214 return 0;
215 lastpos += (entries[i].keylen
216 + ((entries[i].valisnull) ? 0 : entries[i].vallen));
217 }
218
219 vsize = CALCDATASIZE(count, lastpos);
220
221 if (vsize > VARSIZE(hs))
222 return 0;
223
224 if (vsize != VARSIZE(hs))
225 return 1;
226
227 return 2;
228}
229
230
231/*
232 * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
233 */
234HStore *
236{
237 HStore *hs = (HStore *) PG_DETOAST_DATUM(orig);
238 int valid_new;
239 int valid_old;
240
241 /* Return immediately if no conversion needed */
242 if (hs->size_ & HS_FLAG_NEWVERSION)
243 return hs;
244
245 /* Do we have a writable copy? If not, make one. */
246 if ((void *) hs == (void *) DatumGetPointer(orig))
247 hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
248
249 if (hs->size_ == 0 ||
250 (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
251 {
252 HS_SETCOUNT(hs, HS_COUNT(hs));
253 HS_FIXSIZE(hs, HS_COUNT(hs));
254 return hs;
255 }
256
257 valid_new = hstoreValidNewFormat(hs);
258 valid_old = hstoreValidOldFormat(hs);
259
260 if (!valid_old || hs->size_ == 0)
261 {
262 if (valid_new)
263 {
264 /*
265 * force the "new version" flag and the correct varlena length.
266 */
267 HS_SETCOUNT(hs, HS_COUNT(hs));
268 HS_FIXSIZE(hs, HS_COUNT(hs));
269 return hs;
270 }
271 else
272 {
273 elog(ERROR, "invalid hstore value found");
274 }
275 }
276
277 /*
278 * this is the tricky edge case. It is only possible in some quite extreme
279 * cases (the hstore must have had a lot of wasted padding space at the
280 * end). But the only way a "new" hstore value could get here is if we're
281 * upgrading in place from a pre-release version of hstore-new (NOT
282 * contrib/hstore), so we work off the following assumptions: 1. If you're
283 * moving from old contrib/hstore to hstore-new, you're required to fix up
284 * any potential conflicts first, e.g. by running ALTER TABLE ... USING
285 * col::text::hstore; on all hstore columns before upgrading. 2. If you're
286 * moving from old contrib/hstore to new contrib/hstore, then "new" values
287 * are impossible here 3. If you're moving from pre-release hstore-new to
288 * hstore-new, then "old" values are impossible here 4. If you're moving
289 * from pre-release hstore-new to new contrib/hstore, you're not doing so
290 * as an in-place upgrade, so there is no issue So the upshot of all this
291 * is that we can treat all the edge cases as "new" if we're being built
292 * as hstore-new, and "old" if we're being built as contrib/hstore.
293 *
294 * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
295 * beta-tested. But for now, it would be very useful to know if anyone can
296 * actually reach this case in a non-contrived setting.
297 */
298
299 if (valid_new)
300 {
301#ifdef HSTORE_IS_HSTORE_NEW
302 elog(WARNING, "ambiguous hstore value resolved as hstore-new");
303
304 /*
305 * force the "new version" flag and the correct varlena length.
306 */
307 HS_SETCOUNT(hs, HS_COUNT(hs));
308 HS_FIXSIZE(hs, HS_COUNT(hs));
309 return hs;
310#else
311 elog(WARNING, "ambiguous hstore value resolved as hstore-old");
312#endif
313 }
314
315 /*
316 * must have an old-style value. Overwrite it in place as a new-style one.
317 */
318 {
319 int count = hs->size_;
320 HEntry *new_entries = ARRPTR(hs);
321 HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
322 int i;
323
324 for (i = 0; i < count; ++i)
325 {
326 uint32 pos = old_entries[i].pos;
327 uint32 keylen = old_entries[i].keylen;
328 uint32 vallen = old_entries[i].vallen;
329 bool isnull = old_entries[i].valisnull;
330
331 if (isnull)
332 vallen = 0;
333
334 new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
335 new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
336 | ((isnull) ? HENTRY_ISNULL : 0));
337 }
338
339 if (count)
340 new_entries[0].entry |= HENTRY_ISFIRST;
341 HS_SETCOUNT(hs, count);
342 HS_FIXSIZE(hs, count);
343 }
344
345 return hs;
346}
347
348
350Datum
352{
354 int valid_new = hstoreValidNewFormat(hs);
355 int valid_old = hstoreValidOldFormat(hs);
356
357 PG_RETURN_INT32(valid_old * 10 + valid_new);
358}
uint16_t uint16
Definition: c.h:551
uint32_t uint32
Definition: c.h:552
#define ARRPTR(x)
Definition: cube.c:28
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define PG_DETOAST_DATUM_COPY(datum)
Definition: fmgr.h:242
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define CALCDATASIZE(x, lenstr)
Definition: hstore.h:72
#define HS_COUNT(hsp_)
Definition: hstore.h:61
#define HENTRY_POSMASK
Definition: hstore.h:25
#define HS_FIXSIZE(hsp_, count_)
Definition: hstore.h:143
#define HENTRY_ISFIRST
Definition: hstore.h:23
#define HSE_ISFIRST(he_)
Definition: hstore.h:28
#define HS_FLAG_NEWVERSION
Definition: hstore.h:59
#define HSE_ISNULL(he_)
Definition: hstore.h:29
#define HSTORE_KEYLEN(arr_, i_)
Definition: hstore.h:81
#define HS_SETCOUNT(hsp_, c_)
Definition: hstore.h:62
#define HENTRY_ISNULL
Definition: hstore.h:24
#define HSE_ENDPOS(he_)
Definition: hstore.h:30
PG_FUNCTION_INFO_V1(hstore_version_diag)
static int hstoreValidOldFormat(HStore *hs)
HStore * hstoreUpgrade(Datum orig)
StaticAssertDecl(sizeof(HOldEntry)==2 *sizeof(HEntry), "old hstore format is not upward-compatible")
Datum hstore_version_diag(PG_FUNCTION_ARGS)
static int hstoreValidNewFormat(HStore *hs)
int i
Definition: isn.c:77
uint64_t Datum
Definition: postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:322
Definition: hstore.h:19
uint32 entry
Definition: hstore.h:20
uint16 vallen
uint16 keylen
uint32 pos
uint32 valisnull
Definition: hstore.h:45
uint32 size_
Definition: hstore.h:47
static Size VARSIZE(const void *PTR)
Definition: varatt.h:298