PostgreSQL Source Code git master
hstore_compat.c
Go to the documentation of this file.
1/*
2 * contrib/hstore/hstore_compat.c
3 *
4 * Notes on old/new hstore format disambiguation.
5 *
6 * There are three formats to consider:
7 * 1) old contrib/hstore (referred to as hstore-old)
8 * 2) prerelease pgfoundry hstore
9 * 3) new contrib/hstore
10 *
11 * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
12 * bit, which is set in (3) but not (2).
13 *
14 * Values that are already in format (3), or which are
15 * unambiguously in format (2), are handled by the first
16 * "return immediately" test in hstoreUpgrade().
17 *
18 * To stress a point: we ONLY get here with possibly-ambiguous
19 * values if we're doing some sort of in-place migration from an
20 * old prerelease pgfoundry hstore-new; and we explicitly don't
21 * support that without fixing up any potentially padded values
22 * first. Most of the code here is serious overkill, but the
23 * performance penalty isn't serious (especially compared to the
24 * palloc() that we have to do anyway) and the belt-and-braces
25 * validity checks provide some reassurance. (If for some reason
26 * we get a value that would have worked on the old code, but
27 * which would be botched by the conversion code, the validity
28 * checks will fail it first so we get an error rather than bad
29 * data.)
30 *
31 * Note also that empty hstores are the same in (2) and (3), so
32 * there are some special-case paths for them.
33 *
34 * We tell the difference between formats (2) and (3) as follows (but
35 * note that there are some edge cases where we can't tell; see
36 * comments in hstoreUpgrade):
37 *
38 * First, since there must be at least one entry, we look at
39 * how the bits line up. The new format looks like:
40 *
41 * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
42 * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
43 *
44 * The old format looks like one of these, depending on endianness
45 * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
46 * n = isnull)
47 *
48 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
49 * nppppppppppppppppppppppppppppppp
50 *
51 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
52 * pppppppppppppppppppppppppppppppn
53 *
54 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
55 * nppppppppppppppppppppppppppppppp
56 *
57 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
58 * pppppppppppppppppppppppppppppppn (usual i386 format)
59 *
60 * If the entry is in old format, for the first entry "pos" must be 0.
61 * We can obviously see that either keylen or vallen must be >32768
62 * for there to be any ambiguity (which is why lengths less than that
63 * are fasttracked in hstore.h) Since "pos"==0, the "v" field in the
64 * new-format interpretation can only be 0 or 1, which constrains all
65 * but three bits of the old-format's k and v fields. But in addition
66 * to all of this, the data length implied by the keylen and vallen
67 * must fit in the varlena size. So the only ambiguous edge case for
68 * hstores with only one entry occurs between a new-format entry with
69 * an excess (~32k) of padding, and an old-format entry. But we know
70 * which format to use in that case based on how we were compiled, so
71 * no actual data corruption can occur.
72 *
73 * If there is more than one entry, the requirement that keys do not
74 * decrease in length, and that positions increase contiguously, and
75 * that the end of the data not be beyond the end of the varlena
76 * itself, disambiguates in almost all other cases. There is a small
77 * set of ambiguous cases which could occur if the old-format value
78 * has a large excess of padding and just the right pattern of key
79 * sizes, but these are also handled based on how we were compiled.
80 *
81 * The otherwise undocumented function hstore_version_diag is provided
82 * for testing purposes.
83 */
84#include "postgres.h"
85
86
87#include "hstore.h"
88
89/*
90 * This is the structure used for entries in the old contrib/hstore
91 * implementation. Notice that this is the same size as the new entry
92 * (two 32-bit words per key/value pair) and that the header is the
93 * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
94 * etc. are compatible.
95 *
96 * If the above statement isn't true on some bizarre platform, we're
97 * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat).
98 */
99typedef struct
100{
103 uint32
105 pos:31;
106} HOldEntry;
107
108static int hstoreValidNewFormat(HStore *hs);
109static int hstoreValidOldFormat(HStore *hs);
110
111
112/*
113 * Validity test for a new-format hstore.
114 * 0 = not valid
115 * 1 = valid but with "slop" in the length
116 * 2 = exactly valid
117 */
118static int
120{
121 int count = HS_COUNT(hs);
122 HEntry *entries = ARRPTR(hs);
123 int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
124 int vsize = CALCDATASIZE(count, buflen);
125 int i;
126
127 if (hs->size_ & HS_FLAG_NEWVERSION)
128 return 2;
129
130 if (count == 0)
131 return 2;
132
133 if (!HSE_ISFIRST(entries[0]))
134 return 0;
135
136 if (vsize > VARSIZE(hs))
137 return 0;
138
139 /* entry position must be nondecreasing */
140
141 for (i = 1; i < 2 * count; ++i)
142 {
143 if (HSE_ISFIRST(entries[i])
144 || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
145 return 0;
146 }
147
148 /* key length must be nondecreasing and keys must not be null */
149
150 for (i = 1; i < count; ++i)
151 {
152 if (HSTORE_KEYLEN(entries, i) < HSTORE_KEYLEN(entries, i - 1))
153 return 0;
154 if (HSE_ISNULL(entries[2 * i]))
155 return 0;
156 }
157
158 if (vsize != VARSIZE(hs))
159 return 1;
160
161 return 2;
162}
163
164/*
165 * Validity test for an old-format hstore.
166 * 0 = not valid
167 * 1 = valid but with "slop" in the length
168 * 2 = exactly valid
169 */
170static int
172{
173 int count = hs->size_;
174 HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
175 int vsize;
176 int lastpos = 0;
177 int i;
178
179 if (hs->size_ & HS_FLAG_NEWVERSION)
180 return 0;
181
182 /* New format uses an HEntry for key and another for value */
183 StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
184 "old hstore format is not upward-compatible");
185
186 if (count == 0)
187 return 2;
188
189 if (count > 0xFFFFFFF)
190 return 0;
191
192 if (CALCDATASIZE(count, 0) > VARSIZE(hs))
193 return 0;
194
195 if (entries[0].pos != 0)
196 return 0;
197
198 /* key length must be nondecreasing */
199
200 for (i = 1; i < count; ++i)
201 {
202 if (entries[i].keylen < entries[i - 1].keylen)
203 return 0;
204 }
205
206 /*
207 * entry position must be strictly increasing, except for the first entry
208 * (which can be ""=>"" and thus zero-length); and all entries must be
209 * properly contiguous
210 */
211
212 for (i = 0; i < count; ++i)
213 {
214 if (entries[i].pos != lastpos)
215 return 0;
216 lastpos += (entries[i].keylen
217 + ((entries[i].valisnull) ? 0 : entries[i].vallen));
218 }
219
220 vsize = CALCDATASIZE(count, lastpos);
221
222 if (vsize > VARSIZE(hs))
223 return 0;
224
225 if (vsize != VARSIZE(hs))
226 return 1;
227
228 return 2;
229}
230
231
232/*
233 * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
234 */
235HStore *
237{
238 HStore *hs = (HStore *) PG_DETOAST_DATUM(orig);
239 int valid_new;
240 int valid_old;
241
242 /* Return immediately if no conversion needed */
243 if (hs->size_ & HS_FLAG_NEWVERSION)
244 return hs;
245
246 /* Do we have a writable copy? If not, make one. */
247 if ((void *) hs == (void *) DatumGetPointer(orig))
248 hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
249
250 if (hs->size_ == 0 ||
251 (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
252 {
253 HS_SETCOUNT(hs, HS_COUNT(hs));
254 HS_FIXSIZE(hs, HS_COUNT(hs));
255 return hs;
256 }
257
258 valid_new = hstoreValidNewFormat(hs);
259 valid_old = hstoreValidOldFormat(hs);
260
261 if (!valid_old || hs->size_ == 0)
262 {
263 if (valid_new)
264 {
265 /*
266 * force the "new version" flag and the correct varlena length.
267 */
268 HS_SETCOUNT(hs, HS_COUNT(hs));
269 HS_FIXSIZE(hs, HS_COUNT(hs));
270 return hs;
271 }
272 else
273 {
274 elog(ERROR, "invalid hstore value found");
275 }
276 }
277
278 /*
279 * this is the tricky edge case. It is only possible in some quite extreme
280 * cases (the hstore must have had a lot of wasted padding space at the
281 * end). But the only way a "new" hstore value could get here is if we're
282 * upgrading in place from a pre-release version of hstore-new (NOT
283 * contrib/hstore), so we work off the following assumptions: 1. If you're
284 * moving from old contrib/hstore to hstore-new, you're required to fix up
285 * any potential conflicts first, e.g. by running ALTER TABLE ... USING
286 * col::text::hstore; on all hstore columns before upgrading. 2. If you're
287 * moving from old contrib/hstore to new contrib/hstore, then "new" values
288 * are impossible here 3. If you're moving from pre-release hstore-new to
289 * hstore-new, then "old" values are impossible here 4. If you're moving
290 * from pre-release hstore-new to new contrib/hstore, you're not doing so
291 * as an in-place upgrade, so there is no issue So the upshot of all this
292 * is that we can treat all the edge cases as "new" if we're being built
293 * as hstore-new, and "old" if we're being built as contrib/hstore.
294 *
295 * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
296 * beta-tested. But for now, it would be very useful to know if anyone can
297 * actually reach this case in a non-contrived setting.
298 */
299
300 if (valid_new)
301 {
302#ifdef HSTORE_IS_HSTORE_NEW
303 elog(WARNING, "ambiguous hstore value resolved as hstore-new");
304
305 /*
306 * force the "new version" flag and the correct varlena length.
307 */
308 HS_SETCOUNT(hs, HS_COUNT(hs));
309 HS_FIXSIZE(hs, HS_COUNT(hs));
310 return hs;
311#else
312 elog(WARNING, "ambiguous hstore value resolved as hstore-old");
313#endif
314 }
315
316 /*
317 * must have an old-style value. Overwrite it in place as a new-style one.
318 */
319 {
320 int count = hs->size_;
321 HEntry *new_entries = ARRPTR(hs);
322 HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
323 int i;
324
325 for (i = 0; i < count; ++i)
326 {
327 uint32 pos = old_entries[i].pos;
328 uint32 keylen = old_entries[i].keylen;
329 uint32 vallen = old_entries[i].vallen;
330 bool isnull = old_entries[i].valisnull;
331
332 if (isnull)
333 vallen = 0;
334
335 new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
336 new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
337 | ((isnull) ? HENTRY_ISNULL : 0));
338 }
339
340 if (count)
341 new_entries[0].entry |= HENTRY_ISFIRST;
342 HS_SETCOUNT(hs, count);
343 HS_FIXSIZE(hs, count);
344 }
345
346 return hs;
347}
348
349
351Datum
353{
355 int valid_new = hstoreValidNewFormat(hs);
356 int valid_old = hstoreValidOldFormat(hs);
357
358 PG_RETURN_INT32(valid_old * 10 + valid_new);
359}
uint16_t uint16
Definition: c.h:501
uint32_t uint32
Definition: c.h:502
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:909
#define ARRPTR(x)
Definition: cube.c:25
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define PG_DETOAST_DATUM_COPY(datum)
Definition: fmgr.h:242
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define CALCDATASIZE(x, lenstr)
Definition: hstore.h:72
#define HS_COUNT(hsp_)
Definition: hstore.h:61
#define HENTRY_POSMASK
Definition: hstore.h:25
#define HS_FIXSIZE(hsp_, count_)
Definition: hstore.h:143
#define HENTRY_ISFIRST
Definition: hstore.h:23
#define HSE_ISFIRST(he_)
Definition: hstore.h:28
#define HS_FLAG_NEWVERSION
Definition: hstore.h:59
#define HSE_ISNULL(he_)
Definition: hstore.h:29
#define HSTORE_KEYLEN(arr_, i_)
Definition: hstore.h:81
#define HS_SETCOUNT(hsp_, c_)
Definition: hstore.h:62
#define HENTRY_ISNULL
Definition: hstore.h:24
#define HSE_ENDPOS(he_)
Definition: hstore.h:30
PG_FUNCTION_INFO_V1(hstore_version_diag)
static int hstoreValidOldFormat(HStore *hs)
HStore * hstoreUpgrade(Datum orig)
Datum hstore_version_diag(PG_FUNCTION_ARGS)
static int hstoreValidNewFormat(HStore *hs)
int i
Definition: isn.c:74
uintptr_t Datum
Definition: postgres.h:69
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
Definition: hstore.h:19
uint32 entry
Definition: hstore.h:20
uint16 vallen
uint16 keylen
uint32 pos
uint32 valisnull
Definition: hstore.h:45
uint32 size_
Definition: hstore.h:47
#define VARSIZE(PTR)
Definition: varatt.h:279