PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
hstore_compat.c
Go to the documentation of this file.
1 /*
2  * contrib/hstore/hstore_compat.c
3  *
4  * Notes on old/new hstore format disambiguation.
5  *
6  * There are three formats to consider:
7  * 1) old contrib/hstore (referred to as hstore-old)
8  * 2) prerelease pgfoundry hstore
9  * 3) new contrib/hstore
10  *
11  * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
12  * bit, which is set in (3) but not (2).
13  *
14  * Values that are already in format (3), or which are
15  * unambiguously in format (2), are handled by the first
16  * "return immediately" test in hstoreUpgrade().
17  *
18  * To stress a point: we ONLY get here with possibly-ambiguous
19  * values if we're doing some sort of in-place migration from an
20  * old prerelease pgfoundry hstore-new; and we explicitly don't
21  * support that without fixing up any potentially padded values
22  * first. Most of the code here is serious overkill, but the
23  * performance penalty isn't serious (especially compared to the
24  * palloc() that we have to do anyway) and the belt-and-braces
25  * validity checks provide some reassurance. (If for some reason
26  * we get a value that would have worked on the old code, but
27  * which would be botched by the conversion code, the validity
28  * checks will fail it first so we get an error rather than bad
29  * data.)
30  *
31  * Note also that empty hstores are the same in (2) and (3), so
32  * there are some special-case paths for them.
33  *
34  * We tell the difference between formats (2) and (3) as follows (but
35  * note that there are some edge cases where we can't tell; see
36  * comments in hstoreUpgrade):
37  *
38  * First, since there must be at least one entry, we look at
39  * how the bits line up. The new format looks like:
40  *
41  * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
42  * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
43  *
44  * The old format looks like one of these, depending on endianness
45  * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
46  * n = isnull)
47  *
48  * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
49  * nppppppppppppppppppppppppppppppp
50  *
51  * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
52  * pppppppppppppppppppppppppppppppn
53  *
54  * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
55  * nppppppppppppppppppppppppppppppp
56  *
57  * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
58  * pppppppppppppppppppppppppppppppn (usual i386 format)
59  *
60  * If the entry is in old format, for the first entry "pos" must be 0.
61  * We can obviously see that either keylen or vallen must be >32768
62  * for there to be any ambiguity (which is why lengths less than that
63  * are fasttracked in hstore.h). Since "pos"==0, the "v" field in the
64  * new-format interpretation can only be 0 or 1, which constrains all
65  * but three bits of the old-format's k and v fields. But in addition
66  * to all of this, the data length implied by the keylen and vallen
67  * must fit in the varlena size. So the only ambiguous edge case for
68  * hstores with only one entry occurs between a new-format entry with
69  * an excess (~32k) of padding, and an old-format entry. But we know
70  * which format to use in that case based on how we were compiled, so
71  * no actual data corruption can occur.
72  *
73  * If there is more than one entry, the requirement that keys do not
74  * decrease in length, and that positions increase contiguously, and
75  * that the end of the data not be beyond the end of the varlena
76  * itself, disambiguates in almost all other cases. There is a small
77  * set of ambiguous cases which could occur if the old-format value
78  * has a large excess of padding and just the right pattern of key
79  * sizes, but these are also handled based on how we were compiled.
80  *
81  * The otherwise undocumented function hstore_version_diag is provided
82  * for testing purposes.
83  */
84 #include "postgres.h"
85 
86 
87 #include "hstore.h"
88 
89 /*
90  * This is the structure used for entries in the old contrib/hstore
91  * implementation. Notice that this is the same size as the new entry
92  * (two 32-bit words per key/value pair) and that the header is the
93  * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
94  * etc. are compatible.
95  *
96  * If the above statement isn't true on some bizarre platform, we're
97  * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat).
98  */
99 typedef struct
100 {
103  uint32
104  valisnull:1,
105  pos:31;
106 } HOldEntry;
107 
108 static int hstoreValidNewFormat(HStore *hs);
109 static int hstoreValidOldFormat(HStore *hs);
110 
111 
112 /*
113  * Validity test for a new-format hstore.
114  * 0 = not valid
115  * 1 = valid but with "slop" in the length
116  * 2 = exactly valid
117  */
118 static int
120 {
121  int count = HS_COUNT(hs);
122  HEntry *entries = ARRPTR(hs);
123  int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
124  int vsize = CALCDATASIZE(count, buflen);
125  int i;
126 
127  if (hs->size_ & HS_FLAG_NEWVERSION)
128  return 2;
129 
130  if (count == 0)
131  return 2;
132 
133  if (!HSE_ISFIRST(entries[0]))
134  return 0;
135 
136  if (vsize > VARSIZE(hs))
137  return 0;
138 
139  /* entry position must be nondecreasing */
140 
141  for (i = 1; i < 2 * count; ++i)
142  {
143  if (HSE_ISFIRST(entries[i])
144  || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
145  return 0;
146  }
147 
148  /* key length must be nondecreasing and keys must not be null */
149 
150  for (i = 1; i < count; ++i)
151  {
152  if (HSTORE_KEYLEN(entries, i) < HSTORE_KEYLEN(entries, i - 1))
153  return 0;
154  if (HSE_ISNULL(entries[2 * i]))
155  return 0;
156  }
157 
158  if (vsize != VARSIZE(hs))
159  return 1;
160 
161  return 2;
162 }
163 
164 /*
165  * Validity test for an old-format hstore.
166  * 0 = not valid
167  * 1 = valid but with "slop" in the length
168  * 2 = exactly valid
169  */
170 static int
172 {
173  int count = hs->size_;
174  HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
175  int vsize;
176  int lastpos = 0;
177  int i;
178 
179  if (hs->size_ & HS_FLAG_NEWVERSION)
180  return 0;
181 
182  /* New format uses an HEntry for key and another for value */
183  StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
184  "old hstore format is not upward-compatible");
185 
186  if (count == 0)
187  return 2;
188 
189  if (count > 0xFFFFFFF)
190  return 0;
191 
192  if (CALCDATASIZE(count, 0) > VARSIZE(hs))
193  return 0;
194 
195  if (entries[0].pos != 0)
196  return 0;
197 
198  /* key length must be nondecreasing */
199 
200  for (i = 1; i < count; ++i)
201  {
202  if (entries[i].keylen < entries[i - 1].keylen)
203  return 0;
204  }
205 
206  /*
207  * entry position must be strictly increasing, except for the first entry
208  * (which can be ""=>"" and thus zero-length); and all entries must be
209  * properly contiguous
210  */
211 
212  for (i = 0; i < count; ++i)
213  {
214  if (entries[i].pos != lastpos)
215  return 0;
216  lastpos += (entries[i].keylen
217  + ((entries[i].valisnull) ? 0 : entries[i].vallen));
218  }
219 
220  vsize = CALCDATASIZE(count, lastpos);
221 
222  if (vsize > VARSIZE(hs))
223  return 0;
224 
225  if (vsize != VARSIZE(hs))
226  return 1;
227 
228  return 2;
229 }
230 
231 
232 /*
233  * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
234  */
235 HStore *
237 {
238  HStore *hs = (HStore *) PG_DETOAST_DATUM(orig);
239  int valid_new;
240  int valid_old;
241  bool writable;
242 
243  /* Return immediately if no conversion needed */
244  if ((hs->size_ & HS_FLAG_NEWVERSION) ||
245  hs->size_ == 0 ||
246  (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
247  return hs;
248 
249  valid_new = hstoreValidNewFormat(hs);
250  valid_old = hstoreValidOldFormat(hs);
251  /* Do we have a writable copy? */
252  writable = ((void *) hs != (void *) DatumGetPointer(orig));
253 
254  if (!valid_old || hs->size_ == 0)
255  {
256  if (valid_new)
257  {
258  /*
259  * force the "new version" flag and the correct varlena length,
260  * but only if we have a writable copy already (which we almost
261  * always will, since short new-format values won't come through
262  * here)
263  */
264  if (writable)
265  {
266  HS_SETCOUNT(hs, HS_COUNT(hs));
267  HS_FIXSIZE(hs, HS_COUNT(hs));
268  }
269  return hs;
270  }
271  else
272  {
273  elog(ERROR, "invalid hstore value found");
274  }
275  }
276 
277  /*
278  * this is the tricky edge case. It is only possible in some quite extreme
279  * cases (the hstore must have had a lot of wasted padding space at the
280  * end). But the only way a "new" hstore value could get here is if we're
281  * upgrading in place from a pre-release version of hstore-new (NOT
282  * contrib/hstore), so we work off the following assumptions: 1. If you're
283  * moving from old contrib/hstore to hstore-new, you're required to fix up
284  * any potential conflicts first, e.g. by running ALTER TABLE ... USING
285  * col::text::hstore; on all hstore columns before upgrading. 2. If you're
286  * moving from old contrib/hstore to new contrib/hstore, then "new" values
287  * are impossible here 3. If you're moving from pre-release hstore-new to
288  * hstore-new, then "old" values are impossible here 4. If you're moving
289  * from pre-release hstore-new to new contrib/hstore, you're not doing so
290  * as an in-place upgrade, so there is no issue So the upshot of all this
291  * is that we can treat all the edge cases as "new" if we're being built
292  * as hstore-new, and "old" if we're being built as contrib/hstore.
293  *
294  * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
295  * beta-tested. But for now, it would be very useful to know if anyone can
296  * actually reach this case in a non-contrived setting.
297  */
298 
299  if (valid_new)
300  {
301 #if HSTORE_IS_HSTORE_NEW
302  elog(WARNING, "ambiguous hstore value resolved as hstore-new");
303 
304  /*
305  * force the "new version" flag and the correct varlena length, but
306  * only if we have a writable copy already (which we almost always
307  * will, since short new-format values won't come through here)
308  */
309  if (writable)
310  {
311  HS_SETCOUNT(hs, HS_COUNT(hs));
312  HS_FIXSIZE(hs, HS_COUNT(hs));
313  }
314  return hs;
315 #else
316  elog(WARNING, "ambiguous hstore value resolved as hstore-old");
317 #endif
318  }
319 
320  /*
321  * must have an old-style value. Overwrite it in place as a new-style one,
322  * making sure we have a writable copy first.
323  */
324 
325  if (!writable)
326  hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
327 
328  {
329  int count = hs->size_;
330  HEntry *new_entries = ARRPTR(hs);
331  HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
332  int i;
333 
334  for (i = 0; i < count; ++i)
335  {
336  uint32 pos = old_entries[i].pos;
337  uint32 keylen = old_entries[i].keylen;
338  uint32 vallen = old_entries[i].vallen;
339  bool isnull = old_entries[i].valisnull;
340 
341  if (isnull)
342  vallen = 0;
343 
344  new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
345  new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
346  | ((isnull) ? HENTRY_ISNULL : 0));
347  }
348 
349  if (count)
350  new_entries[0].entry |= HENTRY_ISFIRST;
351  HS_SETCOUNT(hs, count);
352  HS_FIXSIZE(hs, count);
353  }
354 
355  return hs;
356 }
357 
358 
360 Datum
362 {
364  int valid_new = hstoreValidNewFormat(hs);
365  int valid_old = hstoreValidOldFormat(hs);
366 
367  PG_RETURN_INT32(valid_old * 10 + valid_new);
368 }
#define HSE_ENDPOS(he_)
Definition: hstore.h:30
Definition: hstore.h:44
#define HENTRY_POSMASK
Definition: hstore.h:25
#define PG_DETOAST_DATUM_COPY(datum)
Definition: fmgr.h:207
#define VARSIZE(PTR)
Definition: postgres.h:304
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
#define HSTORE_KEYLEN(arr_, i_)
Definition: hstore.h:81
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
#define HSE_ISFIRST(he_)
Definition: hstore.h:28
#define HENTRY_ISNULL
Definition: hstore.h:24
#define HSE_ISNULL(he_)
Definition: hstore.h:29
static int hstoreValidNewFormat(HStore *hs)
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:746
PG_FUNCTION_INFO_V1(hstore_version_diag)
unsigned short uint16
Definition: c.h:257
uint16 keylen
#define ERROR
Definition: elog.h:43
#define HS_FLAG_NEWVERSION
Definition: hstore.h:59
Datum hstore_version_diag(PG_FUNCTION_ARGS)
#define HS_COUNT(hsp_)
Definition: hstore.h:61
uint16 vallen
uint32 entry
Definition: hstore.h:20
unsigned int uint32
Definition: c.h:258
HStore * hstoreUpgrade(Datum orig)
#define WARNING
Definition: elog.h:40
#define CALCDATASIZE(x, lenstr)
Definition: hstore.h:72
static int hstoreValidOldFormat(HStore *hs)
uintptr_t Datum
Definition: postgres.h:372
#define HENTRY_ISFIRST
Definition: hstore.h:23
uint32 valisnull
uint32 pos
Definition: hstore.h:18
#define DatumGetPointer(X)
Definition: postgres.h:555
uint32 size_
Definition: hstore.h:47
int i
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:205
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
#define ARRPTR(x)
Definition: cube.c:26
#define elog
Definition: elog.h:219
#define HS_SETCOUNT(hsp_, c_)
Definition: hstore.h:62
#define HS_FIXSIZE(hsp_, count_)
Definition: hstore.h:143