PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
gistutil.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * gistutil.c
4 * utilities routines for the postgres GiST index access method.
5 *
6 *
7 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/access/gist/gistutil.c
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include <math.h>
17
18#include "access/gist_private.h"
19#include "access/htup_details.h"
20#include "access/reloptions.h"
21#include "common/pg_prng.h"
22#include "storage/indexfsm.h"
23#include "utils/float.h"
24#include "utils/fmgrprotos.h"
25#include "utils/lsyscache.h"
26#include "utils/rel.h"
27#include "utils/snapmgr.h"
28#include "utils/syscache.h"
29
30/*
31 * Write itup vector to page, has no control of free space.
32 */
33void
35{
36 int i;
37
38 if (off == InvalidOffsetNumber)
39 off = (PageIsEmpty(page)) ? FirstOffsetNumber :
41
42 for (i = 0; i < len; i++)
43 {
44 Size sz = IndexTupleSize(itup[i]);
46
47 l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
48 if (l == InvalidOffsetNumber)
49 elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
50 i, len, (int) sz);
51 off++;
52 }
53}
54
55/*
56 * Check space for itup vector on page
57 */
58bool
59gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
60{
61 unsigned int size = freespace,
62 deleted = 0;
63 int i;
64
65 for (i = 0; i < len; i++)
66 size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
67
68 if (todelete != InvalidOffsetNumber)
69 {
70 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
71
72 deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
73 }
74
75 return (PageGetFreeSpace(page) + deleted < size);
76}
77
78bool
80{
81 int i;
82 Size size = 0;
83
84 for (i = 0; i < len; i++)
85 size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
86
87 /* TODO: Consider fillfactor */
88 return (size <= GiSTPageSize);
89}
90
91/*
92 * Read buffer into itup vector
93 */
95gistextractpage(Page page, int *len /* out */ )
96{
98 maxoff;
99 IndexTuple *itvec;
100
101 maxoff = PageGetMaxOffsetNumber(page);
102 *len = maxoff;
103 itvec = palloc(sizeof(IndexTuple) * maxoff);
104 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
105 itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
106
107 return itvec;
108}
109
110/*
111 * join two vectors into one
112 */
114gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
115{
116 itvec = (IndexTuple *) repalloc(itvec, sizeof(IndexTuple) * ((*len) + addlen));
117 memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
118 *len += addlen;
119 return itvec;
120}
121
122/*
123 * make plain IndexTuple vector
124 */
125
127gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
128{
129 char *ptr,
130 *ret;
131 int i;
132
133 *memlen = 0;
134
135 for (i = 0; i < veclen; i++)
136 *memlen += IndexTupleSize(vec[i]);
137
138 ptr = ret = palloc(*memlen);
139
140 for (i = 0; i < veclen; i++)
141 {
142 memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
143 ptr += IndexTupleSize(vec[i]);
144 }
145
146 return (IndexTupleData *) ret;
147}
148
149/*
150 * Make unions of keys in IndexTuple vector (one union datum per index column).
151 * Union Datums are returned into the attr/isnull arrays.
152 * Resulting Datums aren't compressed.
153 */
154void
156 Datum *attr, bool *isnull)
157{
158 int i;
159 GistEntryVector *evec;
160 int attrsize;
161
162 evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
163
164 for (i = 0; i < giststate->nonLeafTupdesc->natts; i++)
165 {
166 int j;
167
168 /* Collect non-null datums for this column */
169 evec->n = 0;
170 for (j = 0; j < len; j++)
171 {
172 Datum datum;
173 bool IsNull;
174
175 datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc,
176 &IsNull);
177 if (IsNull)
178 continue;
179
180 gistdentryinit(giststate, i,
181 evec->vector + evec->n,
182 datum,
183 NULL, NULL, (OffsetNumber) 0,
184 false, IsNull);
185 evec->n++;
186 }
187
188 /* If this column was all NULLs, the union is NULL */
189 if (evec->n == 0)
190 {
191 attr[i] = (Datum) 0;
192 isnull[i] = true;
193 }
194 else
195 {
196 if (evec->n == 1)
197 {
198 /* unionFn may expect at least two inputs */
199 evec->n = 2;
200 evec->vector[1] = evec->vector[0];
201 }
202
203 /* Make union and store in attr array */
204 attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
205 giststate->supportCollation[i],
206 PointerGetDatum(evec),
207 PointerGetDatum(&attrsize));
208
209 isnull[i] = false;
210 }
211 }
212}
213
214/*
215 * Return an IndexTuple containing the result of applying the "union"
216 * method to the specified IndexTuple vector.
217 */
219gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
220{
221 Datum attr[INDEX_MAX_KEYS];
222 bool isnull[INDEX_MAX_KEYS];
223
224 gistMakeUnionItVec(giststate, itvec, len, attr, isnull);
225
226 return gistFormTuple(giststate, r, attr, isnull, false);
227}
228
229/*
230 * makes union of two key
231 */
232void
233gistMakeUnionKey(GISTSTATE *giststate, int attno,
234 GISTENTRY *entry1, bool isnull1,
235 GISTENTRY *entry2, bool isnull2,
236 Datum *dst, bool *dstisnull)
237{
238 /* we need a GistEntryVector with room for exactly 2 elements */
239 union
240 {
241 GistEntryVector gev;
242 char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
243 } storage;
244 GistEntryVector *evec = &storage.gev;
245 int dstsize;
246
247 evec->n = 2;
248
249 if (isnull1 && isnull2)
250 {
251 *dstisnull = true;
252 *dst = (Datum) 0;
253 }
254 else
255 {
256 if (isnull1 == false && isnull2 == false)
257 {
258 evec->vector[0] = *entry1;
259 evec->vector[1] = *entry2;
260 }
261 else if (isnull1 == false)
262 {
263 evec->vector[0] = *entry1;
264 evec->vector[1] = *entry1;
265 }
266 else
267 {
268 evec->vector[0] = *entry2;
269 evec->vector[1] = *entry2;
270 }
271
272 *dstisnull = false;
273 *dst = FunctionCall2Coll(&giststate->unionFn[attno],
274 giststate->supportCollation[attno],
275 PointerGetDatum(evec),
276 PointerGetDatum(&dstsize));
277 }
278}
279
280bool
281gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
282{
283 bool result = false; /* silence compiler warning */
284
285 FunctionCall3Coll(&giststate->equalFn[attno],
286 giststate->supportCollation[attno],
287 a, b,
288 PointerGetDatum(&result));
289 return result;
290}
291
292/*
293 * Decompress all keys in tuple
294 */
295void
297 OffsetNumber o, GISTENTRY *attdata, bool *isnull)
298{
299 int i;
300
301 for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
302 {
303 Datum datum;
304
305 datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
306 gistdentryinit(giststate, i, &attdata[i],
307 datum, r, p, o,
308 false, isnull[i]);
309 }
310}
311
312/*
313 * Forms union of oldtup and addtup, if union == oldtup then return NULL
314 */
317{
318 bool neednew = false;
319 GISTENTRY oldentries[INDEX_MAX_KEYS],
320 addentries[INDEX_MAX_KEYS];
321 bool oldisnull[INDEX_MAX_KEYS],
322 addisnull[INDEX_MAX_KEYS];
323 Datum attr[INDEX_MAX_KEYS];
324 bool isnull[INDEX_MAX_KEYS];
325 IndexTuple newtup = NULL;
326 int i;
327
328 gistDeCompressAtt(giststate, r, oldtup, NULL,
329 (OffsetNumber) 0, oldentries, oldisnull);
330
331 gistDeCompressAtt(giststate, r, addtup, NULL,
332 (OffsetNumber) 0, addentries, addisnull);
333
334 for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
335 {
336 gistMakeUnionKey(giststate, i,
337 oldentries + i, oldisnull[i],
338 addentries + i, addisnull[i],
339 attr + i, isnull + i);
340
341 if (neednew)
342 /* we already need new key, so we can skip check */
343 continue;
344
345 if (isnull[i])
346 /* union of key may be NULL if and only if both keys are NULL */
347 continue;
348
349 if (!addisnull[i])
350 {
351 if (oldisnull[i] ||
352 !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
353 neednew = true;
354 }
355 }
356
357 if (neednew)
358 {
359 /* need to update key */
360 newtup = gistFormTuple(giststate, r, attr, isnull, false);
361 newtup->t_tid = oldtup->t_tid;
362 }
363
364 return newtup;
365}
366
367/*
368 * Search an upper index page for the entry with lowest penalty for insertion
369 * of the new index key contained in "it".
370 *
371 * Returns the index of the page entry to insert into.
372 */
374gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
375 GISTSTATE *giststate)
376{
377 OffsetNumber result;
378 OffsetNumber maxoff;
380 float best_penalty[INDEX_MAX_KEYS];
381 GISTENTRY entry,
382 identry[INDEX_MAX_KEYS];
383 bool isnull[INDEX_MAX_KEYS];
384 int keep_current_best;
385
387
388 gistDeCompressAtt(giststate, r,
389 it, NULL, (OffsetNumber) 0,
390 identry, isnull);
391
392 /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
393 result = FirstOffsetNumber;
394
395 /*
396 * The index may have multiple columns, and there's a penalty value for
397 * each column. The penalty associated with a column that appears earlier
398 * in the index definition is strictly more important than the penalty of
399 * a column that appears later in the index definition.
400 *
401 * best_penalty[j] is the best penalty we have seen so far for column j,
402 * or -1 when we haven't yet examined column j. Array entries to the
403 * right of the first -1 are undefined.
404 */
405 best_penalty[0] = -1;
406
407 /*
408 * If we find a tuple that's exactly as good as the currently best one, we
409 * could use either one. When inserting a lot of tuples with the same or
410 * similar keys, it's preferable to descend down the same path when
411 * possible, as that's more cache-friendly. On the other hand, if all
412 * inserts land on the same leaf page after a split, we're never going to
413 * insert anything to the other half of the split, and will end up using
414 * only 50% of the available space. Distributing the inserts evenly would
415 * lead to better space usage, but that hurts cache-locality during
416 * insertion. To get the best of both worlds, when we find a tuple that's
417 * exactly as good as the previous best, choose randomly whether to stick
418 * to the old best, or use the new one. Once we decide to stick to the
419 * old best, we keep sticking to it for any subsequent equally good tuples
420 * we might find. This favors tuples with low offsets, but still allows
421 * some inserts to go to other equally-good subtrees.
422 *
423 * keep_current_best is -1 if we haven't yet had to make a random choice
424 * whether to keep the current best tuple. If we have done so, and
425 * decided to keep it, keep_current_best is 1; if we've decided to
426 * replace, keep_current_best is 0. (This state will be reset to -1 as
427 * soon as we've made the replacement, but sometimes we make the choice in
428 * advance of actually finding a replacement best tuple.)
429 */
430 keep_current_best = -1;
431
432 /*
433 * Loop over tuples on page.
434 */
435 maxoff = PageGetMaxOffsetNumber(p);
436 Assert(maxoff >= FirstOffsetNumber);
437
438 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
439 {
441 bool zero_penalty;
442 int j;
443
444 zero_penalty = true;
445
446 /* Loop over index attributes. */
447 for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
448 {
449 Datum datum;
450 float usize;
451 bool IsNull;
452
453 /* Compute penalty for this column. */
454 datum = index_getattr(itup, j + 1, giststate->leafTupdesc,
455 &IsNull);
456 gistdentryinit(giststate, j, &entry, datum, r, p, i,
457 false, IsNull);
458 usize = gistpenalty(giststate, j, &entry, IsNull,
459 &identry[j], isnull[j]);
460 if (usize > 0)
461 zero_penalty = false;
462
463 if (best_penalty[j] < 0 || usize < best_penalty[j])
464 {
465 /*
466 * New best penalty for column. Tentatively select this tuple
467 * as the target, and record the best penalty. Then reset the
468 * next column's penalty to "unknown" (and indirectly, the
469 * same for all the ones to its right). This will force us to
470 * adopt this tuple's penalty values as the best for all the
471 * remaining columns during subsequent loop iterations.
472 */
473 result = i;
474 best_penalty[j] = usize;
475
477 best_penalty[j + 1] = -1;
478
479 /* we have new best, so reset keep-it decision */
480 keep_current_best = -1;
481 }
482 else if (best_penalty[j] == usize)
483 {
484 /*
485 * The current tuple is exactly as good for this column as the
486 * best tuple seen so far. The next iteration of this loop
487 * will compare the next column.
488 */
489 }
490 else
491 {
492 /*
493 * The current tuple is worse for this column than the best
494 * tuple seen so far. Skip the remaining columns and move on
495 * to the next tuple, if any.
496 */
497 zero_penalty = false; /* so outer loop won't exit */
498 break;
499 }
500 }
501
502 /*
503 * If we looped past the last column, and did not update "result",
504 * then this tuple is exactly as good as the prior best tuple.
505 */
506 if (j == IndexRelationGetNumberOfKeyAttributes(r) && result != i)
507 {
508 if (keep_current_best == -1)
509 {
510 /* we didn't make the random choice yet for this old best */
511 keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
512 }
513 if (keep_current_best == 0)
514 {
515 /* we choose to use the new tuple */
516 result = i;
517 /* choose again if there are even more exactly-as-good ones */
518 keep_current_best = -1;
519 }
520 }
521
522 /*
523 * If we find a tuple with zero penalty for all columns, and we've
524 * decided we don't want to search for another tuple with equal
525 * penalty, there's no need to examine remaining tuples; just break
526 * out of the loop and return it.
527 */
528 if (zero_penalty)
529 {
530 if (keep_current_best == -1)
531 {
532 /* we didn't make the random choice yet for this old best */
533 keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
534 }
535 if (keep_current_best == 1)
536 break;
537 }
538 }
539
540 return result;
541}
542
543/*
544 * initialize a GiST entry with a decompressed version of key
545 */
546void
547gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
548 Datum k, Relation r, Page pg, OffsetNumber o,
549 bool l, bool isNull)
550{
551 if (!isNull)
552 {
553 GISTENTRY *dep;
554
555 gistentryinit(*e, k, r, pg, o, l);
556
557 /* there may not be a decompress function in opclass */
558 if (!OidIsValid(giststate->decompressFn[nkey].fn_oid))
559 return;
560
561 dep = (GISTENTRY *)
563 giststate->supportCollation[nkey],
565 /* decompressFn may just return the given pointer */
566 if (dep != e)
567 gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
568 dep->leafkey);
569 }
570 else
571 gistentryinit(*e, (Datum) 0, r, pg, o, l);
572}
573
576 const Datum *attdata, const bool *isnull, bool isleaf)
577{
578 Datum compatt[INDEX_MAX_KEYS];
580
581 gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
582
583 res = index_form_tuple(isleaf ? giststate->leafTupdesc :
584 giststate->nonLeafTupdesc,
585 compatt, isnull);
586
587 /*
588 * The offset number on tuples on internal pages is unused. For historical
589 * reasons, it is set to 0xffff.
590 */
591 ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
592 return res;
593}
594
595void
597 const Datum *attdata, const bool *isnull, bool isleaf, Datum *compatt)
598{
599 int i;
600
601 /*
602 * Call the compress method on each attribute.
603 */
604 for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
605 {
606 if (isnull[i])
607 compatt[i] = (Datum) 0;
608 else
609 {
610 GISTENTRY centry;
611 GISTENTRY *cep;
612
613 gistentryinit(centry, attdata[i], r, NULL, (OffsetNumber) 0,
614 isleaf);
615 /* there may not be a compress function in opclass */
616 if (OidIsValid(giststate->compressFn[i].fn_oid))
617 cep = (GISTENTRY *)
619 giststate->supportCollation[i],
620 PointerGetDatum(&centry)));
621 else
622 cep = &centry;
623 compatt[i] = cep->key;
624 }
625 }
626
627 if (isleaf)
628 {
629 /*
630 * Emplace each included attribute if any.
631 */
632 for (; i < r->rd_att->natts; i++)
633 {
634 if (isnull[i])
635 compatt[i] = (Datum) 0;
636 else
637 compatt[i] = attdata[i];
638 }
639 }
640}
641
642/*
643 * initialize a GiST entry with fetched value in key field
644 */
645static Datum
646gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
647{
648 GISTENTRY fentry;
649 GISTENTRY *fep;
650
651 gistentryinit(fentry, k, r, NULL, (OffsetNumber) 0, false);
652
653 fep = (GISTENTRY *)
654 DatumGetPointer(FunctionCall1Coll(&giststate->fetchFn[nkey],
655 giststate->supportCollation[nkey],
656 PointerGetDatum(&fentry)));
657
658 /* fetchFn set 'key', return it to the caller */
659 return fep->key;
660}
661
662/*
663 * Fetch all keys in tuple.
664 * Returns a new HeapTuple containing the originally-indexed data.
665 */
668{
669 MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt);
671 bool isnull[INDEX_MAX_KEYS];
672 int i;
673
674 for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
675 {
676 Datum datum;
677
678 datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
679
680 if (giststate->fetchFn[i].fn_oid != InvalidOid)
681 {
682 if (!isnull[i])
683 fetchatt[i] = gistFetchAtt(giststate, i, datum, r);
684 else
685 fetchatt[i] = (Datum) 0;
686 }
687 else if (giststate->compressFn[i].fn_oid == InvalidOid)
688 {
689 /*
690 * If opclass does not provide compress method that could change
691 * original value, att is necessarily stored in original form.
692 */
693 if (!isnull[i])
694 fetchatt[i] = datum;
695 else
696 fetchatt[i] = (Datum) 0;
697 }
698 else
699 {
700 /*
701 * Index-only scans not supported for this column. Since the
702 * planner chose an index-only scan anyway, it is not interested
703 * in this column, and we can replace it with a NULL.
704 */
705 isnull[i] = true;
706 fetchatt[i] = (Datum) 0;
707 }
708 }
709
710 /*
711 * Get each included attribute.
712 */
713 for (; i < r->rd_att->natts; i++)
714 {
715 fetchatt[i] = index_getattr(tuple, i + 1, giststate->leafTupdesc,
716 &isnull[i]);
717 }
718 MemoryContextSwitchTo(oldcxt);
719
720 return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull);
721}
722
723float
724gistpenalty(GISTSTATE *giststate, int attno,
725 GISTENTRY *orig, bool isNullOrig,
726 GISTENTRY *add, bool isNullAdd)
727{
728 float penalty = 0.0;
729
730 if (giststate->penaltyFn[attno].fn_strict == false ||
731 (isNullOrig == false && isNullAdd == false))
732 {
733 FunctionCall3Coll(&giststate->penaltyFn[attno],
734 giststate->supportCollation[attno],
735 PointerGetDatum(orig),
736 PointerGetDatum(add),
737 PointerGetDatum(&penalty));
738 /* disallow negative or NaN penalty */
739 if (isnan(penalty) || penalty < 0.0)
740 penalty = 0.0;
741 }
742 else if (isNullOrig && isNullAdd)
743 penalty = 0.0;
744 else
745 {
746 /* try to prevent mixing null and non-null values */
747 penalty = get_float4_infinity();
748 }
749
750 return penalty;
751}
752
753/*
754 * Initialize a new index page
755 */
756void
758{
759 GISTPageOpaque opaque;
760
761 PageInit(page, BLCKSZ, sizeof(GISTPageOpaqueData));
762
763 opaque = GistPageGetOpaque(page);
765 opaque->flags = f;
766 opaque->gist_page_id = GIST_PAGE_ID;
767}
768
769/*
770 * Initialize a new index buffer
771 */
772void
774{
775 Page page;
776
777 page = BufferGetPage(b);
778 gistinitpage(page, f);
779}
780
781/*
782 * Verify that a freshly-read page looks sane.
783 */
784void
786{
787 Page page = BufferGetPage(buf);
788
789 /*
790 * ReadBuffer verifies that every newly-read page passes
791 * PageHeaderIsValid, which means it either contains a reasonably sane
792 * page header or is all-zero. We have to defend against the all-zero
793 * case, however.
794 */
795 if (PageIsNew(page))
797 (errcode(ERRCODE_INDEX_CORRUPTED),
798 errmsg("index \"%s\" contains unexpected zero page at block %u",
801 errhint("Please REINDEX it.")));
802
803 /*
804 * Additionally check that the special area looks sane.
805 */
806 if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
808 (errcode(ERRCODE_INDEX_CORRUPTED),
809 errmsg("index \"%s\" contains corrupted page at block %u",
812 errhint("Please REINDEX it.")));
813}
814
815
816/*
817 * Allocate a new page (either by recycling, or by extending the index file)
818 *
819 * The returned buffer is already pinned and exclusive-locked
820 *
821 * Caller is responsible for initializing the page by calling GISTInitBuffer
822 */
823Buffer
825{
826 Buffer buffer;
827
828 /* First, try to get a page from FSM */
829 for (;;)
830 {
831 BlockNumber blkno = GetFreeIndexPage(r);
832
833 if (blkno == InvalidBlockNumber)
834 break; /* nothing left in FSM */
835
836 buffer = ReadBuffer(r, blkno);
837
838 /*
839 * We have to guard against the possibility that someone else already
840 * recycled this page; the buffer may be locked if so.
841 */
842 if (ConditionalLockBuffer(buffer))
843 {
844 Page page = BufferGetPage(buffer);
845
846 /*
847 * If the page was never initialized, it's OK to use.
848 */
849 if (PageIsNew(page))
850 return buffer;
851
852 gistcheckpage(r, buffer);
853
854 /*
855 * Otherwise, recycle it if deleted, and too old to have any
856 * processes interested in it.
857 */
858 if (gistPageRecyclable(page))
859 {
860 /*
861 * If we are generating WAL for Hot Standby then create a WAL
862 * record that will allow us to conflict with queries running
863 * on standby, in case they have snapshots older than the
864 * page's deleteXid.
865 */
867 gistXLogPageReuse(r, heaprel, blkno, GistPageGetDeleteXid(page));
868
869 return buffer;
870 }
871
872 LockBuffer(buffer, GIST_UNLOCK);
873 }
874
875 /* Can't use it, so release buffer and try again */
876 ReleaseBuffer(buffer);
877 }
878
879 /* Must extend the file */
880 buffer = ExtendBufferedRel(BMR_REL(r), MAIN_FORKNUM, NULL,
882
883 return buffer;
884}
885
886/* Can this page be recycled yet? */
887bool
889{
890 if (PageIsNew(page))
891 return true;
892 if (GistPageIsDeleted(page))
893 {
894 /*
895 * The page was deleted, but when? If it was just deleted, a scan
896 * might have seen the downlink to it, and will read the page later.
897 * As long as that can happen, we must keep the deleted page around as
898 * a tombstone.
899 *
900 * For that check if the deletion XID could still be visible to
901 * anyone. If not, then no scan that's still in progress could have
902 * seen its downlink, and we can recycle it.
903 */
904 FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
905
906 return GlobalVisCheckRemovableFullXid(NULL, deletexid_full);
907 }
908 return false;
909}
910
911bytea *
912gistoptions(Datum reloptions, bool validate)
913{
914 static const relopt_parse_elt tab[] = {
915 {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
916 {"buffering", RELOPT_TYPE_ENUM, offsetof(GiSTOptions, buffering_mode)}
917 };
918
919 return (bytea *) build_reloptions(reloptions, validate,
921 sizeof(GiSTOptions),
922 tab, lengthof(tab));
923}
924
925/*
926 * gistproperty() -- Check boolean properties of indexes.
927 *
928 * This is optional for most AMs, but is required for GiST because the core
929 * property code doesn't support AMPROP_DISTANCE_ORDERABLE. We also handle
930 * AMPROP_RETURNABLE here to save opening the rel to call gistcanreturn.
931 */
932bool
933gistproperty(Oid index_oid, int attno,
934 IndexAMProperty prop, const char *propname,
935 bool *res, bool *isnull)
936{
937 Oid opclass,
938 opfamily,
939 opcintype;
940 int16 procno;
941
942 /* Only answer column-level inquiries */
943 if (attno == 0)
944 return false;
945
946 /*
947 * Currently, GiST distance-ordered scans require that there be a distance
948 * function in the opclass with the default types (i.e. the one loaded
949 * into the relcache entry, see initGISTstate). So we assume that if such
950 * a function exists, then there's a reason for it (rather than grubbing
951 * through all the opfamily's operators to find an ordered one).
952 *
953 * Essentially the same code can test whether we support returning the
954 * column data, since that's true if the opclass provides a fetch proc.
955 */
956
957 switch (prop)
958 {
960 procno = GIST_DISTANCE_PROC;
961 break;
963 procno = GIST_FETCH_PROC;
964 break;
965 default:
966 return false;
967 }
968
969 /* First we need to know the column's opclass. */
970 opclass = get_index_column_opclass(index_oid, attno);
971 if (!OidIsValid(opclass))
972 {
973 *isnull = true;
974 return true;
975 }
976
977 /* Now look up the opclass family and input datatype. */
978 if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
979 {
980 *isnull = true;
981 return true;
982 }
983
984 /* And now we can check whether the function is provided. */
985
986 *res = SearchSysCacheExists4(AMPROCNUM,
987 ObjectIdGetDatum(opfamily),
988 ObjectIdGetDatum(opcintype),
989 ObjectIdGetDatum(opcintype),
990 Int16GetDatum(procno));
991
992 /*
993 * Special case: even without a fetch function, AMPROP_RETURNABLE is true
994 * if the opclass has no compress function.
995 */
996 if (prop == AMPROP_RETURNABLE && !*res)
997 {
998 *res = !SearchSysCacheExists4(AMPROCNUM,
999 ObjectIdGetDatum(opfamily),
1000 ObjectIdGetDatum(opcintype),
1001 ObjectIdGetDatum(opcintype),
1003 }
1004
1005 *isnull = false;
1006
1007 return true;
1008}
1009
1010/*
1011 * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
1012 * splits anyway. This function provides a fake sequence of LSNs for that
1013 * purpose.
1014 */
1017{
1018 if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
1019 {
1020 /*
1021 * Temporary relations are only accessible in our session, so a simple
1022 * backend-local counter will do.
1023 */
1024 static XLogRecPtr counter = FirstNormalUnloggedLSN;
1025
1026 return counter++;
1027 }
1028 else if (RelationIsPermanent(rel))
1029 {
1030 /*
1031 * WAL-logging on this relation will start after commit, so its LSNs
1032 * must be distinct numbers smaller than the LSN at the next commit.
1033 * Emit a dummy WAL record if insert-LSN hasn't advanced after the
1034 * last call.
1035 */
1036 static XLogRecPtr lastlsn = InvalidXLogRecPtr;
1037 XLogRecPtr currlsn = GetXLogInsertRecPtr();
1038
1039 /* Shouldn't be called for WAL-logging relations */
1040 Assert(!RelationNeedsWAL(rel));
1041
1042 /* No need for an actual record if we already have a distinct LSN */
1043 if (!XLogRecPtrIsInvalid(lastlsn) && lastlsn == currlsn)
1044 currlsn = gistXLogAssignLSN();
1045
1046 lastlsn = currlsn;
1047 return currlsn;
1048 }
1049 else
1050 {
1051 /*
1052 * Unlogged relations are accessible from other backends, and survive
1053 * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
1054 */
1055 Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
1056 return GetFakeLSNForUnloggedRel();
1057 }
1058}
1059
1060/*
1061 * Returns the same number that was received.
1062 *
1063 * This is for GiST opclasses that use the RT*StrategyNumber constants.
1064 */
1065Datum
1067{
1069
1070 PG_RETURN_UINT16(strat);
1071}
1072
1073/*
1074 * Returns the opclass's private stratnum used for the given strategy.
1075 *
1076 * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
1077 * and returns the result.
1078 * Returns InvalidStrategy if the function is not defined.
1079 */
1082{
1083 Oid opfamily;
1084 Oid opcintype;
1085 Oid funcid;
1086 Datum result;
1087
1088 /* Look up the opclass family and input datatype. */
1089 if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
1090 return InvalidStrategy;
1091
1092 /* Check whether the function is provided. */
1093 funcid = get_opfamily_proc(opfamily, opcintype, opcintype, GIST_STRATNUM_PROC);
1094 if (!OidIsValid(funcid))
1095 return InvalidStrategy;
1096
1097 /* Ask the translation function */
1098 result = OidFunctionCall1Coll(funcid, InvalidOid, UInt16GetDatum(strat));
1099 return DatumGetUInt16(result);
1100}
IndexAMProperty
Definition: amapi.h:35
@ AMPROP_DISTANCE_ORDERABLE
Definition: amapi.h:42
@ AMPROP_RETURNABLE
Definition: amapi.h:43
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
int Buffer
Definition: buf.h:23
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3724
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:846
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5184
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4924
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5158
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:746
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
@ EB_LOCK_FIRST
Definition: bufmgr.h:86
#define BMR_REL(p_rel)
Definition: bufmgr.h:107
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:896
static bool PageIsEmpty(Page page)
Definition: bufpage.h:223
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsNew(Page page)
Definition: bufpage.h:233
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:471
static uint16 PageGetSpecialSize(Page page)
Definition: bufpage.h:316
#define MAXALIGN(LEN)
Definition: c.h:765
#define Assert(condition)
Definition: c.h:812
int16_t int16
Definition: c.h:480
uint32_t uint32
Definition: c.h:485
#define lengthof(array)
Definition: c.h:742
#define OidIsValid(objectId)
Definition: c.h:729
size_t Size
Definition: c.h:559
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
static float4 get_float4_infinity(void)
Definition: float.h:74
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
Datum OidFunctionCall1Coll(Oid functionId, Oid collation, Datum arg1)
Definition: fmgr.c:1411
Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
Definition: fmgr.c:1171
Datum FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1)
Definition: fmgr.c:1129
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_UINT16(x)
Definition: fmgr.h:357
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define GIST_STRATNUM_PROC
Definition: gist.h:42
#define GIST_PAGE_ID
Definition: gist.h:111
struct GISTENTRY GISTENTRY
#define GIST_FETCH_PROC
Definition: gist.h:39
#define GIST_COMPRESS_PROC
Definition: gist.h:33
#define GEVHDRSZ
Definition: gist.h:239
#define GistPageIsLeaf(page)
Definition: gist.h:169
#define GIST_DISTANCE_PROC
Definition: gist.h:38
static FullTransactionId GistPageGetDeleteXid(Page page)
Definition: gist.h:215
#define GistPageIsDeleted(page)
Definition: gist.h:172
#define GistPageGetOpaque(page)
Definition: gist.h:167
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:244
#define GIST_UNLOCK
Definition: gist_private.h:44
#define GiSTPageSize
Definition: gist_private.h:476
void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, OffsetNumber o, GISTENTRY *attdata, bool *isnull)
Definition: gistutil.c:296
bytea * gistoptions(Datum reloptions, bool validate)
Definition: gistutil.c:912
Buffer gistNewBuffer(Relation r, Relation heaprel)
Definition: gistutil.c:824
bool gistproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: gistutil.c:933
bool gistPageRecyclable(Page page)
Definition: gistutil.c:888
void gistMakeUnionKey(GISTSTATE *giststate, int attno, GISTENTRY *entry1, bool isnull1, GISTENTRY *entry2, bool isnull2, Datum *dst, bool *dstisnull)
Definition: gistutil.c:233
void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, Datum *attr, bool *isnull)
Definition: gistutil.c:155
bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
Definition: gistutil.c:59
HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple)
Definition: gistutil.c:667
bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
Definition: gistutil.c:281
static Datum gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
Definition: gistutil.c:646
void gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
Definition: gistutil.c:34
IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, const Datum *attdata, const bool *isnull, bool isleaf)
Definition: gistutil.c:575
IndexTuple * gistextractpage(Page page, int *len)
Definition: gistutil.c:95
void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, bool l, bool isNull)
Definition: gistutil.c:547
bool gistfitpage(IndexTuple *itvec, int len)
Definition: gistutil.c:79
IndexTuple gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
Definition: gistutil.c:316
OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate)
Definition: gistutil.c:374
XLogRecPtr gistGetFakeLSN(Relation rel)
Definition: gistutil.c:1016
void gistCompressValues(GISTSTATE *giststate, Relation r, const Datum *attdata, const bool *isnull, bool isleaf, Datum *compatt)
Definition: gistutil.c:596
IndexTuple * gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
Definition: gistutil.c:114
Datum gist_stratnum_identity(PG_FUNCTION_ARGS)
Definition: gistutil.c:1066
StrategyNumber GistTranslateStratnum(Oid opclass, StrategyNumber strat)
Definition: gistutil.c:1081
void gistinitpage(Page page, uint32 f)
Definition: gistutil.c:757
IndexTuple gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
Definition: gistutil.c:219
void GISTInitBuffer(Buffer b, uint32 f)
Definition: gistutil.c:773
void gistcheckpage(Relation rel, Buffer buf)
Definition: gistutil.c:785
IndexTupleData * gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
Definition: gistutil.c:127
float gistpenalty(GISTSTATE *giststate, int attno, GISTENTRY *orig, bool isNullOrig, GISTENTRY *add, bool isNullAdd)
Definition: gistutil.c:724
XLogRecPtr gistXLogAssignLSN(void)
Definition: gistxlog.c:576
void gistXLogPageReuse(Relation rel, Relation heaprel, BlockNumber blkno, FullTransactionId deleteXid)
Definition: gistxlog.c:594
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
#define storage
Definition: indent_codes.h:68
BlockNumber GetFreeIndexPage(Relation rel)
Definition: indexfsm.c:38
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: indextuple.c:44
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int j
Definition: isn.c:73
int i
Definition: isn.c:72
Pointer Item
Definition: item.h:17
struct ItemIdData ItemIdData
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexTupleSize(itup)
Definition: itup.h:70
static Datum index_getattr(IndexTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
Definition: itup.h:117
bool get_opclass_opfamily_and_input_type(Oid opclass, Oid *opfamily, Oid *opcintype)
Definition: lsyscache.c:1235
Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum)
Definition: lsyscache.c:796
Oid get_index_column_opclass(Oid index_oid, int attno)
Definition: lsyscache.c:3512
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
#define INDEX_MAX_KEYS
const void size_t len
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
bool pg_prng_bool(pg_prng_state *state)
Definition: pg_prng.c:313
static char * buf
Definition: pg_test_fsync.c:72
static int fillfactor
Definition: pgbench.c:187
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:172
static Datum UInt16GetDatum(uint16 X)
Definition: postgres.h:192
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static uint16 DatumGetUInt16(Datum X)
Definition: postgres.h:182
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
e
Definition: preproc-init.c:82
bool GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid)
Definition: procarray.c:4286
MemoryContextSwitchTo(old_ctx)
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationNeedsWAL(relation)
Definition: rel.h:628
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:524
#define RelationIsPermanent(relation)
Definition: rel.h:617
void * build_reloptions(Datum reloptions, bool validate, relopt_kind kind, Size relopt_struct_size, const relopt_parse_elt *relopt_elems, int num_relopt_elems)
Definition: reloptions.c:1908
@ RELOPT_KIND_GIST
Definition: reloptions.h:47
@ RELOPT_TYPE_ENUM
Definition: reloptions.h:34
@ RELOPT_TYPE_INT
Definition: reloptions.h:32
@ MAIN_FORKNUM
Definition: relpath.h:58
static pg_noinline void Size size
Definition: slab.c:607
uint16 StrategyNumber
Definition: stratnum.h:22
#define InvalidStrategy
Definition: stratnum.h:24
Oid fn_oid
Definition: fmgr.h:59
bool fn_strict
Definition: fmgr.h:61
OffsetNumber offset
Definition: gist.h:163
Datum key
Definition: gist.h:160
Page page
Definition: gist.h:162
Relation rel
Definition: gist.h:161
bool leafkey
Definition: gist.h:164
uint16 gist_page_id
Definition: gist.h:82
BlockNumber rightlink
Definition: gist.h:80
uint16 flags
Definition: gist.h:81
FmgrInfo fetchFn[INDEX_MAX_KEYS]
Definition: gist_private.h:94
TupleDesc leafTupdesc
Definition: gist_private.h:80
TupleDesc fetchTupdesc
Definition: gist_private.h:83
TupleDesc nonLeafTupdesc
Definition: gist_private.h:81
FmgrInfo penaltyFn[INDEX_MAX_KEYS]
Definition: gist_private.h:90
MemoryContext tempCxt
Definition: gist_private.h:78
Oid supportCollation[INDEX_MAX_KEYS]
Definition: gist_private.h:97
FmgrInfo decompressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:89
FmgrInfo compressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:88
FmgrInfo equalFn[INDEX_MAX_KEYS]
Definition: gist_private.h:92
FmgrInfo unionFn[INDEX_MAX_KEYS]
Definition: gist_private.h:87
GISTENTRY vector[FLEXIBLE_ARRAY_MEMBER]
Definition: gist.h:236
int32 n
Definition: gist.h:235
ItemPointerData t_tid
Definition: itup.h:37
TupleDesc rd_att
Definition: rel.h:112
Form_pg_class rd_rel
Definition: rel.h:111
Definition: c.h:641
#define SearchSysCacheExists4(cacheId, key1, key2, key3, key4)
Definition: syscache.h:106
#define fetchatt(A, T)
Definition: tupmacs.h:47
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9435
XLogRecPtr GetFakeLSNForUnloggedRel(void)
Definition: xlog.c:4604
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28