PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ginget.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ginget.c
4  * fetch tuples from a GIN scan.
5  *
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gin/ginget.c
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/gin_private.h"
18 #include "access/relscan.h"
19 #include "miscadmin.h"
20 #include "utils/datum.h"
21 #include "utils/memutils.h"
22 
23 /* GUC parameter */
25 
26 typedef struct pendingPosition
27 {
32  bool *hasMatchKey;
34 
35 
36 /*
37  * Goes to the next page if current offset is outside of bounds
38  */
39 static bool
41 {
42  Page page = BufferGetPage(stack->buffer);
43 
44  if (stack->off > PageGetMaxOffsetNumber(page))
45  {
46  /*
47  * We scanned the whole page, so we should take right page
48  */
49  if (GinPageRightMost(page))
50  return false; /* no more pages */
51 
52  stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
53  stack->blkno = BufferGetBlockNumber(stack->buffer);
54  stack->off = FirstOffsetNumber;
55  }
56 
57  return true;
58 }
59 
60 /*
61  * Scan all pages of a posting tree and save all its heap ItemPointers
62  * in scanEntry->matchBitmap
63  */
64 static void
66  BlockNumber rootPostingTree, Snapshot snapshot)
67 {
68  GinBtreeData btree;
69  GinBtreeStack *stack;
70  Buffer buffer;
71  Page page;
72 
73  /* Descend to the leftmost leaf page */
74  stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
75  buffer = stack->buffer;
76  IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
77 
78  freeGinBtreeStack(stack);
79 
80  /*
81  * Loop iterates through all leaf pages of posting tree
82  */
83  for (;;)
84  {
85  page = BufferGetPage(buffer);
86  if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
87  {
88  int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
89 
90  scanEntry->predictNumberResult += n;
91  }
92 
93  if (GinPageRightMost(page))
94  break; /* no more pages */
95 
96  buffer = ginStepRight(buffer, index, GIN_SHARE);
97  }
98 
99  UnlockReleaseBuffer(buffer);
100 }
101 
102 /*
103  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
104  * match the search entry. This supports three different match modes:
105  *
106  * 1. Partial-match support: scan from current point until the
107  * comparePartialFn says we're done.
108  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
109  * key for the current attnum) until we hit null items or end of attnum
110  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
111  * key for the current attnum) until we hit end of attnum
112  *
113  * Returns true if done, false if it's necessary to restart scan from scratch
114  */
115 static bool
117  GinScanEntry scanEntry, Snapshot snapshot)
118 {
119  OffsetNumber attnum;
120  Form_pg_attribute attr;
121 
122  /* Initialize empty bitmap result */
123  scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
124 
125  /* Null query cannot partial-match anything */
126  if (scanEntry->isPartialMatch &&
127  scanEntry->queryCategory != GIN_CAT_NORM_KEY)
128  return true;
129 
130  /* Locate tupdesc entry for key column (for attbyval/attlen data) */
131  attnum = scanEntry->attnum;
132  attr = btree->ginstate->origTupdesc->attrs[attnum - 1];
133 
134  for (;;)
135  {
136  Page page;
137  IndexTuple itup;
138  Datum idatum;
139  GinNullCategory icategory;
140 
141  /*
142  * stack->off points to the interested entry, buffer is already locked
143  */
144  if (moveRightIfItNeeded(btree, stack) == false)
145  return true;
146 
147  page = BufferGetPage(stack->buffer);
148  TestForOldSnapshot(snapshot, btree->index, page);
149  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
150 
151  /*
152  * If tuple stores another attribute then stop scan
153  */
154  if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
155  return true;
156 
157  /* Safe to fetch attribute value */
158  idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
159 
160  /*
161  * Check for appropriate scan stop conditions
162  */
163  if (scanEntry->isPartialMatch)
164  {
165  int32 cmp;
166 
167  /*
168  * In partial match, stop scan at any null (including
169  * placeholders); partial matches never match nulls
170  */
171  if (icategory != GIN_CAT_NORM_KEY)
172  return true;
173 
174  /*----------
175  * Check of partial match.
176  * case cmp == 0 => match
177  * case cmp > 0 => not match and finish scan
178  * case cmp < 0 => not match and continue scan
179  *----------
180  */
181  cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
182  btree->ginstate->supportCollation[attnum - 1],
183  scanEntry->queryKey,
184  idatum,
185  UInt16GetDatum(scanEntry->strategy),
186  PointerGetDatum(scanEntry->extra_data)));
187 
188  if (cmp > 0)
189  return true;
190  else if (cmp < 0)
191  {
192  stack->off++;
193  continue;
194  }
195  }
196  else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
197  {
198  /*
199  * In ALL mode, we are not interested in null items, so we can
200  * stop if we get to a null-item placeholder (which will be the
201  * last entry for a given attnum). We do want to include NULL_KEY
202  * and EMPTY_ITEM entries, though.
203  */
204  if (icategory == GIN_CAT_NULL_ITEM)
205  return true;
206  }
207 
208  /*
209  * OK, we want to return the TIDs listed in this entry.
210  */
211  if (GinIsPostingTree(itup))
212  {
213  BlockNumber rootPostingTree = GinGetPostingTree(itup);
214 
215  /*
216  * We should unlock current page (but not unpin) during tree scan
217  * to prevent deadlock with vacuum processes.
218  *
219  * We save current entry value (idatum) to be able to re-find our
220  * tuple after re-locking
221  */
222  if (icategory == GIN_CAT_NORM_KEY)
223  idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
224 
225  LockBuffer(stack->buffer, GIN_UNLOCK);
226 
227  /* Collect all the TIDs in this entry's posting tree */
228  scanPostingTree(btree->index, scanEntry, rootPostingTree,
229  snapshot);
230 
231  /*
232  * We lock again the entry page and while it was unlocked insert
233  * might have occurred, so we need to re-find our position.
234  */
235  LockBuffer(stack->buffer, GIN_SHARE);
236  page = BufferGetPage(stack->buffer);
237  if (!GinPageIsLeaf(page))
238  {
239  /*
240  * Root page becomes non-leaf while we unlock it. We will
241  * start again, this situation doesn't occur often - root can
242  * became a non-leaf only once per life of index.
243  */
244  return false;
245  }
246 
247  /* Search forward to re-find idatum */
248  for (;;)
249  {
250  Datum newDatum;
251  GinNullCategory newCategory;
252 
253  if (moveRightIfItNeeded(btree, stack) == false)
254  elog(ERROR, "lost saved point in index"); /* must not happen !!! */
255 
256  page = BufferGetPage(stack->buffer);
257  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
258 
259  if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
260  elog(ERROR, "lost saved point in index"); /* must not happen !!! */
261  newDatum = gintuple_get_key(btree->ginstate, itup,
262  &newCategory);
263 
264  if (ginCompareEntries(btree->ginstate, attnum,
265  newDatum, newCategory,
266  idatum, icategory) == 0)
267  break; /* Found! */
268 
269  stack->off++;
270  }
271 
272  if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
273  pfree(DatumGetPointer(idatum));
274  }
275  else
276  {
277  ItemPointer ipd;
278  int nipd;
279 
280  ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
281  tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
282  scanEntry->predictNumberResult += GinGetNPosting(itup);
283  pfree(ipd);
284  }
285 
286  /*
287  * Done with this entry, go to the next
288  */
289  stack->off++;
290  }
291 }
292 
293 /*
294  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
295  */
296 static void
297 startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
298 {
299  GinBtreeData btreeEntry;
300  GinBtreeStack *stackEntry;
301  Page page;
302  bool needUnlock;
303 
304 restartScanEntry:
305  entry->buffer = InvalidBuffer;
306  ItemPointerSetMin(&entry->curItem);
307  entry->offset = InvalidOffsetNumber;
308  if (entry->list)
309  pfree(entry->list);
310  entry->list = NULL;
311  entry->nlist = 0;
312  entry->matchBitmap = NULL;
313  entry->matchResult = NULL;
314  entry->reduceResult = FALSE;
315  entry->predictNumberResult = 0;
316 
317  /*
318  * we should find entry, and begin scan of posting tree or just store
319  * posting list in memory
320  */
321  ginPrepareEntryScan(&btreeEntry, entry->attnum,
322  entry->queryKey, entry->queryCategory,
323  ginstate);
324  stackEntry = ginFindLeafPage(&btreeEntry, true, snapshot);
325  page = BufferGetPage(stackEntry->buffer);
326  /* ginFindLeafPage() will have already checked snapshot age. */
327  needUnlock = TRUE;
328 
329  entry->isFinished = TRUE;
330 
331  if (entry->isPartialMatch ||
333  {
334  /*
335  * btreeEntry.findItem locates the first item >= given search key.
336  * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
337  * because of the way the GIN_CAT_EMPTY_QUERY category code is
338  * assigned.) We scan forward from there and collect all TIDs needed
339  * for the entry type.
340  */
341  btreeEntry.findItem(&btreeEntry, stackEntry);
342  if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
343  == false)
344  {
345  /*
346  * GIN tree was seriously restructured, so we will cleanup all
347  * found data and rescan. See comments near 'return false' in
348  * collectMatchBitmap()
349  */
350  if (entry->matchBitmap)
351  {
352  if (entry->matchIterator)
354  entry->matchIterator = NULL;
355  tbm_free(entry->matchBitmap);
356  entry->matchBitmap = NULL;
357  }
358  LockBuffer(stackEntry->buffer, GIN_UNLOCK);
359  freeGinBtreeStack(stackEntry);
360  goto restartScanEntry;
361  }
362 
363  if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
364  {
366  entry->isFinished = FALSE;
367  }
368  }
369  else if (btreeEntry.findItem(&btreeEntry, stackEntry))
370  {
371  IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
372 
373  if (GinIsPostingTree(itup))
374  {
375  BlockNumber rootPostingTree = GinGetPostingTree(itup);
376  GinBtreeStack *stack;
377  Page page;
378  ItemPointerData minItem;
379 
380  /*
381  * We should unlock entry page before touching posting tree to
382  * prevent deadlocks with vacuum processes. Because entry is never
383  * deleted from page and posting tree is never reduced to the
384  * posting list, we can unlock page after getting BlockNumber of
385  * root of posting tree.
386  */
387  LockBuffer(stackEntry->buffer, GIN_UNLOCK);
388  needUnlock = FALSE;
389 
390  stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
391  rootPostingTree, snapshot);
392  entry->buffer = stack->buffer;
393 
394  /*
395  * We keep buffer pinned because we need to prevent deletion of
396  * page during scan. See GIN's vacuum implementation. RefCount is
397  * increased to keep buffer pinned after freeGinBtreeStack() call.
398  */
399  IncrBufferRefCount(entry->buffer);
400 
401  page = BufferGetPage(entry->buffer);
402 
403  /*
404  * Load the first page into memory.
405  */
406  ItemPointerSetMin(&minItem);
407  entry->list = GinDataLeafPageGetItems(page, &entry->nlist, minItem);
408 
409  entry->predictNumberResult = stack->predictNumber * entry->nlist;
410 
411  LockBuffer(entry->buffer, GIN_UNLOCK);
412  freeGinBtreeStack(stack);
413  entry->isFinished = FALSE;
414  }
415  else if (GinGetNPosting(itup) > 0)
416  {
417  entry->list = ginReadTuple(ginstate, entry->attnum, itup,
418  &entry->nlist);
419  entry->predictNumberResult = entry->nlist;
420 
421  entry->isFinished = FALSE;
422  }
423  }
424 
425  if (needUnlock)
426  LockBuffer(stackEntry->buffer, GIN_UNLOCK);
427  freeGinBtreeStack(stackEntry);
428 }
429 
430 /*
431  * Comparison function for scan entry indexes. Sorts by predictNumberResult,
432  * least frequent items first.
433  */
434 static int
435 entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
436 {
437  const GinScanKey key = (const GinScanKey) arg;
438  int i1 = *(const int *) a1;
439  int i2 = *(const int *) a2;
440  uint32 n1 = key->scanEntry[i1]->predictNumberResult;
441  uint32 n2 = key->scanEntry[i2]->predictNumberResult;
442 
443  if (n1 < n2)
444  return -1;
445  else if (n1 == n2)
446  return 0;
447  else
448  return 1;
449 }
450 
451 static void
453 {
455  int i;
456  int j;
457  int *entryIndexes;
458 
459  ItemPointerSetMin(&key->curItem);
460  key->curItemMatches = false;
461  key->recheckCurItem = false;
462  key->isFinished = false;
463 
464  /*
465  * Divide the entries into two distinct sets: required and additional.
466  * Additional entries are not enough for a match alone, without any items
467  * from the required set, but are needed by the consistent function to
468  * decide if an item matches. When scanning, we can skip over items from
469  * additional entries that have no corresponding matches in any of the
470  * required entries. That speeds up queries like "frequent & rare"
471  * considerably, if the frequent term can be put in the additional set.
472  *
473  * There can be many legal ways to divide them entries into these two
474  * sets. A conservative division is to just put everything in the required
475  * set, but the more you can put in the additional set, the more you can
476  * skip during the scan. To maximize skipping, we try to put as many
477  * frequent items as possible into additional, and less frequent ones into
478  * required. To do that, sort the entries by frequency
479  * (predictNumberResult), and put entries into the required set in that
480  * order, until the consistent function says that none of the remaining
481  * entries can form a match, without any items from the required set. The
482  * rest go to the additional set.
483  */
484  if (key->nentries > 1)
485  {
487 
488  entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
489  for (i = 0; i < key->nentries; i++)
490  entryIndexes[i] = i;
491  qsort_arg(entryIndexes, key->nentries, sizeof(int),
493 
494  for (i = 0; i < key->nentries - 1; i++)
495  {
496  /* Pass all entries <= i as FALSE, and the rest as MAYBE */
497  for (j = 0; j <= i; j++)
498  key->entryRes[entryIndexes[j]] = GIN_FALSE;
499  for (j = i + 1; j < key->nentries; j++)
500  key->entryRes[entryIndexes[j]] = GIN_MAYBE;
501 
502  if (key->triConsistentFn(key) == GIN_FALSE)
503  break;
504  }
505  /* i is now the last required entry. */
506 
508 
509  key->nrequired = i + 1;
510  key->nadditional = key->nentries - key->nrequired;
511  key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
512  key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
513 
514  j = 0;
515  for (i = 0; i < key->nrequired; i++)
516  key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
517  for (i = 0; i < key->nadditional; i++)
518  key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
519 
520  /* clean up after consistentFn calls (also frees entryIndexes) */
522  }
523  else
524  {
526 
527  key->nrequired = 1;
528  key->nadditional = 0;
529  key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
530  key->requiredEntries[0] = key->scanEntry[0];
531  }
532  MemoryContextSwitchTo(oldCtx);
533 }
534 
535 static void
537 {
538  GinScanOpaque so = (GinScanOpaque) scan->opaque;
539  GinState *ginstate = &so->ginstate;
540  uint32 i;
541 
542  for (i = 0; i < so->totalentries; i++)
543  startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
544 
545  if (GinFuzzySearchLimit > 0)
546  {
547  /*
548  * If all of keys more than threshold we will try to reduce result, we
549  * hope (and only hope, for intersection operation of array our
550  * supposition isn't true), that total result will not more than
551  * minimal predictNumberResult.
552  */
553  bool reduce = true;
554 
555  for (i = 0; i < so->totalentries; i++)
556  {
558  {
559  reduce = false;
560  break;
561  }
562  }
563  if (reduce)
564  {
565  for (i = 0; i < so->totalentries; i++)
566  {
568  so->entries[i]->reduceResult = TRUE;
569  }
570  }
571  }
572 
573  /*
574  * Now that we have the estimates for the entry frequencies, finish
575  * initializing the scan keys.
576  */
577  for (i = 0; i < so->nkeys; i++)
578  startScanKey(ginstate, so, so->keys + i);
579 }
580 
581 /*
582  * Load the next batch of item pointers from a posting tree.
583  *
584  * Note that we copy the page into GinScanEntry->list array and unlock it, but
585  * keep it pinned to prevent interference with vacuum.
586  */
587 static void
589  ItemPointerData advancePast, Snapshot snapshot)
590 {
591  Page page;
592  int i;
593  bool stepright;
594 
595  if (!BufferIsValid(entry->buffer))
596  {
597  entry->isFinished = true;
598  return;
599  }
600 
601  /*
602  * We have two strategies for finding the correct page: step right from
603  * the current page, or descend the tree again from the root. If
604  * advancePast equals the current item, the next matching item should be
605  * on the next page, so we step right. Otherwise, descend from root.
606  */
607  if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
608  {
609  stepright = true;
610  LockBuffer(entry->buffer, GIN_SHARE);
611  }
612  else
613  {
614  GinBtreeStack *stack;
615 
616  ReleaseBuffer(entry->buffer);
617 
618  /*
619  * Set the search key, and find the correct leaf page.
620  */
621  if (ItemPointerIsLossyPage(&advancePast))
622  {
623  ItemPointerSet(&entry->btree.itemptr,
624  GinItemPointerGetBlockNumber(&advancePast) + 1,
626  }
627  else
628  {
629  ItemPointerSet(&entry->btree.itemptr,
630  GinItemPointerGetBlockNumber(&advancePast),
632  }
633  entry->btree.fullScan = false;
634  stack = ginFindLeafPage(&entry->btree, true, snapshot);
635 
636  /* we don't need the stack, just the buffer. */
637  entry->buffer = stack->buffer;
638  IncrBufferRefCount(entry->buffer);
639  freeGinBtreeStack(stack);
640  stepright = false;
641  }
642 
643  elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
644  GinItemPointerGetBlockNumber(&advancePast),
645  GinItemPointerGetOffsetNumber(&advancePast),
646  !stepright);
647 
648  page = BufferGetPage(entry->buffer);
649  for (;;)
650  {
651  entry->offset = InvalidOffsetNumber;
652  if (entry->list)
653  {
654  pfree(entry->list);
655  entry->list = NULL;
656  entry->nlist = 0;
657  }
658 
659  if (stepright)
660  {
661  /*
662  * We've processed all the entries on this page. If it was the
663  * last page in the tree, we're done.
664  */
665  if (GinPageRightMost(page))
666  {
667  UnlockReleaseBuffer(entry->buffer);
668  entry->buffer = InvalidBuffer;
669  entry->isFinished = TRUE;
670  return;
671  }
672 
673  /*
674  * Step to next page, following the right link. then find the
675  * first ItemPointer greater than advancePast.
676  */
677  entry->buffer = ginStepRight(entry->buffer,
678  ginstate->index,
679  GIN_SHARE);
680  page = BufferGetPage(entry->buffer);
681  }
682  stepright = true;
683 
684  if (GinPageGetOpaque(page)->flags & GIN_DELETED)
685  continue; /* page was deleted by concurrent vacuum */
686 
687  /*
688  * The first item > advancePast might not be on this page, but
689  * somewhere to the right, if the page was split, or a non-match from
690  * another key in the query allowed us to skip some items from this
691  * entry. Keep following the right-links until we re-find the correct
692  * page.
693  */
694  if (!GinPageRightMost(page) &&
695  ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
696  {
697  /*
698  * the item we're looking is > the right bound of the page, so it
699  * can't be on this page.
700  */
701  continue;
702  }
703 
704  entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
705 
706  for (i = 0; i < entry->nlist; i++)
707  {
708  if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
709  {
710  entry->offset = i;
711 
712  if (GinPageRightMost(page))
713  {
714  /* after processing the copied items, we're done. */
715  UnlockReleaseBuffer(entry->buffer);
716  entry->buffer = InvalidBuffer;
717  }
718  else
719  LockBuffer(entry->buffer, GIN_UNLOCK);
720  return;
721  }
722  }
723  }
724 }
725 
726 #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
727 #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
728 
729 /*
730  * Sets entry->curItem to next heap item pointer > advancePast, for one entry
731  * of one scan key, or sets entry->isFinished to TRUE if there are no more.
732  *
733  * Item pointers are returned in ascending order.
734  *
735  * Note: this can return a "lossy page" item pointer, indicating that the
736  * entry potentially matches all items on that heap page. However, it is
737  * not allowed to return both a lossy page pointer and exact (regular)
738  * item pointers for the same page. (Doing so would break the key-combination
739  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
740  * current implementation this is guaranteed by the behavior of tidbitmaps.
741  */
742 static void
744  ItemPointerData advancePast, Snapshot snapshot)
745 {
746  Assert(!entry->isFinished);
747 
748  Assert(!ItemPointerIsValid(&entry->curItem) ||
749  ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
750 
751  if (entry->matchBitmap)
752  {
753  /* A bitmap result */
754  BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
755  OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
756  bool gotitem = false;
757 
758  do
759  {
760  /*
761  * If we've exhausted all items on this block, move to next block
762  * in the bitmap.
763  */
764  while (entry->matchResult == NULL ||
765  (entry->matchResult->ntuples >= 0 &&
766  entry->offset >= entry->matchResult->ntuples) ||
767  entry->matchResult->blockno < advancePastBlk ||
768  (ItemPointerIsLossyPage(&advancePast) &&
769  entry->matchResult->blockno == advancePastBlk))
770  {
771  entry->matchResult = tbm_iterate(entry->matchIterator);
772 
773  if (entry->matchResult == NULL)
774  {
777  entry->matchIterator = NULL;
778  entry->isFinished = TRUE;
779  break;
780  }
781 
782  /*
783  * Reset counter to the beginning of entry->matchResult. Note:
784  * entry->offset is still greater than matchResult->ntuples if
785  * matchResult is lossy. So, on next call we will get next
786  * result from TIDBitmap.
787  */
788  entry->offset = 0;
789  }
790  if (entry->isFinished)
791  break;
792 
793  /*
794  * We're now on the first page after advancePast which has any
795  * items on it. If it's a lossy result, return that.
796  */
797  if (entry->matchResult->ntuples < 0)
798  {
800  entry->matchResult->blockno);
801 
802  /*
803  * We might as well fall out of the loop; we could not
804  * estimate number of results on this page to support correct
805  * reducing of result even if it's enabled.
806  */
807  gotitem = true;
808  break;
809  }
810 
811  /*
812  * Not a lossy page. Skip over any offsets <= advancePast, and
813  * return that.
814  */
815  if (entry->matchResult->blockno == advancePastBlk)
816  {
817  /*
818  * First, do a quick check against the last offset on the
819  * page. If that's > advancePast, so are all the other
820  * offsets.
821  */
822  if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
823  {
824  entry->offset = entry->matchResult->ntuples;
825  continue;
826  }
827 
828  /* Otherwise scan to find the first item > advancePast */
829  while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
830  entry->offset++;
831  }
832 
833  ItemPointerSet(&entry->curItem,
834  entry->matchResult->blockno,
835  entry->matchResult->offsets[entry->offset]);
836  entry->offset++;
837  gotitem = true;
838  } while (!gotitem || (entry->reduceResult == TRUE && dropItem(entry)));
839  }
840  else if (!BufferIsValid(entry->buffer))
841  {
842  /*
843  * A posting list from an entry tuple, or the last page of a posting
844  * tree.
845  */
846  do
847  {
848  if (entry->offset >= entry->nlist)
849  {
851  entry->isFinished = TRUE;
852  break;
853  }
854 
855  entry->curItem = entry->list[entry->offset++];
856  } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
857  /* XXX: shouldn't we apply the fuzzy search limit here? */
858  }
859  else
860  {
861  /* A posting tree */
862  do
863  {
864  /* If we've processed the current batch, load more items */
865  while (entry->offset >= entry->nlist)
866  {
867  entryLoadMoreItems(ginstate, entry, advancePast, snapshot);
868 
869  if (entry->isFinished)
870  {
872  return;
873  }
874  }
875 
876  entry->curItem = entry->list[entry->offset++];
877 
878  } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0 ||
879  (entry->reduceResult == TRUE && dropItem(entry)));
880  }
881 }
882 
883 /*
884  * Identify the "current" item among the input entry streams for this scan key
885  * that is greater than advancePast, and test whether it passes the scan key
886  * qual condition.
887  *
888  * The current item is the smallest curItem among the inputs. key->curItem
889  * is set to that value. key->curItemMatches is set to indicate whether that
890  * TID passes the consistentFn test. If so, key->recheckCurItem is set true
891  * iff recheck is needed for this item pointer (including the case where the
892  * item pointer is a lossy page pointer).
893  *
894  * If all entry streams are exhausted, sets key->isFinished to TRUE.
895  *
896  * Item pointers must be returned in ascending order.
897  *
898  * Note: this can return a "lossy page" item pointer, indicating that the
899  * key potentially matches all items on that heap page. However, it is
900  * not allowed to return both a lossy page pointer and exact (regular)
901  * item pointers for the same page. (Doing so would break the key-combination
902  * logic in scanGetItem.)
903  */
904 static void
905 keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
906  ItemPointerData advancePast, Snapshot snapshot)
907 {
908  ItemPointerData minItem;
909  ItemPointerData curPageLossy;
910  uint32 i;
911  bool haveLossyEntry;
912  GinScanEntry entry;
913  GinTernaryValue res;
914  MemoryContext oldCtx;
915  bool allFinished;
916 
917  Assert(!key->isFinished);
918 
919  /*
920  * We might have already tested this item; if so, no need to repeat work.
921  * (Note: the ">" case can happen, if advancePast is exact but we
922  * previously had to set curItem to a lossy-page pointer.)
923  */
924  if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
925  return;
926 
927  /*
928  * Find the minimum item > advancePast among the active entry streams.
929  *
930  * Note: a lossy-page entry is encoded by a ItemPointer with max value for
931  * offset (0xffff), so that it will sort after any exact entries for the
932  * same page. So we'll prefer to return exact pointers not lossy
933  * pointers, which is good.
934  */
935  ItemPointerSetMax(&minItem);
936  allFinished = true;
937  for (i = 0; i < key->nrequired; i++)
938  {
939  entry = key->requiredEntries[i];
940 
941  if (entry->isFinished)
942  continue;
943 
944  /*
945  * Advance this stream if necessary.
946  *
947  * In particular, since entry->curItem was initialized with
948  * ItemPointerSetMin, this ensures we fetch the first item for each
949  * entry on the first call.
950  */
951  if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
952  {
953  entryGetItem(ginstate, entry, advancePast, snapshot);
954  if (entry->isFinished)
955  continue;
956  }
957 
958  allFinished = false;
959  if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
960  minItem = entry->curItem;
961  }
962 
963  if (allFinished)
964  {
965  /* all entries are finished */
966  key->isFinished = TRUE;
967  return;
968  }
969 
970  /*
971  * Ok, we now know that there are no matches < minItem.
972  *
973  * If minItem is lossy, it means that there were no exact items on the
974  * page among requiredEntries, because lossy pointers sort after exact
975  * items. However, there might be exact items for the same page among
976  * additionalEntries, so we mustn't advance past them.
977  */
978  if (ItemPointerIsLossyPage(&minItem))
979  {
980  if (GinItemPointerGetBlockNumber(&advancePast) <
982  {
983  ItemPointerSet(&advancePast,
986  }
987  }
988  else
989  {
990  Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
991  ItemPointerSet(&advancePast,
994  }
995 
996  /*
997  * We might not have loaded all the entry streams for this TID yet. We
998  * could call the consistent function, passing MAYBE for those entries, to
999  * see if it can decide if this TID matches based on the information we
1000  * have. But if the consistent-function is expensive, and cannot in fact
1001  * decide with partial information, that could be a big loss. So, load all
1002  * the additional entries, before calling the consistent function.
1003  */
1004  for (i = 0; i < key->nadditional; i++)
1005  {
1006  entry = key->additionalEntries[i];
1007 
1008  if (entry->isFinished)
1009  continue;
1010 
1011  if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1012  {
1013  entryGetItem(ginstate, entry, advancePast, snapshot);
1014  if (entry->isFinished)
1015  continue;
1016  }
1017 
1018  /*
1019  * Normally, none of the items in additionalEntries can have a curItem
1020  * larger than minItem. But if minItem is a lossy page, then there
1021  * might be exact items on the same page among additionalEntries.
1022  */
1023  if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1024  {
1025  Assert(ItemPointerIsLossyPage(&minItem));
1026  minItem = entry->curItem;
1027  }
1028  }
1029 
1030  /*
1031  * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
1032  * and perform consistentFn test.
1033  *
1034  * Lossy-page entries pose a problem, since we don't know the correct
1035  * entryRes state to pass to the consistentFn, and we also don't know what
1036  * its combining logic will be (could be AND, OR, or even NOT). If the
1037  * logic is OR then the consistentFn might succeed for all items in the
1038  * lossy page even when none of the other entries match.
1039  *
1040  * Our strategy is to call the tri-state consistent function, with the
1041  * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
1042  * returns FALSE, none of the lossy items alone are enough for a match, so
1043  * we don't need to return a lossy-page pointer. Otherwise, return a
1044  * lossy-page pointer to indicate that the whole heap page must be
1045  * checked. (On subsequent calls, we'll do nothing until minItem is past
1046  * the page altogether, thus ensuring that we never return both regular
1047  * and lossy pointers for the same page.)
1048  *
1049  * An exception is that it doesn't matter what we pass for lossy pointers
1050  * in "hidden" entries, because the consistentFn's result can't depend on
1051  * them. We could pass them as MAYBE as well, but if we're using the
1052  * "shim" implementation of a tri-state consistent function (see
1053  * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
1054  * them as TRUE.
1055  *
1056  * Note that only lossy-page entries pointing to the current item's page
1057  * should trigger this processing; we might have future lossy pages in the
1058  * entry array, but they aren't relevant yet.
1059  */
1060  key->curItem = minItem;
1061  ItemPointerSetLossyPage(&curPageLossy,
1063  haveLossyEntry = false;
1064  for (i = 0; i < key->nentries; i++)
1065  {
1066  entry = key->scanEntry[i];
1067  if (entry->isFinished == FALSE &&
1068  ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1069  {
1070  if (i < key->nuserentries)
1071  key->entryRes[i] = GIN_MAYBE;
1072  else
1073  key->entryRes[i] = GIN_TRUE;
1074  haveLossyEntry = true;
1075  }
1076  else
1077  key->entryRes[i] = GIN_FALSE;
1078  }
1079 
1080  /* prepare for calling consistentFn in temp context */
1081  oldCtx = MemoryContextSwitchTo(tempCtx);
1082 
1083  if (haveLossyEntry)
1084  {
1085  /* Have lossy-page entries, so see if whole page matches */
1086  res = key->triConsistentFn(key);
1087 
1088  if (res == GIN_TRUE || res == GIN_MAYBE)
1089  {
1090  /* Yes, so clean up ... */
1091  MemoryContextSwitchTo(oldCtx);
1092  MemoryContextReset(tempCtx);
1093 
1094  /* and return lossy pointer for whole page */
1095  key->curItem = curPageLossy;
1096  key->curItemMatches = true;
1097  key->recheckCurItem = true;
1098  return;
1099  }
1100  }
1101 
1102  /*
1103  * At this point we know that we don't need to return a lossy whole-page
1104  * pointer, but we might have matches for individual exact item pointers,
1105  * possibly in combination with a lossy pointer. Pass lossy pointers as
1106  * MAYBE to the ternary consistent function, to let it decide if this
1107  * tuple satisfies the overall key, even though we don't know if the lossy
1108  * entries match.
1109  *
1110  * Prepare entryRes array to be passed to consistentFn.
1111  */
1112  for (i = 0; i < key->nentries; i++)
1113  {
1114  entry = key->scanEntry[i];
1115  if (entry->isFinished)
1116  key->entryRes[i] = GIN_FALSE;
1117 #if 0
1118 
1119  /*
1120  * This case can't currently happen, because we loaded all the entries
1121  * for this item earlier.
1122  */
1123  else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1124  key->entryRes[i] = GIN_MAYBE;
1125 #endif
1126  else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1127  key->entryRes[i] = GIN_MAYBE;
1128  else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
1129  key->entryRes[i] = GIN_TRUE;
1130  else
1131  key->entryRes[i] = GIN_FALSE;
1132  }
1133 
1134  res = key->triConsistentFn(key);
1135 
1136  switch (res)
1137  {
1138  case GIN_TRUE:
1139  key->curItemMatches = true;
1140  /* triConsistentFn set recheckCurItem */
1141  break;
1142 
1143  case GIN_FALSE:
1144  key->curItemMatches = false;
1145  break;
1146 
1147  case GIN_MAYBE:
1148  key->curItemMatches = true;
1149  key->recheckCurItem = true;
1150  break;
1151 
1152  default:
1153 
1154  /*
1155  * the 'default' case shouldn't happen, but if the consistent
1156  * function returns something bogus, this is the safe result
1157  */
1158  key->curItemMatches = true;
1159  key->recheckCurItem = true;
1160  break;
1161  }
1162 
1163  /*
1164  * We have a tuple, and we know if it matches or not. If it's a non-match,
1165  * we could continue to find the next matching tuple, but let's break out
1166  * and give scanGetItem a chance to advance the other keys. They might be
1167  * able to skip past to a much higher TID, allowing us to save work.
1168  */
1169 
1170  /* clean up after consistentFn calls */
1171  MemoryContextSwitchTo(oldCtx);
1172  MemoryContextReset(tempCtx);
1173 }
1174 
1175 /*
1176  * Get next heap item pointer (after advancePast) from scan.
1177  * Returns true if anything found.
1178  * On success, *item and *recheck are set.
1179  *
1180  * Note: this is very nearly the same logic as in keyGetItem(), except
1181  * that we know the keys are to be combined with AND logic, whereas in
1182  * keyGetItem() the combination logic is known only to the consistentFn.
1183  */
1184 static bool
1186  ItemPointerData *item, bool *recheck)
1187 {
1188  GinScanOpaque so = (GinScanOpaque) scan->opaque;
1189  uint32 i;
1190  bool match;
1191 
1192  /*----------
1193  * Advance the scan keys in lock-step, until we find an item that matches
1194  * all the keys. If any key reports isFinished, meaning its subset of the
1195  * entries is exhausted, we can stop. Otherwise, set *item to the next
1196  * matching item.
1197  *
1198  * This logic works only if a keyGetItem stream can never contain both
1199  * exact and lossy pointers for the same page. Else we could have a
1200  * case like
1201  *
1202  * stream 1 stream 2
1203  * ... ...
1204  * 42/6 42/7
1205  * 50/1 42/0xffff
1206  * ... ...
1207  *
1208  * We would conclude that 42/6 is not a match and advance stream 1,
1209  * thus never detecting the match to the lossy pointer in stream 2.
1210  * (keyGetItem has a similar problem versus entryGetItem.)
1211  *----------
1212  */
1213  do
1214  {
1215  ItemPointerSetMin(item);
1216  match = true;
1217  for (i = 0; i < so->nkeys && match; i++)
1218  {
1219  GinScanKey key = so->keys + i;
1220 
1221  /* Fetch the next item for this key that is > advancePast. */
1222  keyGetItem(&so->ginstate, so->tempCtx, key, advancePast,
1223  scan->xs_snapshot);
1224 
1225  if (key->isFinished)
1226  return false;
1227 
1228  /*
1229  * If it's not a match, we can immediately conclude that nothing
1230  * <= this item matches, without checking the rest of the keys.
1231  */
1232  if (!key->curItemMatches)
1233  {
1234  advancePast = key->curItem;
1235  match = false;
1236  break;
1237  }
1238 
1239  /*
1240  * It's a match. We can conclude that nothing < matches, so the
1241  * other key streams can skip to this item.
1242  *
1243  * Beware of lossy pointers, though; from a lossy pointer, we can
1244  * only conclude that nothing smaller than this *block* matches.
1245  */
1246  if (ItemPointerIsLossyPage(&key->curItem))
1247  {
1248  if (GinItemPointerGetBlockNumber(&advancePast) <
1250  {
1251  ItemPointerSet(&advancePast,
1254  }
1255  }
1256  else
1257  {
1259  ItemPointerSet(&advancePast,
1262  }
1263 
1264  /*
1265  * If this is the first key, remember this location as a potential
1266  * match, and proceed to check the rest of the keys.
1267  *
1268  * Otherwise, check if this is the same item that we checked the
1269  * previous keys for (or a lossy pointer for the same page). If
1270  * not, loop back to check the previous keys for this item (we
1271  * will check this key again too, but keyGetItem returns quickly
1272  * for that)
1273  */
1274  if (i == 0)
1275  {
1276  *item = key->curItem;
1277  }
1278  else
1279  {
1280  if (ItemPointerIsLossyPage(&key->curItem) ||
1281  ItemPointerIsLossyPage(item))
1282  {
1284  match = (GinItemPointerGetBlockNumber(&key->curItem) ==
1286  }
1287  else
1288  {
1289  Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
1290  match = (ginCompareItemPointers(&key->curItem, item) == 0);
1291  }
1292  }
1293  }
1294  } while (!match);
1295 
1296  Assert(!ItemPointerIsMin(item));
1297 
1298  /*
1299  * Now *item contains the first ItemPointer after previous result that
1300  * satisfied all the keys for that exact TID, or a lossy reference to the
1301  * same page.
1302  *
1303  * We must return recheck = true if any of the keys are marked recheck.
1304  */
1305  *recheck = false;
1306  for (i = 0; i < so->nkeys; i++)
1307  {
1308  GinScanKey key = so->keys + i;
1309 
1310  if (key->recheckCurItem)
1311  {
1312  *recheck = true;
1313  break;
1314  }
1315  }
1316 
1317  return TRUE;
1318 }
1319 
1320 
1321 /*
1322  * Functions for scanning the pending list
1323  */
1324 
1325 
1326 /*
1327  * Get ItemPointer of next heap row to be checked from pending list.
1328  * Returns false if there are no more. On pages with several heap rows
1329  * it returns each row separately, on page with part of heap row returns
1330  * per page data. pos->firstOffset and pos->lastOffset are set to identify
1331  * the range of pending-list tuples belonging to this heap row.
1332  *
1333  * The pendingBuffer is presumed pinned and share-locked on entry, and is
1334  * pinned and share-locked on success exit. On failure exit it's released.
1335  */
1336 static bool
1338 {
1339  OffsetNumber maxoff;
1340  Page page;
1341  IndexTuple itup;
1342 
1343  ItemPointerSetInvalid(&pos->item);
1344  for (;;)
1345  {
1346  page = BufferGetPage(pos->pendingBuffer);
1347  TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1348 
1349  maxoff = PageGetMaxOffsetNumber(page);
1350  if (pos->firstOffset > maxoff)
1351  {
1352  BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1353 
1354  if (blkno == InvalidBlockNumber)
1355  {
1358 
1359  return false;
1360  }
1361  else
1362  {
1363  /*
1364  * Here we must prevent deletion of next page by insertcleanup
1365  * process, which may be trying to obtain exclusive lock on
1366  * current page. So, we lock next page before releasing the
1367  * current one
1368  */
1369  Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1370 
1371  LockBuffer(tmpbuf, GIN_SHARE);
1373 
1374  pos->pendingBuffer = tmpbuf;
1376  }
1377  }
1378  else
1379  {
1380  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1381  pos->item = itup->t_tid;
1382  if (GinPageHasFullRow(page))
1383  {
1384  /*
1385  * find itempointer to the next row
1386  */
1387  for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1388  {
1389  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1390  if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1391  break;
1392  }
1393  }
1394  else
1395  {
1396  /*
1397  * All itempointers are the same on this page
1398  */
1399  pos->lastOffset = maxoff + 1;
1400  }
1401 
1402  /*
1403  * Now pos->firstOffset points to the first tuple of current heap
1404  * row, pos->lastOffset points to the first tuple of next heap row
1405  * (or to the end of page)
1406  */
1407  break;
1408  }
1409  }
1410 
1411  return true;
1412 }
1413 
1414 /*
1415  * Scan pending-list page from current tuple (off) up till the first of:
1416  * - match is found (then returns true)
1417  * - no later match is possible
1418  * - tuple's attribute number is not equal to entry's attrnum
1419  * - reach end of page
1420  *
1421  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1422  * of gintuple_get_key() on the current page.
1423  */
1424 static bool
1426  OffsetNumber off, OffsetNumber maxoff,
1427  GinScanEntry entry,
1428  Datum *datum, GinNullCategory *category,
1429  bool *datumExtracted)
1430 {
1431  IndexTuple itup;
1432  int32 cmp;
1433 
1434  /* Partial match to a null is not possible */
1435  if (entry->queryCategory != GIN_CAT_NORM_KEY)
1436  return false;
1437 
1438  while (off < maxoff)
1439  {
1440  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1441 
1442  if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1443  return false;
1444 
1445  if (datumExtracted[off - 1] == false)
1446  {
1447  datum[off - 1] = gintuple_get_key(ginstate, itup,
1448  &category[off - 1]);
1449  datumExtracted[off - 1] = true;
1450  }
1451 
1452  /* Once we hit nulls, no further match is possible */
1453  if (category[off - 1] != GIN_CAT_NORM_KEY)
1454  return false;
1455 
1456  /*----------
1457  * Check partial match.
1458  * case cmp == 0 => match
1459  * case cmp > 0 => not match and end scan (no later match possible)
1460  * case cmp < 0 => not match and continue scan
1461  *----------
1462  */
1463  cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1464  ginstate->supportCollation[entry->attnum - 1],
1465  entry->queryKey,
1466  datum[off - 1],
1467  UInt16GetDatum(entry->strategy),
1468  PointerGetDatum(entry->extra_data)));
1469  if (cmp == 0)
1470  return true;
1471  else if (cmp > 0)
1472  return false;
1473 
1474  off++;
1475  }
1476 
1477  return false;
1478 }
1479 
1480 /*
1481  * Set up the entryRes array for each key by looking at
1482  * every entry for current heap row in pending list.
1483  *
1484  * Returns true if each scan key has at least one entryRes match.
1485  * This corresponds to the situations where the normal index search will
1486  * try to apply the key's consistentFn. (A tuple not meeting that requirement
1487  * cannot be returned by the normal search since no entry stream will
1488  * source its TID.)
1489  *
1490  * The pendingBuffer is presumed pinned and share-locked on entry.
1491  */
1492 static bool
1494 {
1495  GinScanOpaque so = (GinScanOpaque) scan->opaque;
1496  OffsetNumber attrnum;
1497  Page page;
1498  IndexTuple itup;
1499  int i,
1500  j;
1501 
1502  /*
1503  * Reset all entryRes and hasMatchKey flags
1504  */
1505  for (i = 0; i < so->nkeys; i++)
1506  {
1507  GinScanKey key = so->keys + i;
1508 
1509  memset(key->entryRes, GIN_FALSE, key->nentries);
1510  }
1511  memset(pos->hasMatchKey, FALSE, so->nkeys);
1512 
1513  /*
1514  * Outer loop iterates over multiple pending-list pages when a single heap
1515  * row has entries spanning those pages.
1516  */
1517  for (;;)
1518  {
1519  Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1520  GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1521  bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1522 
1523  Assert(pos->lastOffset > pos->firstOffset);
1524  memset(datumExtracted + pos->firstOffset - 1, 0,
1525  sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1526 
1527  page = BufferGetPage(pos->pendingBuffer);
1528  TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1529 
1530  for (i = 0; i < so->nkeys; i++)
1531  {
1532  GinScanKey key = so->keys + i;
1533 
1534  for (j = 0; j < key->nentries; j++)
1535  {
1536  GinScanEntry entry = key->scanEntry[j];
1537  OffsetNumber StopLow = pos->firstOffset,
1538  StopHigh = pos->lastOffset,
1539  StopMiddle;
1540 
1541  /* If already matched on earlier page, do no extra work */
1542  if (key->entryRes[j])
1543  continue;
1544 
1545  /*
1546  * Interesting tuples are from pos->firstOffset to
1547  * pos->lastOffset and they are ordered by (attnum, Datum) as
1548  * it's done in entry tree. So we can use binary search to
1549  * avoid linear scanning.
1550  */
1551  while (StopLow < StopHigh)
1552  {
1553  int res;
1554 
1555  StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1556 
1557  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1558 
1559  attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1560 
1561  if (key->attnum < attrnum)
1562  {
1563  StopHigh = StopMiddle;
1564  continue;
1565  }
1566  if (key->attnum > attrnum)
1567  {
1568  StopLow = StopMiddle + 1;
1569  continue;
1570  }
1571 
1572  if (datumExtracted[StopMiddle - 1] == false)
1573  {
1574  datum[StopMiddle - 1] =
1575  gintuple_get_key(&so->ginstate, itup,
1576  &category[StopMiddle - 1]);
1577  datumExtracted[StopMiddle - 1] = true;
1578  }
1579 
1580  if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1581  {
1582  /* special behavior depending on searchMode */
1583  if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1584  {
1585  /* match anything except NULL_ITEM */
1586  if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1587  res = -1;
1588  else
1589  res = 0;
1590  }
1591  else
1592  {
1593  /* match everything */
1594  res = 0;
1595  }
1596  }
1597  else
1598  {
1599  res = ginCompareEntries(&so->ginstate,
1600  entry->attnum,
1601  entry->queryKey,
1602  entry->queryCategory,
1603  datum[StopMiddle - 1],
1604  category[StopMiddle - 1]);
1605  }
1606 
1607  if (res == 0)
1608  {
1609  /*
1610  * Found exact match (there can be only one, except in
1611  * EMPTY_QUERY mode).
1612  *
1613  * If doing partial match, scan forward from here to
1614  * end of page to check for matches.
1615  *
1616  * See comment above about tuple's ordering.
1617  */
1618  if (entry->isPartialMatch)
1619  key->entryRes[j] =
1621  page,
1622  StopMiddle,
1623  pos->lastOffset,
1624  entry,
1625  datum,
1626  category,
1627  datumExtracted);
1628  else
1629  key->entryRes[j] = true;
1630 
1631  /* done with binary search */
1632  break;
1633  }
1634  else if (res < 0)
1635  StopHigh = StopMiddle;
1636  else
1637  StopLow = StopMiddle + 1;
1638  }
1639 
1640  if (StopLow >= StopHigh && entry->isPartialMatch)
1641  {
1642  /*
1643  * No exact match on this page. If doing partial match,
1644  * scan from the first tuple greater than target value to
1645  * end of page. Note that since we don't remember whether
1646  * the comparePartialFn told us to stop early on a
1647  * previous page, we will uselessly apply comparePartialFn
1648  * to the first tuple on each subsequent page.
1649  */
1650  key->entryRes[j] =
1652  page,
1653  StopHigh,
1654  pos->lastOffset,
1655  entry,
1656  datum,
1657  category,
1658  datumExtracted);
1659  }
1660 
1661  pos->hasMatchKey[i] |= key->entryRes[j];
1662  }
1663  }
1664 
1665  /* Advance firstOffset over the scanned tuples */
1666  pos->firstOffset = pos->lastOffset;
1667 
1668  if (GinPageHasFullRow(page))
1669  {
1670  /*
1671  * We have examined all pending entries for the current heap row.
1672  * Break out of loop over pages.
1673  */
1674  break;
1675  }
1676  else
1677  {
1678  /*
1679  * Advance to next page of pending entries for the current heap
1680  * row. Complain if there isn't one.
1681  */
1682  ItemPointerData item = pos->item;
1683 
1684  if (scanGetCandidate(scan, pos) == false ||
1685  !ItemPointerEquals(&pos->item, &item))
1686  elog(ERROR, "could not find additional pending pages for same heap tuple");
1687  }
1688  }
1689 
1690  /*
1691  * Now return "true" if all scan keys have at least one matching datum
1692  */
1693  for (i = 0; i < so->nkeys; i++)
1694  {
1695  if (pos->hasMatchKey[i] == false)
1696  return false;
1697  }
1698 
1699  return true;
1700 }
1701 
1702 /*
1703  * Collect all matched rows from pending list into bitmap
1704  */
1705 static void
1706 scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1707 {
1708  GinScanOpaque so = (GinScanOpaque) scan->opaque;
1709  MemoryContext oldCtx;
1710  bool recheck,
1711  match;
1712  int i;
1713  pendingPosition pos;
1714  Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1715  Page page;
1716  BlockNumber blkno;
1717 
1718  *ntids = 0;
1719 
1720  LockBuffer(metabuffer, GIN_SHARE);
1721  page = BufferGetPage(metabuffer);
1722  TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1723  blkno = GinPageGetMeta(page)->head;
1724 
1725  /*
1726  * fetch head of list before unlocking metapage. head page must be pinned
1727  * to prevent deletion by vacuum process
1728  */
1729  if (blkno == InvalidBlockNumber)
1730  {
1731  /* No pending list, so proceed with normal scan */
1732  UnlockReleaseBuffer(metabuffer);
1733  return;
1734  }
1735 
1736  pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1737  LockBuffer(pos.pendingBuffer, GIN_SHARE);
1738  pos.firstOffset = FirstOffsetNumber;
1739  UnlockReleaseBuffer(metabuffer);
1740  pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1741 
1742  /*
1743  * loop for each heap row. scanGetCandidate returns full row or row's
1744  * tuples from first page.
1745  */
1746  while (scanGetCandidate(scan, &pos))
1747  {
1748  /*
1749  * Check entries in tuple and set up entryRes array.
1750  *
1751  * If pending tuples belonging to the current heap row are spread
1752  * across several pages, collectMatchesForHeapRow will read all of
1753  * those pages.
1754  */
1755  if (!collectMatchesForHeapRow(scan, &pos))
1756  continue;
1757 
1758  /*
1759  * Matching of entries of one row is finished, so check row using
1760  * consistent functions.
1761  */
1762  oldCtx = MemoryContextSwitchTo(so->tempCtx);
1763  recheck = false;
1764  match = true;
1765 
1766  for (i = 0; i < so->nkeys; i++)
1767  {
1768  GinScanKey key = so->keys + i;
1769 
1770  if (!key->boolConsistentFn(key))
1771  {
1772  match = false;
1773  break;
1774  }
1775  recheck |= key->recheckCurItem;
1776  }
1777 
1778  MemoryContextSwitchTo(oldCtx);
1780 
1781  if (match)
1782  {
1783  tbm_add_tuples(tbm, &pos.item, 1, recheck);
1784  (*ntids)++;
1785  }
1786  }
1787 
1788  pfree(pos.hasMatchKey);
1789 }
1790 
1791 
1792 #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1793 
1794 int64
1796 {
1797  GinScanOpaque so = (GinScanOpaque) scan->opaque;
1798  int64 ntids;
1799  ItemPointerData iptr;
1800  bool recheck;
1801 
1802  /*
1803  * Set up the scan keys, and check for unsatisfiable query.
1804  */
1805  ginFreeScanKeys(so); /* there should be no keys yet, but just to be
1806  * sure */
1807  ginNewScanKey(scan);
1808 
1809  if (GinIsVoidRes(scan))
1810  return 0;
1811 
1812  ntids = 0;
1813 
1814  /*
1815  * First, scan the pending list and collect any matching entries into the
1816  * bitmap. After we scan a pending item, some other backend could post it
1817  * into the main index, and so we might visit it a second time during the
1818  * main scan. This is okay because we'll just re-set the same bit in the
1819  * bitmap. (The possibility of duplicate visits is a major reason why GIN
1820  * can't support the amgettuple API, however.) Note that it would not do
1821  * to scan the main index before the pending list, since concurrent
1822  * cleanup could then make us miss entries entirely.
1823  */
1824  scanPendingInsert(scan, tbm, &ntids);
1825 
1826  /*
1827  * Now scan the main index.
1828  */
1829  startScan(scan);
1830 
1831  ItemPointerSetMin(&iptr);
1832 
1833  for (;;)
1834  {
1836 
1837  if (!scanGetItem(scan, iptr, &iptr, &recheck))
1838  break;
1839 
1840  if (ItemPointerIsLossyPage(&iptr))
1842  else
1843  tbm_add_tuples(tbm, &iptr, 1, recheck);
1844  ntids++;
1845  }
1846 
1847  return ntids;
1848 }
#define GinPageHasFullRow(page)
Definition: ginblock.h:118
static void startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
Definition: ginget.c:452
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:59
#define GIN_TRUE
Definition: gin.h:60
#define GIN_UNLOCK
Definition: gin_private.h:43
#define GIN_DELETED
Definition: ginblock.h:41
#define GIN_CAT_EMPTY_QUERY
Definition: ginblock.h:203
int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: ginget.c:1795
GinBtreeData btree
Definition: gin_private.h:344
Oid supportCollation[INDEX_MAX_KEYS]
Definition: gin_private.h:82
ItemPointerData curItem
Definition: gin_private.h:329
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:1172
OffsetNumber offset
Definition: gin_private.h:339
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
GinState * ginstate
Definition: gin_private.h:162
uint32 predictNumberResult
Definition: gin_private.h:343
static void entryGetItem(GinState *ginstate, GinScanEntry entry, ItemPointerData advancePast, Snapshot snapshot)
Definition: ginget.c:743
Buffer pendingBuffer
Definition: ginget.c:28
Relation index
Definition: gin_private.h:53
#define DatumGetInt32(X)
Definition: postgres.h:478
#define dropItem(e)
Definition: ginget.c:727
#define GinItemPointerGetOffsetNumber(pointer)
Definition: ginblock.h:137
OffsetNumber off
Definition: gin_private.h:126
#define GIN_MAYBE
Definition: gin.h:61
#define PointerGetDatum(X)
Definition: postgres.h:562
static bool matchPartialInPendingList(GinState *ginstate, Page page, OffsetNumber off, OffsetNumber maxoff, GinScanEntry entry, Datum *datum, GinNullCategory *category, bool *datumExtracted)
Definition: ginget.c:1425
TIDBitmap * tbm_create(long maxbytes, dsa_area *dsa)
Definition: tidbitmap.c:281
static void scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
Definition: ginget.c:1706
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:403
TBMIterator * matchIterator
Definition: gin_private.h:333
ItemPointerData t_tid
Definition: itup.h:37
Form_pg_attribute * attrs
Definition: tupdesc.h:74
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Snapshot xs_snapshot
Definition: relscan.h:90
#define InvalidBuffer
Definition: buf.h:25
int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, GinNullCategory categorya, Datum b, GinNullCategory categoryb)
Definition: ginutil.c:381
Pointer extra_data
Definition: gin_private.h:320
#define ItemPointerIsLossyPage(p)
Definition: ginblock.h:169
#define GinGetNPosting(itup)
Definition: ginblock.h:219
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:135
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1094
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3309
#define GinPageGetOpaque(page)
Definition: ginblock.h:109
#define GIN_METAPAGE_BLKNO
Definition: ginblock.h:50
static void startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
Definition: ginget.c:297
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
ItemPointerData * list
Definition: gin_private.h:337
signed int int32
Definition: c.h:256
int GinFuzzySearchLimit
Definition: ginget.c:24
Relation indexRelation
Definition: relscan.h:89
uint16 OffsetNumber
Definition: off.h:24
Definition: type.h:90
GinBtreeStack * ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot)
Definition: ginbtree.c:76
BlockNumber blockno
Definition: tidbitmap.h:42
GinScanEntry * scanEntry
Definition: gin_private.h:268
bool(* findItem)(GinBtree, GinBtreeStack *)
Definition: gin_private.h:149
#define GIN_SEARCH_MODE_ALL
Definition: gin.h:35
void pfree(void *pointer)
Definition: mcxt.c:950
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
#define GinPageRightMost(page)
Definition: ginblock.h:128
signed char GinNullCategory
Definition: ginblock.h:197
#define ItemPointerSetLossyPage(p, b)
Definition: ginblock.h:167
#define FALSE
Definition: c.h:221
struct GinScanKeyData * GinScanKey
Definition: gin_private.h:256
static void startScan(IndexScanDesc scan)
Definition: ginget.c:536
char GinTernaryValue
Definition: gin.h:57
OffsetNumber attnum
Definition: gin_private.h:323
#define DEBUG2
Definition: elog.h:24
OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
Definition: ginutil.c:213
#define GIN_CAT_NORM_KEY
Definition: ginblock.h:199
GinScanOpaqueData * GinScanOpaque
Definition: gin_private.h:364
FmgrInfo comparePartialFn[INDEX_MAX_KEYS]
Definition: gin_private.h:78
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:348
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: tidbitmap.h:46
#define FirstOffsetNumber
Definition: off.h:27
IndexTupleData * IndexTuple
Definition: itup.h:53
MemoryContext keyCtx
Definition: gin_private.h:359
GinBtreeStack * ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot)
Definition: gindatapage.c:1915
GinTernaryValue * entryRes
Definition: gin_private.h:284
static bool collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
Definition: ginget.c:1493
Buffer ginStepRight(Buffer buffer, Relation index, int lockmode)
Definition: ginbtree.c:165
static void scanPostingTree(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree, Snapshot snapshot)
Definition: ginget.c:65
static void entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advancePast, Snapshot snapshot)
Definition: ginget.c:588
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:187
unsigned int uint32
Definition: c.h:268
OffsetNumber firstOffset
Definition: ginget.c:29
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
#define GinPageIsLeaf(page)
Definition: ginblock.h:111
OffsetNumber attnum
Definition: gin_private.h:299
Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, GinNullCategory *category)
Definition: ginutil.c:246
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ItemPointerIsMin(p)
Definition: ginblock.h:159
void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, Datum key, GinNullCategory category, GinState *ginstate)
Definition: ginentrypage.c:745
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:128
#define ItemPointerSetMin(p)
Definition: ginblock.h:157
#define GIN_SHARE
Definition: gin_private.h:44
bool * hasMatchKey
Definition: ginget.c:32
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
TIDBitmap * matchBitmap
Definition: gin_private.h:332
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
GinScanKey keys
Definition: gin_private.h:352
MemoryContext tempCtx
Definition: gin_private.h:349
static bool scanGetItem(IndexScanDesc scan, ItemPointerData advancePast, ItemPointerData *item, bool *recheck)
Definition: ginget.c:1185
GinScanEntry * requiredEntries
Definition: gin_private.h:278
static int entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
Definition: ginget.c:435
uintptr_t Datum
Definition: postgres.h:372
struct IndexTupleData IndexTupleData
StrategyNumber strategy
Definition: gin_private.h:321
bool tbm_is_empty(const TIDBitmap *tbm)
Definition: tidbitmap.c:696
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
#define GIN_CAT_NULL_ITEM
Definition: ginblock.h:202
int work_mem
Definition: globals.c:112
Relation index
Definition: gin_private.h:160
static void keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key, ItemPointerData advancePast, Snapshot snapshot)
Definition: ginget.c:905
TBMIterateResult * matchResult
Definition: gin_private.h:334
#define InvalidOffsetNumber
Definition: off.h:26
void ginNewScanKey(IndexScanDesc scan)
Definition: ginscan.c:262
static FormData_pg_attribute a1
Definition: heap.c:144
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:997
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
GinNullCategory queryCategory
Definition: gin_private.h:318
#define GinIsVoidRes(s)
Definition: ginget.c:1792
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:211
#define GIN_FALSE
Definition: gin.h:59
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:715
#define InvalidBlockNumber
Definition: block.h:33
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:55
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, int *nitems)
Definition: ginentrypage.c:163
BlockNumber blkno
Definition: gin_private.h:124
ItemPointerData curItem
Definition: gin_private.h:308
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:29
#define GinGetPostingTree(itup)
Definition: ginblock.h:224
#define ItemPointerSetMax(p)
Definition: ginblock.h:162
#define DatumGetPointer(X)
Definition: postgres.h:555
bool(* boolConsistentFn)(GinScanKey key)
Definition: gin_private.h:285
static StringInfoData tmpbuf
Definition: walsender.c:158
static int ginCompareItemPointers(ItemPointer a, ItemPointer b)
Definition: gin_private.h:461
uint32 predictNumber
Definition: gin_private.h:129
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
Definition: tidbitmap.c:469
static bool collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry, Snapshot snapshot)
Definition: ginget.c:116
#define ItemPointerSetInvalid(pointer)
Definition: itemptr.h:149
int GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
Definition: gindatapage.c:181
#define GinDataPageGetRightBound(page)
Definition: ginblock.h:279
GinScanEntry * entries
Definition: gin_private.h:355
#define GinIsPostingTree(itup)
Definition: ginblock.h:222
void * palloc(Size size)
Definition: mcxt.c:849
void freeGinBtreeStack(GinBtreeStack *stack)
Definition: ginbtree.c:193
GinTernaryValue(* triConsistentFn)(GinScanKey key)
Definition: gin_private.h:286
ItemPointerData itemptr
Definition: gin_private.h:172
GinScanEntry * additionalEntries
Definition: gin_private.h:280
int i
void * arg
ItemPointer GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
Definition: gindatapage.c:134
TupleDesc origTupdesc
Definition: gin_private.h:67
static bool moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
Definition: ginget.c:40
#define TRUE
Definition: c.h:217
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
#define GinPageGetMeta(p)
Definition: ginblock.h:103
#define elog
Definition: elog.h:219
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:75
static bool scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
Definition: ginget.c:1337
ItemPointerData item
Definition: ginget.c:31
struct pendingPosition pendingPosition
#define GinItemPointerGetBlockNumber(pointer)
Definition: ginblock.h:134
int Buffer
Definition: buf.h:23
#define UInt16GetDatum(X)
Definition: postgres.h:471
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:3347
OffsetNumber lastOffset
Definition: ginget.c:30
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:104
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742
static FormData_pg_attribute a2
Definition: heap.c:150
void ginFreeScanKeys(GinScanOpaque so)
Definition: ginscan.c:232