PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ginget.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ginget.c
4  * fetch tuples from a GIN scan.
5  *
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gin/ginget.c
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/gin_private.h"
18 #include "access/relscan.h"
19 #include "miscadmin.h"
20 #include "utils/datum.h"
21 #include "utils/memutils.h"
22 
23 /* GUC parameter */
25 
26 typedef struct pendingPosition
27 {
32  bool *hasMatchKey;
34 
35 
36 /*
37  * Goes to the next page if current offset is outside of bounds
38  */
39 static bool
41 {
42  Page page = BufferGetPage(stack->buffer);
43 
44  if (stack->off > PageGetMaxOffsetNumber(page))
45  {
46  /*
47  * We scanned the whole page, so we should take right page
48  */
49  if (GinPageRightMost(page))
50  return false; /* no more pages */
51 
52  stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
53  stack->blkno = BufferGetBlockNumber(stack->buffer);
54  stack->off = FirstOffsetNumber;
55  }
56 
57  return true;
58 }
59 
60 /*
61  * Scan all pages of a posting tree and save all its heap ItemPointers
62  * in scanEntry->matchBitmap
63  */
64 static void
66  BlockNumber rootPostingTree, Snapshot snapshot)
67 {
68  GinBtreeData btree;
69  GinBtreeStack *stack;
70  Buffer buffer;
71  Page page;
72 
73  /* Descend to the leftmost leaf page */
74  stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
75  buffer = stack->buffer;
76  IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
77 
78  freeGinBtreeStack(stack);
79 
80  /*
81  * Loop iterates through all leaf pages of posting tree
82  */
83  for (;;)
84  {
85  page = BufferGetPage(buffer);
86  if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
87  {
88  int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
89 
90  scanEntry->predictNumberResult += n;
91  }
92 
93  if (GinPageRightMost(page))
94  break; /* no more pages */
95 
96  buffer = ginStepRight(buffer, index, GIN_SHARE);
97  }
98 
99  UnlockReleaseBuffer(buffer);
100 }
101 
102 /*
103  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
104  * match the search entry. This supports three different match modes:
105  *
106  * 1. Partial-match support: scan from current point until the
107  * comparePartialFn says we're done.
108  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
109  * key for the current attnum) until we hit null items or end of attnum
110  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
111  * key for the current attnum) until we hit end of attnum
112  *
113  * Returns true if done, false if it's necessary to restart scan from scratch
114  */
115 static bool
117  GinScanEntry scanEntry, Snapshot snapshot)
118 {
119  OffsetNumber attnum;
120  Form_pg_attribute attr;
121 
122  /* Initialize empty bitmap result */
123  scanEntry->matchBitmap = tbm_create(work_mem * 1024L);
124 
125  /* Null query cannot partial-match anything */
126  if (scanEntry->isPartialMatch &&
127  scanEntry->queryCategory != GIN_CAT_NORM_KEY)
128  return true;
129 
130  /* Locate tupdesc entry for key column (for attbyval/attlen data) */
131  attnum = scanEntry->attnum;
132  attr = btree->ginstate->origTupdesc->attrs[attnum - 1];
133 
134  for (;;)
135  {
136  Page page;
137  IndexTuple itup;
138  Datum idatum;
139  GinNullCategory icategory;
140 
141  /*
142  * stack->off points to the interested entry, buffer is already locked
143  */
144  if (moveRightIfItNeeded(btree, stack) == false)
145  return true;
146 
147  page = BufferGetPage(stack->buffer);
148  TestForOldSnapshot(snapshot, btree->index, page);
149  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
150 
151  /*
152  * If tuple stores another attribute then stop scan
153  */
154  if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
155  return true;
156 
157  /* Safe to fetch attribute value */
158  idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
159 
160  /*
161  * Check for appropriate scan stop conditions
162  */
163  if (scanEntry->isPartialMatch)
164  {
165  int32 cmp;
166 
167  /*
168  * In partial match, stop scan at any null (including
169  * placeholders); partial matches never match nulls
170  */
171  if (icategory != GIN_CAT_NORM_KEY)
172  return true;
173 
174  /*----------
175  * Check of partial match.
176  * case cmp == 0 => match
177  * case cmp > 0 => not match and finish scan
178  * case cmp < 0 => not match and continue scan
179  *----------
180  */
181  cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
182  btree->ginstate->supportCollation[attnum - 1],
183  scanEntry->queryKey,
184  idatum,
185  UInt16GetDatum(scanEntry->strategy),
186  PointerGetDatum(scanEntry->extra_data)));
187 
188  if (cmp > 0)
189  return true;
190  else if (cmp < 0)
191  {
192  stack->off++;
193  continue;
194  }
195  }
196  else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
197  {
198  /*
199  * In ALL mode, we are not interested in null items, so we can
200  * stop if we get to a null-item placeholder (which will be the
201  * last entry for a given attnum). We do want to include NULL_KEY
202  * and EMPTY_ITEM entries, though.
203  */
204  if (icategory == GIN_CAT_NULL_ITEM)
205  return true;
206  }
207 
208  /*
209  * OK, we want to return the TIDs listed in this entry.
210  */
211  if (GinIsPostingTree(itup))
212  {
213  BlockNumber rootPostingTree = GinGetPostingTree(itup);
214 
215  /*
216  * We should unlock current page (but not unpin) during tree scan
217  * to prevent deadlock with vacuum processes.
218  *
219  * We save current entry value (idatum) to be able to re-find our
220  * tuple after re-locking
221  */
222  if (icategory == GIN_CAT_NORM_KEY)
223  idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
224 
225  LockBuffer(stack->buffer, GIN_UNLOCK);
226 
227  /* Collect all the TIDs in this entry's posting tree */
228  scanPostingTree(btree->index, scanEntry, rootPostingTree,
229  snapshot);
230 
231  /*
232  * We lock again the entry page and while it was unlocked insert
233  * might have occurred, so we need to re-find our position.
234  */
235  LockBuffer(stack->buffer, GIN_SHARE);
236  page = BufferGetPage(stack->buffer);
237  if (!GinPageIsLeaf(page))
238  {
239  /*
240  * Root page becomes non-leaf while we unlock it. We will
241  * start again, this situation doesn't occur often - root can
242  * became a non-leaf only once per life of index.
243  */
244  return false;
245  }
246 
247  /* Search forward to re-find idatum */
248  for (;;)
249  {
250  Datum newDatum;
251  GinNullCategory newCategory;
252 
253  if (moveRightIfItNeeded(btree, stack) == false)
254  elog(ERROR, "lost saved point in index"); /* must not happen !!! */
255 
256  page = BufferGetPage(stack->buffer);
257  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
258 
259  if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
260  elog(ERROR, "lost saved point in index"); /* must not happen !!! */
261  newDatum = gintuple_get_key(btree->ginstate, itup,
262  &newCategory);
263 
264  if (ginCompareEntries(btree->ginstate, attnum,
265  newDatum, newCategory,
266  idatum, icategory) == 0)
267  break; /* Found! */
268 
269  stack->off++;
270  }
271 
272  if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
273  pfree(DatumGetPointer(idatum));
274  }
275  else
276  {
277  ItemPointer ipd;
278  int nipd;
279 
280  ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
281  tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
282  scanEntry->predictNumberResult += GinGetNPosting(itup);
283  pfree(ipd);
284  }
285 
286  /*
287  * Done with this entry, go to the next
288  */
289  stack->off++;
290  }
291 }
292 
293 /*
294  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
295  */
296 static void
297 startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
298 {
299  GinBtreeData btreeEntry;
300  GinBtreeStack *stackEntry;
301  Page page;
302  bool needUnlock;
303 
304 restartScanEntry:
305  entry->buffer = InvalidBuffer;
306  ItemPointerSetMin(&entry->curItem);
307  entry->offset = InvalidOffsetNumber;
308  if (entry->list)
309  pfree(entry->list);
310  entry->list = NULL;
311  entry->nlist = 0;
312  entry->matchBitmap = NULL;
313  entry->matchResult = NULL;
314  entry->reduceResult = FALSE;
315  entry->predictNumberResult = 0;
316 
317  /*
318  * we should find entry, and begin scan of posting tree or just store
319  * posting list in memory
320  */
321  ginPrepareEntryScan(&btreeEntry, entry->attnum,
322  entry->queryKey, entry->queryCategory,
323  ginstate);
324  stackEntry = ginFindLeafPage(&btreeEntry, true, snapshot);
325  page = BufferGetPage(stackEntry->buffer);
326  /* ginFindLeafPage() will have already checked snapshot age. */
327  needUnlock = TRUE;
328 
329  entry->isFinished = TRUE;
330 
331  if (entry->isPartialMatch ||
333  {
334  /*
335  * btreeEntry.findItem locates the first item >= given search key.
336  * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
337  * because of the way the GIN_CAT_EMPTY_QUERY category code is
338  * assigned.) We scan forward from there and collect all TIDs needed
339  * for the entry type.
340  */
341  btreeEntry.findItem(&btreeEntry, stackEntry);
342  if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
343  == false)
344  {
345  /*
346  * GIN tree was seriously restructured, so we will cleanup all
347  * found data and rescan. See comments near 'return false' in
348  * collectMatchBitmap()
349  */
350  if (entry->matchBitmap)
351  {
352  if (entry->matchIterator)
354  entry->matchIterator = NULL;
355  tbm_free(entry->matchBitmap);
356  entry->matchBitmap = NULL;
357  }
358  LockBuffer(stackEntry->buffer, GIN_UNLOCK);
359  freeGinBtreeStack(stackEntry);
360  goto restartScanEntry;
361  }
362 
363  if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
364  {
366  entry->isFinished = FALSE;
367  }
368  }
369  else if (btreeEntry.findItem(&btreeEntry, stackEntry))
370  {
371  IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
372 
373  if (GinIsPostingTree(itup))
374  {
375  BlockNumber rootPostingTree = GinGetPostingTree(itup);
376  GinBtreeStack *stack;
377  Page page;
378  ItemPointerData minItem;
379 
380  /*
381  * We should unlock entry page before touching posting tree to
382  * prevent deadlocks with vacuum processes. Because entry is never
383  * deleted from page and posting tree is never reduced to the
384  * posting list, we can unlock page after getting BlockNumber of
385  * root of posting tree.
386  */
387  LockBuffer(stackEntry->buffer, GIN_UNLOCK);
388  needUnlock = FALSE;
389 
390  stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
391  rootPostingTree, snapshot);
392  entry->buffer = stack->buffer;
393 
394  /*
395  * We keep buffer pinned because we need to prevent deletion of
396  * page during scan. See GIN's vacuum implementation. RefCount is
397  * increased to keep buffer pinned after freeGinBtreeStack() call.
398  */
399  IncrBufferRefCount(entry->buffer);
400 
401  page = BufferGetPage(entry->buffer);
402 
403  /*
404  * Load the first page into memory.
405  */
406  ItemPointerSetMin(&minItem);
407  entry->list = GinDataLeafPageGetItems(page, &entry->nlist, minItem);
408 
409  entry->predictNumberResult = stack->predictNumber * entry->nlist;
410 
411  LockBuffer(entry->buffer, GIN_UNLOCK);
412  freeGinBtreeStack(stack);
413  entry->isFinished = FALSE;
414  }
415  else if (GinGetNPosting(itup) > 0)
416  {
417  entry->list = ginReadTuple(ginstate, entry->attnum, itup,
418  &entry->nlist);
419  entry->predictNumberResult = entry->nlist;
420 
421  entry->isFinished = FALSE;
422  }
423  }
424 
425  if (needUnlock)
426  LockBuffer(stackEntry->buffer, GIN_UNLOCK);
427  freeGinBtreeStack(stackEntry);
428 }
429 
430 /*
431  * Comparison function for scan entry indexes. Sorts by predictNumberResult,
432  * least frequent items first.
433  */
434 static int
435 entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
436 {
437  const GinScanKey key = (const GinScanKey) arg;
438  int i1 = *(const int *) a1;
439  int i2 = *(const int *) a2;
440  uint32 n1 = key->scanEntry[i1]->predictNumberResult;
441  uint32 n2 = key->scanEntry[i2]->predictNumberResult;
442 
443  if (n1 < n2)
444  return -1;
445  else if (n1 == n2)
446  return 0;
447  else
448  return 1;
449 }
450 
451 static void
453 {
455  int i;
456  int j;
457  int *entryIndexes;
458 
459  ItemPointerSetMin(&key->curItem);
460  key->curItemMatches = false;
461  key->recheckCurItem = false;
462  key->isFinished = false;
463 
464  /*
465  * Divide the entries into two distinct sets: required and additional.
466  * Additional entries are not enough for a match alone, without any items
467  * from the required set, but are needed by the consistent function to
468  * decide if an item matches. When scanning, we can skip over items from
469  * additional entries that have no corresponding matches in any of the
470  * required entries. That speeds up queries like "frequent & rare"
471  * considerably, if the frequent term can be put in the additional set.
472  *
473  * There can be many legal ways to divide them entries into these two
474  * sets. A conservative division is to just put everything in the required
475  * set, but the more you can put in the additional set, the more you can
476  * skip during the scan. To maximize skipping, we try to put as many
477  * frequent items as possible into additional, and less frequent ones into
478  * required. To do that, sort the entries by frequency
479  * (predictNumberResult), and put entries into the required set in that
480  * order, until the consistent function says that none of the remaining
481  * entries can form a match, without any items from the required set. The
482  * rest go to the additional set.
483  */
484  if (key->nentries > 1)
485  {
487 
488  entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
489  for (i = 0; i < key->nentries; i++)
490  entryIndexes[i] = i;
491  qsort_arg(entryIndexes, key->nentries, sizeof(int),
493 
494  for (i = 0; i < key->nentries - 1; i++)
495  {
496  /* Pass all entries <= i as FALSE, and the rest as MAYBE */
497  for (j = 0; j <= i; j++)
498  key->entryRes[entryIndexes[j]] = GIN_FALSE;
499  for (j = i + 1; j < key->nentries; j++)
500  key->entryRes[entryIndexes[j]] = GIN_MAYBE;
501 
502  if (key->triConsistentFn(key) == GIN_FALSE)
503  break;
504  }
505  /* i is now the last required entry. */
506 
508 
509  key->nrequired = i + 1;
510  key->nadditional = key->nentries - key->nrequired;
511  key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
512  key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
513 
514  j = 0;
515  for (i = 0; i < key->nrequired; i++)
516  key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
517  for (i = 0; i < key->nadditional; i++)
518  key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
519 
520  /* clean up after consistentFn calls (also frees entryIndexes) */
522  }
523  else
524  {
526 
527  key->nrequired = 1;
528  key->nadditional = 0;
529  key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
530  key->requiredEntries[0] = key->scanEntry[0];
531  }
532  MemoryContextSwitchTo(oldCtx);
533 }
534 
535 static void
537 {
538  GinScanOpaque so = (GinScanOpaque) scan->opaque;
539  GinState *ginstate = &so->ginstate;
540  uint32 i;
541 
542  for (i = 0; i < so->totalentries; i++)
543  startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
544 
545  if (GinFuzzySearchLimit > 0)
546  {
547  /*
548  * If all of keys more than threshold we will try to reduce result, we
549  * hope (and only hope, for intersection operation of array our
550  * supposition isn't true), that total result will not more than
551  * minimal predictNumberResult.
552  */
553  bool reduce = true;
554 
555  for (i = 0; i < so->totalentries; i++)
556  {
558  {
559  reduce = false;
560  break;
561  }
562  }
563  if (reduce)
564  {
565  for (i = 0; i < so->totalentries; i++)
566  {
568  so->entries[i]->reduceResult = TRUE;
569  }
570  }
571  }
572 
573  /*
574  * Now that we have the estimates for the entry frequencies, finish
575  * initializing the scan keys.
576  */
577  for (i = 0; i < so->nkeys; i++)
578  startScanKey(ginstate, so, so->keys + i);
579 }
580 
581 /*
582  * Load the next batch of item pointers from a posting tree.
583  *
584  * Note that we copy the page into GinScanEntry->list array and unlock it, but
585  * keep it pinned to prevent interference with vacuum.
586  */
587 static void
589  ItemPointerData advancePast, Snapshot snapshot)
590 {
591  Page page;
592  int i;
593  bool stepright;
594 
595  if (!BufferIsValid(entry->buffer))
596  {
597  entry->isFinished = true;
598  return;
599  }
600 
601  /*
602  * We have two strategies for finding the correct page: step right from
603  * the current page, or descend the tree again from the root. If
604  * advancePast equals the current item, the next matching item should be
605  * on the next page, so we step right. Otherwise, descend from root.
606  */
607  if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
608  {
609  stepright = true;
610  LockBuffer(entry->buffer, GIN_SHARE);
611  }
612  else
613  {
614  GinBtreeStack *stack;
615 
616  ReleaseBuffer(entry->buffer);
617 
618  /*
619  * Set the search key, and find the correct leaf page.
620  */
621  if (ItemPointerIsLossyPage(&advancePast))
622  {
623  ItemPointerSet(&entry->btree.itemptr,
624  GinItemPointerGetBlockNumber(&advancePast) + 1,
626  }
627  else
628  {
629  entry->btree.itemptr = advancePast;
630  entry->btree.itemptr.ip_posid++;
631  }
632  entry->btree.fullScan = false;
633  stack = ginFindLeafPage(&entry->btree, true, snapshot);
634 
635  /* we don't need the stack, just the buffer. */
636  entry->buffer = stack->buffer;
637  IncrBufferRefCount(entry->buffer);
638  freeGinBtreeStack(stack);
639  stepright = false;
640  }
641 
642  elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
643  GinItemPointerGetBlockNumber(&advancePast),
644  GinItemPointerGetOffsetNumber(&advancePast),
645  !stepright);
646 
647  page = BufferGetPage(entry->buffer);
648  for (;;)
649  {
650  entry->offset = InvalidOffsetNumber;
651  if (entry->list)
652  {
653  pfree(entry->list);
654  entry->list = NULL;
655  entry->nlist = 0;
656  }
657 
658  if (stepright)
659  {
660  /*
661  * We've processed all the entries on this page. If it was the
662  * last page in the tree, we're done.
663  */
664  if (GinPageRightMost(page))
665  {
666  UnlockReleaseBuffer(entry->buffer);
667  entry->buffer = InvalidBuffer;
668  entry->isFinished = TRUE;
669  return;
670  }
671 
672  /*
673  * Step to next page, following the right link. then find the
674  * first ItemPointer greater than advancePast.
675  */
676  entry->buffer = ginStepRight(entry->buffer,
677  ginstate->index,
678  GIN_SHARE);
679  page = BufferGetPage(entry->buffer);
680  }
681  stepright = true;
682 
683  if (GinPageGetOpaque(page)->flags & GIN_DELETED)
684  continue; /* page was deleted by concurrent vacuum */
685 
686  /*
687  * The first item > advancePast might not be on this page, but
688  * somewhere to the right, if the page was split, or a non-match from
689  * another key in the query allowed us to skip some items from this
690  * entry. Keep following the right-links until we re-find the correct
691  * page.
692  */
693  if (!GinPageRightMost(page) &&
694  ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
695  {
696  /*
697  * the item we're looking is > the right bound of the page, so it
698  * can't be on this page.
699  */
700  continue;
701  }
702 
703  entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
704 
705  for (i = 0; i < entry->nlist; i++)
706  {
707  if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
708  {
709  entry->offset = i;
710 
711  if (GinPageRightMost(page))
712  {
713  /* after processing the copied items, we're done. */
714  UnlockReleaseBuffer(entry->buffer);
715  entry->buffer = InvalidBuffer;
716  }
717  else
718  LockBuffer(entry->buffer, GIN_UNLOCK);
719  return;
720  }
721  }
722  }
723 }
724 
725 #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
726 #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
727 
728 /*
729  * Sets entry->curItem to next heap item pointer > advancePast, for one entry
730  * of one scan key, or sets entry->isFinished to TRUE if there are no more.
731  *
732  * Item pointers are returned in ascending order.
733  *
734  * Note: this can return a "lossy page" item pointer, indicating that the
735  * entry potentially matches all items on that heap page. However, it is
736  * not allowed to return both a lossy page pointer and exact (regular)
737  * item pointers for the same page. (Doing so would break the key-combination
738  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
739  * current implementation this is guaranteed by the behavior of tidbitmaps.
740  */
741 static void
743  ItemPointerData advancePast, Snapshot snapshot)
744 {
745  Assert(!entry->isFinished);
746 
747  Assert(!ItemPointerIsValid(&entry->curItem) ||
748  ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
749 
750  if (entry->matchBitmap)
751  {
752  /* A bitmap result */
753  BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
754  OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
755  bool gotitem = false;
756 
757  do
758  {
759  /*
760  * If we've exhausted all items on this block, move to next block
761  * in the bitmap.
762  */
763  while (entry->matchResult == NULL ||
764  (entry->matchResult->ntuples >= 0 &&
765  entry->offset >= entry->matchResult->ntuples) ||
766  entry->matchResult->blockno < advancePastBlk ||
767  (ItemPointerIsLossyPage(&advancePast) &&
768  entry->matchResult->blockno == advancePastBlk))
769  {
770  entry->matchResult = tbm_iterate(entry->matchIterator);
771 
772  if (entry->matchResult == NULL)
773  {
776  entry->matchIterator = NULL;
777  entry->isFinished = TRUE;
778  break;
779  }
780 
781  /*
782  * Reset counter to the beginning of entry->matchResult. Note:
783  * entry->offset is still greater than matchResult->ntuples if
784  * matchResult is lossy. So, on next call we will get next
785  * result from TIDBitmap.
786  */
787  entry->offset = 0;
788  }
789  if (entry->isFinished)
790  break;
791 
792  /*
793  * We're now on the first page after advancePast which has any
794  * items on it. If it's a lossy result, return that.
795  */
796  if (entry->matchResult->ntuples < 0)
797  {
799  entry->matchResult->blockno);
800 
801  /*
802  * We might as well fall out of the loop; we could not
803  * estimate number of results on this page to support correct
804  * reducing of result even if it's enabled.
805  */
806  gotitem = true;
807  break;
808  }
809 
810  /*
811  * Not a lossy page. Skip over any offsets <= advancePast, and
812  * return that.
813  */
814  if (entry->matchResult->blockno == advancePastBlk)
815  {
816  /*
817  * First, do a quick check against the last offset on the
818  * page. If that's > advancePast, so are all the other
819  * offsets.
820  */
821  if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
822  {
823  entry->offset = entry->matchResult->ntuples;
824  continue;
825  }
826 
827  /* Otherwise scan to find the first item > advancePast */
828  while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
829  entry->offset++;
830  }
831 
832  ItemPointerSet(&entry->curItem,
833  entry->matchResult->blockno,
834  entry->matchResult->offsets[entry->offset]);
835  entry->offset++;
836  gotitem = true;
837  } while (!gotitem || (entry->reduceResult == TRUE && dropItem(entry)));
838  }
839  else if (!BufferIsValid(entry->buffer))
840  {
841  /*
842  * A posting list from an entry tuple, or the last page of a posting
843  * tree.
844  */
845  do
846  {
847  if (entry->offset >= entry->nlist)
848  {
850  entry->isFinished = TRUE;
851  break;
852  }
853 
854  entry->curItem = entry->list[entry->offset++];
855  } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
856  /* XXX: shouldn't we apply the fuzzy search limit here? */
857  }
858  else
859  {
860  /* A posting tree */
861  do
862  {
863  /* If we've processed the current batch, load more items */
864  while (entry->offset >= entry->nlist)
865  {
866  entryLoadMoreItems(ginstate, entry, advancePast, snapshot);
867 
868  if (entry->isFinished)
869  {
871  return;
872  }
873  }
874 
875  entry->curItem = entry->list[entry->offset++];
876 
877  } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0 ||
878  (entry->reduceResult == TRUE && dropItem(entry)));
879  }
880 }
881 
882 /*
883  * Identify the "current" item among the input entry streams for this scan key
884  * that is greater than advancePast, and test whether it passes the scan key
885  * qual condition.
886  *
887  * The current item is the smallest curItem among the inputs. key->curItem
888  * is set to that value. key->curItemMatches is set to indicate whether that
889  * TID passes the consistentFn test. If so, key->recheckCurItem is set true
890  * iff recheck is needed for this item pointer (including the case where the
891  * item pointer is a lossy page pointer).
892  *
893  * If all entry streams are exhausted, sets key->isFinished to TRUE.
894  *
895  * Item pointers must be returned in ascending order.
896  *
897  * Note: this can return a "lossy page" item pointer, indicating that the
898  * key potentially matches all items on that heap page. However, it is
899  * not allowed to return both a lossy page pointer and exact (regular)
900  * item pointers for the same page. (Doing so would break the key-combination
901  * logic in scanGetItem.)
902  */
903 static void
904 keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
905  ItemPointerData advancePast, Snapshot snapshot)
906 {
907  ItemPointerData minItem;
908  ItemPointerData curPageLossy;
909  uint32 i;
910  bool haveLossyEntry;
911  GinScanEntry entry;
912  GinTernaryValue res;
913  MemoryContext oldCtx;
914  bool allFinished;
915 
916  Assert(!key->isFinished);
917 
918  /*
919  * We might have already tested this item; if so, no need to repeat work.
920  * (Note: the ">" case can happen, if advancePast is exact but we
921  * previously had to set curItem to a lossy-page pointer.)
922  */
923  if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
924  return;
925 
926  /*
927  * Find the minimum item > advancePast among the active entry streams.
928  *
929  * Note: a lossy-page entry is encoded by a ItemPointer with max value for
930  * offset (0xffff), so that it will sort after any exact entries for the
931  * same page. So we'll prefer to return exact pointers not lossy
932  * pointers, which is good.
933  */
934  ItemPointerSetMax(&minItem);
935  allFinished = true;
936  for (i = 0; i < key->nrequired; i++)
937  {
938  entry = key->requiredEntries[i];
939 
940  if (entry->isFinished)
941  continue;
942 
943  /*
944  * Advance this stream if necessary.
945  *
946  * In particular, since entry->curItem was initialized with
947  * ItemPointerSetMin, this ensures we fetch the first item for each
948  * entry on the first call.
949  */
950  if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
951  {
952  entryGetItem(ginstate, entry, advancePast, snapshot);
953  if (entry->isFinished)
954  continue;
955  }
956 
957  allFinished = false;
958  if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
959  minItem = entry->curItem;
960  }
961 
962  if (allFinished)
963  {
964  /* all entries are finished */
965  key->isFinished = TRUE;
966  return;
967  }
968 
969  /*
970  * Ok, we now know that there are no matches < minItem.
971  *
972  * If minItem is lossy, it means that there were no exact items on the
973  * page among requiredEntries, because lossy pointers sort after exact
974  * items. However, there might be exact items for the same page among
975  * additionalEntries, so we mustn't advance past them.
976  */
977  if (ItemPointerIsLossyPage(&minItem))
978  {
979  if (GinItemPointerGetBlockNumber(&advancePast) <
981  {
982  advancePast.ip_blkid = minItem.ip_blkid;
983  advancePast.ip_posid = 0;
984  }
985  }
986  else
987  {
988  Assert(minItem.ip_posid > 0);
989  advancePast = minItem;
990  advancePast.ip_posid--;
991  }
992 
993  /*
994  * We might not have loaded all the entry streams for this TID yet. We
995  * could call the consistent function, passing MAYBE for those entries, to
996  * see if it can decide if this TID matches based on the information we
997  * have. But if the consistent-function is expensive, and cannot in fact
998  * decide with partial information, that could be a big loss. So, load all
999  * the additional entries, before calling the consistent function.
1000  */
1001  for (i = 0; i < key->nadditional; i++)
1002  {
1003  entry = key->additionalEntries[i];
1004 
1005  if (entry->isFinished)
1006  continue;
1007 
1008  if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1009  {
1010  entryGetItem(ginstate, entry, advancePast, snapshot);
1011  if (entry->isFinished)
1012  continue;
1013  }
1014 
1015  /*
1016  * Normally, none of the items in additionalEntries can have a curItem
1017  * larger than minItem. But if minItem is a lossy page, then there
1018  * might be exact items on the same page among additionalEntries.
1019  */
1020  if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1021  {
1022  Assert(ItemPointerIsLossyPage(&minItem));
1023  minItem = entry->curItem;
1024  }
1025  }
1026 
1027  /*
1028  * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
1029  * and perform consistentFn test.
1030  *
1031  * Lossy-page entries pose a problem, since we don't know the correct
1032  * entryRes state to pass to the consistentFn, and we also don't know what
1033  * its combining logic will be (could be AND, OR, or even NOT). If the
1034  * logic is OR then the consistentFn might succeed for all items in the
1035  * lossy page even when none of the other entries match.
1036  *
1037  * Our strategy is to call the tri-state consistent function, with the
1038  * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
1039  * returns FALSE, none of the lossy items alone are enough for a match, so
1040  * we don't need to return a lossy-page pointer. Otherwise, return a
1041  * lossy-page pointer to indicate that the whole heap page must be
1042  * checked. (On subsequent calls, we'll do nothing until minItem is past
1043  * the page altogether, thus ensuring that we never return both regular
1044  * and lossy pointers for the same page.)
1045  *
1046  * An exception is that it doesn't matter what we pass for lossy pointers
1047  * in "hidden" entries, because the consistentFn's result can't depend on
1048  * them. We could pass them as MAYBE as well, but if we're using the
1049  * "shim" implementation of a tri-state consistent function (see
1050  * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
1051  * them as TRUE.
1052  *
1053  * Note that only lossy-page entries pointing to the current item's page
1054  * should trigger this processing; we might have future lossy pages in the
1055  * entry array, but they aren't relevant yet.
1056  */
1057  key->curItem = minItem;
1058  ItemPointerSetLossyPage(&curPageLossy,
1060  haveLossyEntry = false;
1061  for (i = 0; i < key->nentries; i++)
1062  {
1063  entry = key->scanEntry[i];
1064  if (entry->isFinished == FALSE &&
1065  ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1066  {
1067  if (i < key->nuserentries)
1068  key->entryRes[i] = GIN_MAYBE;
1069  else
1070  key->entryRes[i] = GIN_TRUE;
1071  haveLossyEntry = true;
1072  }
1073  else
1074  key->entryRes[i] = GIN_FALSE;
1075  }
1076 
1077  /* prepare for calling consistentFn in temp context */
1078  oldCtx = MemoryContextSwitchTo(tempCtx);
1079 
1080  if (haveLossyEntry)
1081  {
1082  /* Have lossy-page entries, so see if whole page matches */
1083  res = key->triConsistentFn(key);
1084 
1085  if (res == GIN_TRUE || res == GIN_MAYBE)
1086  {
1087  /* Yes, so clean up ... */
1088  MemoryContextSwitchTo(oldCtx);
1089  MemoryContextReset(tempCtx);
1090 
1091  /* and return lossy pointer for whole page */
1092  key->curItem = curPageLossy;
1093  key->curItemMatches = true;
1094  key->recheckCurItem = true;
1095  return;
1096  }
1097  }
1098 
1099  /*
1100  * At this point we know that we don't need to return a lossy whole-page
1101  * pointer, but we might have matches for individual exact item pointers,
1102  * possibly in combination with a lossy pointer. Pass lossy pointers as
1103  * MAYBE to the ternary consistent function, to let it decide if this
1104  * tuple satisfies the overall key, even though we don't know if the lossy
1105  * entries match.
1106  *
1107  * Prepare entryRes array to be passed to consistentFn.
1108  */
1109  for (i = 0; i < key->nentries; i++)
1110  {
1111  entry = key->scanEntry[i];
1112  if (entry->isFinished)
1113  key->entryRes[i] = GIN_FALSE;
1114 #if 0
1115 
1116  /*
1117  * This case can't currently happen, because we loaded all the entries
1118  * for this item earlier.
1119  */
1120  else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1121  key->entryRes[i] = GIN_MAYBE;
1122 #endif
1123  else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1124  key->entryRes[i] = GIN_MAYBE;
1125  else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
1126  key->entryRes[i] = GIN_TRUE;
1127  else
1128  key->entryRes[i] = GIN_FALSE;
1129  }
1130 
1131  res = key->triConsistentFn(key);
1132 
1133  switch (res)
1134  {
1135  case GIN_TRUE:
1136  key->curItemMatches = true;
1137  /* triConsistentFn set recheckCurItem */
1138  break;
1139 
1140  case GIN_FALSE:
1141  key->curItemMatches = false;
1142  break;
1143 
1144  case GIN_MAYBE:
1145  key->curItemMatches = true;
1146  key->recheckCurItem = true;
1147  break;
1148 
1149  default:
1150 
1151  /*
1152  * the 'default' case shouldn't happen, but if the consistent
1153  * function returns something bogus, this is the safe result
1154  */
1155  key->curItemMatches = true;
1156  key->recheckCurItem = true;
1157  break;
1158  }
1159 
1160  /*
1161  * We have a tuple, and we know if it matches or not. If it's a non-match,
1162  * we could continue to find the next matching tuple, but let's break out
1163  * and give scanGetItem a chance to advance the other keys. They might be
1164  * able to skip past to a much higher TID, allowing us to save work.
1165  */
1166 
1167  /* clean up after consistentFn calls */
1168  MemoryContextSwitchTo(oldCtx);
1169  MemoryContextReset(tempCtx);
1170 }
1171 
1172 /*
1173  * Get next heap item pointer (after advancePast) from scan.
1174  * Returns true if anything found.
1175  * On success, *item and *recheck are set.
1176  *
1177  * Note: this is very nearly the same logic as in keyGetItem(), except
1178  * that we know the keys are to be combined with AND logic, whereas in
1179  * keyGetItem() the combination logic is known only to the consistentFn.
1180  */
1181 static bool
1183  ItemPointerData *item, bool *recheck)
1184 {
1185  GinScanOpaque so = (GinScanOpaque) scan->opaque;
1186  uint32 i;
1187  bool match;
1188 
1189  /*----------
1190  * Advance the scan keys in lock-step, until we find an item that matches
1191  * all the keys. If any key reports isFinished, meaning its subset of the
1192  * entries is exhausted, we can stop. Otherwise, set *item to the next
1193  * matching item.
1194  *
1195  * This logic works only if a keyGetItem stream can never contain both
1196  * exact and lossy pointers for the same page. Else we could have a
1197  * case like
1198  *
1199  * stream 1 stream 2
1200  * ... ...
1201  * 42/6 42/7
1202  * 50/1 42/0xffff
1203  * ... ...
1204  *
1205  * We would conclude that 42/6 is not a match and advance stream 1,
1206  * thus never detecting the match to the lossy pointer in stream 2.
1207  * (keyGetItem has a similar problem versus entryGetItem.)
1208  *----------
1209  */
1210  do
1211  {
1212  ItemPointerSetMin(item);
1213  match = true;
1214  for (i = 0; i < so->nkeys && match; i++)
1215  {
1216  GinScanKey key = so->keys + i;
1217 
1218  /* Fetch the next item for this key that is > advancePast. */
1219  keyGetItem(&so->ginstate, so->tempCtx, key, advancePast,
1220  scan->xs_snapshot);
1221 
1222  if (key->isFinished)
1223  return false;
1224 
1225  /*
1226  * If it's not a match, we can immediately conclude that nothing
1227  * <= this item matches, without checking the rest of the keys.
1228  */
1229  if (!key->curItemMatches)
1230  {
1231  advancePast = key->curItem;
1232  match = false;
1233  break;
1234  }
1235 
1236  /*
1237  * It's a match. We can conclude that nothing < matches, so the
1238  * other key streams can skip to this item.
1239  *
1240  * Beware of lossy pointers, though; from a lossy pointer, we can
1241  * only conclude that nothing smaller than this *block* matches.
1242  */
1243  if (ItemPointerIsLossyPage(&key->curItem))
1244  {
1245  if (GinItemPointerGetBlockNumber(&advancePast) <
1247  {
1248  advancePast.ip_blkid = key->curItem.ip_blkid;
1249  advancePast.ip_posid = 0;
1250  }
1251  }
1252  else
1253  {
1254  Assert(key->curItem.ip_posid > 0);
1255  advancePast = key->curItem;
1256  advancePast.ip_posid--;
1257  }
1258 
1259  /*
1260  * If this is the first key, remember this location as a potential
1261  * match, and proceed to check the rest of the keys.
1262  *
1263  * Otherwise, check if this is the same item that we checked the
1264  * previous keys for (or a lossy pointer for the same page). If
1265  * not, loop back to check the previous keys for this item (we
1266  * will check this key again too, but keyGetItem returns quickly
1267  * for that)
1268  */
1269  if (i == 0)
1270  {
1271  *item = key->curItem;
1272  }
1273  else
1274  {
1275  if (ItemPointerIsLossyPage(&key->curItem) ||
1276  ItemPointerIsLossyPage(item))
1277  {
1279  match = (GinItemPointerGetBlockNumber(&key->curItem) ==
1281  }
1282  else
1283  {
1284  Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
1285  match = (ginCompareItemPointers(&key->curItem, item) == 0);
1286  }
1287  }
1288  }
1289  } while (!match);
1290 
1291  Assert(!ItemPointerIsMin(item));
1292 
1293  /*
1294  * Now *item contains the first ItemPointer after previous result that
1295  * satisfied all the keys for that exact TID, or a lossy reference to the
1296  * same page.
1297  *
1298  * We must return recheck = true if any of the keys are marked recheck.
1299  */
1300  *recheck = false;
1301  for (i = 0; i < so->nkeys; i++)
1302  {
1303  GinScanKey key = so->keys + i;
1304 
1305  if (key->recheckCurItem)
1306  {
1307  *recheck = true;
1308  break;
1309  }
1310  }
1311 
1312  return TRUE;
1313 }
1314 
1315 
1316 /*
1317  * Functions for scanning the pending list
1318  */
1319 
1320 
1321 /*
1322  * Get ItemPointer of next heap row to be checked from pending list.
1323  * Returns false if there are no more. On pages with several heap rows
1324  * it returns each row separately, on page with part of heap row returns
1325  * per page data. pos->firstOffset and pos->lastOffset are set to identify
1326  * the range of pending-list tuples belonging to this heap row.
1327  *
1328  * The pendingBuffer is presumed pinned and share-locked on entry, and is
1329  * pinned and share-locked on success exit. On failure exit it's released.
1330  */
1331 static bool
1333 {
1334  OffsetNumber maxoff;
1335  Page page;
1336  IndexTuple itup;
1337 
1338  ItemPointerSetInvalid(&pos->item);
1339  for (;;)
1340  {
1341  page = BufferGetPage(pos->pendingBuffer);
1342  TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1343 
1344  maxoff = PageGetMaxOffsetNumber(page);
1345  if (pos->firstOffset > maxoff)
1346  {
1347  BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1348 
1349  if (blkno == InvalidBlockNumber)
1350  {
1353 
1354  return false;
1355  }
1356  else
1357  {
1358  /*
1359  * Here we must prevent deletion of next page by insertcleanup
1360  * process, which may be trying to obtain exclusive lock on
1361  * current page. So, we lock next page before releasing the
1362  * current one
1363  */
1364  Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1365 
1366  LockBuffer(tmpbuf, GIN_SHARE);
1368 
1369  pos->pendingBuffer = tmpbuf;
1371  }
1372  }
1373  else
1374  {
1375  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1376  pos->item = itup->t_tid;
1377  if (GinPageHasFullRow(page))
1378  {
1379  /*
1380  * find itempointer to the next row
1381  */
1382  for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1383  {
1384  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1385  if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1386  break;
1387  }
1388  }
1389  else
1390  {
1391  /*
1392  * All itempointers are the same on this page
1393  */
1394  pos->lastOffset = maxoff + 1;
1395  }
1396 
1397  /*
1398  * Now pos->firstOffset points to the first tuple of current heap
1399  * row, pos->lastOffset points to the first tuple of next heap row
1400  * (or to the end of page)
1401  */
1402  break;
1403  }
1404  }
1405 
1406  return true;
1407 }
1408 
1409 /*
1410  * Scan pending-list page from current tuple (off) up till the first of:
1411  * - match is found (then returns true)
1412  * - no later match is possible
1413  * - tuple's attribute number is not equal to entry's attrnum
1414  * - reach end of page
1415  *
1416  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1417  * of gintuple_get_key() on the current page.
1418  */
1419 static bool
1421  OffsetNumber off, OffsetNumber maxoff,
1422  GinScanEntry entry,
1423  Datum *datum, GinNullCategory *category,
1424  bool *datumExtracted)
1425 {
1426  IndexTuple itup;
1427  int32 cmp;
1428 
1429  /* Partial match to a null is not possible */
1430  if (entry->queryCategory != GIN_CAT_NORM_KEY)
1431  return false;
1432 
1433  while (off < maxoff)
1434  {
1435  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1436 
1437  if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1438  return false;
1439 
1440  if (datumExtracted[off - 1] == false)
1441  {
1442  datum[off - 1] = gintuple_get_key(ginstate, itup,
1443  &category[off - 1]);
1444  datumExtracted[off - 1] = true;
1445  }
1446 
1447  /* Once we hit nulls, no further match is possible */
1448  if (category[off - 1] != GIN_CAT_NORM_KEY)
1449  return false;
1450 
1451  /*----------
1452  * Check partial match.
1453  * case cmp == 0 => match
1454  * case cmp > 0 => not match and end scan (no later match possible)
1455  * case cmp < 0 => not match and continue scan
1456  *----------
1457  */
1458  cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1459  ginstate->supportCollation[entry->attnum - 1],
1460  entry->queryKey,
1461  datum[off - 1],
1462  UInt16GetDatum(entry->strategy),
1463  PointerGetDatum(entry->extra_data)));
1464  if (cmp == 0)
1465  return true;
1466  else if (cmp > 0)
1467  return false;
1468 
1469  off++;
1470  }
1471 
1472  return false;
1473 }
1474 
1475 /*
1476  * Set up the entryRes array for each key by looking at
1477  * every entry for current heap row in pending list.
1478  *
1479  * Returns true if each scan key has at least one entryRes match.
1480  * This corresponds to the situations where the normal index search will
1481  * try to apply the key's consistentFn. (A tuple not meeting that requirement
1482  * cannot be returned by the normal search since no entry stream will
1483  * source its TID.)
1484  *
1485  * The pendingBuffer is presumed pinned and share-locked on entry.
1486  */
1487 static bool
1489 {
1490  GinScanOpaque so = (GinScanOpaque) scan->opaque;
1491  OffsetNumber attrnum;
1492  Page page;
1493  IndexTuple itup;
1494  int i,
1495  j;
1496 
1497  /*
1498  * Reset all entryRes and hasMatchKey flags
1499  */
1500  for (i = 0; i < so->nkeys; i++)
1501  {
1502  GinScanKey key = so->keys + i;
1503 
1504  memset(key->entryRes, GIN_FALSE, key->nentries);
1505  }
1506  memset(pos->hasMatchKey, FALSE, so->nkeys);
1507 
1508  /*
1509  * Outer loop iterates over multiple pending-list pages when a single heap
1510  * row has entries spanning those pages.
1511  */
1512  for (;;)
1513  {
1514  Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1515  GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1516  bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1517 
1518  Assert(pos->lastOffset > pos->firstOffset);
1519  memset(datumExtracted + pos->firstOffset - 1, 0,
1520  sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1521 
1522  page = BufferGetPage(pos->pendingBuffer);
1523  TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1524 
1525  for (i = 0; i < so->nkeys; i++)
1526  {
1527  GinScanKey key = so->keys + i;
1528 
1529  for (j = 0; j < key->nentries; j++)
1530  {
1531  GinScanEntry entry = key->scanEntry[j];
1532  OffsetNumber StopLow = pos->firstOffset,
1533  StopHigh = pos->lastOffset,
1534  StopMiddle;
1535 
1536  /* If already matched on earlier page, do no extra work */
1537  if (key->entryRes[j])
1538  continue;
1539 
1540  /*
1541  * Interesting tuples are from pos->firstOffset to
1542  * pos->lastOffset and they are ordered by (attnum, Datum) as
1543  * it's done in entry tree. So we can use binary search to
1544  * avoid linear scanning.
1545  */
1546  while (StopLow < StopHigh)
1547  {
1548  int res;
1549 
1550  StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1551 
1552  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1553 
1554  attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1555 
1556  if (key->attnum < attrnum)
1557  {
1558  StopHigh = StopMiddle;
1559  continue;
1560  }
1561  if (key->attnum > attrnum)
1562  {
1563  StopLow = StopMiddle + 1;
1564  continue;
1565  }
1566 
1567  if (datumExtracted[StopMiddle - 1] == false)
1568  {
1569  datum[StopMiddle - 1] =
1570  gintuple_get_key(&so->ginstate, itup,
1571  &category[StopMiddle - 1]);
1572  datumExtracted[StopMiddle - 1] = true;
1573  }
1574 
1575  if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1576  {
1577  /* special behavior depending on searchMode */
1578  if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1579  {
1580  /* match anything except NULL_ITEM */
1581  if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1582  res = -1;
1583  else
1584  res = 0;
1585  }
1586  else
1587  {
1588  /* match everything */
1589  res = 0;
1590  }
1591  }
1592  else
1593  {
1594  res = ginCompareEntries(&so->ginstate,
1595  entry->attnum,
1596  entry->queryKey,
1597  entry->queryCategory,
1598  datum[StopMiddle - 1],
1599  category[StopMiddle - 1]);
1600  }
1601 
1602  if (res == 0)
1603  {
1604  /*
1605  * Found exact match (there can be only one, except in
1606  * EMPTY_QUERY mode).
1607  *
1608  * If doing partial match, scan forward from here to
1609  * end of page to check for matches.
1610  *
1611  * See comment above about tuple's ordering.
1612  */
1613  if (entry->isPartialMatch)
1614  key->entryRes[j] =
1616  page,
1617  StopMiddle,
1618  pos->lastOffset,
1619  entry,
1620  datum,
1621  category,
1622  datumExtracted);
1623  else
1624  key->entryRes[j] = true;
1625 
1626  /* done with binary search */
1627  break;
1628  }
1629  else if (res < 0)
1630  StopHigh = StopMiddle;
1631  else
1632  StopLow = StopMiddle + 1;
1633  }
1634 
1635  if (StopLow >= StopHigh && entry->isPartialMatch)
1636  {
1637  /*
1638  * No exact match on this page. If doing partial match,
1639  * scan from the first tuple greater than target value to
1640  * end of page. Note that since we don't remember whether
1641  * the comparePartialFn told us to stop early on a
1642  * previous page, we will uselessly apply comparePartialFn
1643  * to the first tuple on each subsequent page.
1644  */
1645  key->entryRes[j] =
1647  page,
1648  StopHigh,
1649  pos->lastOffset,
1650  entry,
1651  datum,
1652  category,
1653  datumExtracted);
1654  }
1655 
1656  pos->hasMatchKey[i] |= key->entryRes[j];
1657  }
1658  }
1659 
1660  /* Advance firstOffset over the scanned tuples */
1661  pos->firstOffset = pos->lastOffset;
1662 
1663  if (GinPageHasFullRow(page))
1664  {
1665  /*
1666  * We have examined all pending entries for the current heap row.
1667  * Break out of loop over pages.
1668  */
1669  break;
1670  }
1671  else
1672  {
1673  /*
1674  * Advance to next page of pending entries for the current heap
1675  * row. Complain if there isn't one.
1676  */
1677  ItemPointerData item = pos->item;
1678 
1679  if (scanGetCandidate(scan, pos) == false ||
1680  !ItemPointerEquals(&pos->item, &item))
1681  elog(ERROR, "could not find additional pending pages for same heap tuple");
1682  }
1683  }
1684 
1685  /*
1686  * Now return "true" if all scan keys have at least one matching datum
1687  */
1688  for (i = 0; i < so->nkeys; i++)
1689  {
1690  if (pos->hasMatchKey[i] == false)
1691  return false;
1692  }
1693 
1694  return true;
1695 }
1696 
1697 /*
1698  * Collect all matched rows from pending list into bitmap
1699  */
1700 static void
1701 scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1702 {
1703  GinScanOpaque so = (GinScanOpaque) scan->opaque;
1704  MemoryContext oldCtx;
1705  bool recheck,
1706  match;
1707  int i;
1708  pendingPosition pos;
1709  Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1710  Page page;
1711  BlockNumber blkno;
1712 
1713  *ntids = 0;
1714 
1715  LockBuffer(metabuffer, GIN_SHARE);
1716  page = BufferGetPage(metabuffer);
1717  TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1718  blkno = GinPageGetMeta(page)->head;
1719 
1720  /*
1721  * fetch head of list before unlocking metapage. head page must be pinned
1722  * to prevent deletion by vacuum process
1723  */
1724  if (blkno == InvalidBlockNumber)
1725  {
1726  /* No pending list, so proceed with normal scan */
1727  UnlockReleaseBuffer(metabuffer);
1728  return;
1729  }
1730 
1731  pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1732  LockBuffer(pos.pendingBuffer, GIN_SHARE);
1733  pos.firstOffset = FirstOffsetNumber;
1734  UnlockReleaseBuffer(metabuffer);
1735  pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1736 
1737  /*
1738  * loop for each heap row. scanGetCandidate returns full row or row's
1739  * tuples from first page.
1740  */
1741  while (scanGetCandidate(scan, &pos))
1742  {
1743  /*
1744  * Check entries in tuple and set up entryRes array.
1745  *
1746  * If pending tuples belonging to the current heap row are spread
1747  * across several pages, collectMatchesForHeapRow will read all of
1748  * those pages.
1749  */
1750  if (!collectMatchesForHeapRow(scan, &pos))
1751  continue;
1752 
1753  /*
1754  * Matching of entries of one row is finished, so check row using
1755  * consistent functions.
1756  */
1757  oldCtx = MemoryContextSwitchTo(so->tempCtx);
1758  recheck = false;
1759  match = true;
1760 
1761  for (i = 0; i < so->nkeys; i++)
1762  {
1763  GinScanKey key = so->keys + i;
1764 
1765  if (!key->boolConsistentFn(key))
1766  {
1767  match = false;
1768  break;
1769  }
1770  recheck |= key->recheckCurItem;
1771  }
1772 
1773  MemoryContextSwitchTo(oldCtx);
1775 
1776  if (match)
1777  {
1778  tbm_add_tuples(tbm, &pos.item, 1, recheck);
1779  (*ntids)++;
1780  }
1781  }
1782 
1783  pfree(pos.hasMatchKey);
1784 }
1785 
1786 
1787 #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1788 
1789 int64
1791 {
1792  GinScanOpaque so = (GinScanOpaque) scan->opaque;
1793  int64 ntids;
1794  ItemPointerData iptr;
1795  bool recheck;
1796 
1797  /*
1798  * Set up the scan keys, and check for unsatisfiable query.
1799  */
1800  ginFreeScanKeys(so); /* there should be no keys yet, but just to be
1801  * sure */
1802  ginNewScanKey(scan);
1803 
1804  if (GinIsVoidRes(scan))
1805  return 0;
1806 
1807  ntids = 0;
1808 
1809  /*
1810  * First, scan the pending list and collect any matching entries into the
1811  * bitmap. After we scan a pending item, some other backend could post it
1812  * into the main index, and so we might visit it a second time during the
1813  * main scan. This is okay because we'll just re-set the same bit in the
1814  * bitmap. (The possibility of duplicate visits is a major reason why GIN
1815  * can't support the amgettuple API, however.) Note that it would not do
1816  * to scan the main index before the pending list, since concurrent
1817  * cleanup could then make us miss entries entirely.
1818  */
1819  scanPendingInsert(scan, tbm, &ntids);
1820 
1821  /*
1822  * Now scan the main index.
1823  */
1824  startScan(scan);
1825 
1826  ItemPointerSetMin(&iptr);
1827 
1828  for (;;)
1829  {
1831 
1832  if (!scanGetItem(scan, iptr, &iptr, &recheck))
1833  break;
1834 
1835  if (ItemPointerIsLossyPage(&iptr))
1837  else
1838  tbm_add_tuples(tbm, &iptr, 1, recheck);
1839  ntids++;
1840  }
1841 
1842  return ntids;
1843 }
#define GinPageHasFullRow(page)
Definition: ginblock.h:118
static void startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
Definition: ginget.c:452
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:59
#define GIN_TRUE
Definition: gin.h:60
#define GIN_UNLOCK
Definition: gin_private.h:43
#define GIN_DELETED
Definition: ginblock.h:41
#define GIN_CAT_EMPTY_QUERY
Definition: ginblock.h:196
int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: ginget.c:1790
GinBtreeData btree
Definition: gin_private.h:344
Oid supportCollation[INDEX_MAX_KEYS]
Definition: gin_private.h:82
ItemPointerData curItem
Definition: gin_private.h:329
void tbm_end_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:787
OffsetNumber offset
Definition: gin_private.h:339
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
GinState * ginstate
Definition: gin_private.h:162
uint32 predictNumberResult
Definition: gin_private.h:343
static void entryGetItem(GinState *ginstate, GinScanEntry entry, ItemPointerData advancePast, Snapshot snapshot)
Definition: ginget.c:742
Buffer pendingBuffer
Definition: ginget.c:28
Relation index
Definition: gin_private.h:53
#define DatumGetInt32(X)
Definition: postgres.h:480
#define dropItem(e)
Definition: ginget.c:726
#define GinItemPointerGetOffsetNumber(pointer)
Definition: ginblock.h:137
OffsetNumber off
Definition: gin_private.h:126
#define GIN_MAYBE
Definition: gin.h:61
#define PointerGetDatum(X)
Definition: postgres.h:564
static bool matchPartialInPendingList(GinState *ginstate, Page page, OffsetNumber off, OffsetNumber maxoff, GinScanEntry entry, Datum *datum, GinNullCategory *category, bool *datumExtracted)
Definition: ginget.c:1420
static void scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
Definition: ginget.c:1701
void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck)
Definition: tidbitmap.c:290
TBMIterator * matchIterator
Definition: gin_private.h:333
ItemPointerData t_tid
Definition: itup.h:37
Form_pg_attribute * attrs
Definition: tupdesc.h:74
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Snapshot xs_snapshot
Definition: relscan.h:90
#define InvalidBuffer
Definition: buf.h:25
int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, GinNullCategory categorya, Datum b, GinNullCategory categoryb)
Definition: ginutil.c:381
Pointer extra_data
Definition: gin_private.h:320
#define ItemPointerIsLossyPage(p)
Definition: ginblock.h:162
#define GinGetNPosting(itup)
Definition: ginblock.h:212
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:135
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1353
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3292
#define GinPageGetOpaque(page)
Definition: ginblock.h:109
#define GIN_METAPAGE_BLKNO
Definition: ginblock.h:50
static void startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
Definition: ginget.c:297
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
ItemPointerData * list
Definition: gin_private.h:337
signed int int32
Definition: c.h:253
int GinFuzzySearchLimit
Definition: ginget.c:24
Relation indexRelation
Definition: relscan.h:89
uint16 OffsetNumber
Definition: off.h:24
Definition: type.h:90
GinBtreeStack * ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot)
Definition: ginbtree.c:76
BlockNumber blockno
Definition: tidbitmap.h:40
GinScanEntry * scanEntry
Definition: gin_private.h:268
bool(* findItem)(GinBtree, GinBtreeStack *)
Definition: gin_private.h:149
#define GIN_SEARCH_MODE_ALL
Definition: gin.h:35
void pfree(void *pointer)
Definition: mcxt.c:992
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3315
#define ERROR
Definition: elog.h:43
#define GinPageRightMost(page)
Definition: ginblock.h:128
BlockIdData ip_blkid
Definition: itemptr.h:38
signed char GinNullCategory
Definition: ginblock.h:190
#define ItemPointerSetLossyPage(p, b)
Definition: ginblock.h:160
#define FALSE
Definition: c.h:218
struct GinScanKeyData * GinScanKey
Definition: gin_private.h:256
static void startScan(IndexScanDesc scan)
Definition: ginget.c:536
char GinTernaryValue
Definition: gin.h:57
OffsetNumber attnum
Definition: gin_private.h:323
#define DEBUG2
Definition: elog.h:24
OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
Definition: ginutil.c:213
#define GIN_CAT_NORM_KEY
Definition: ginblock.h:192
GinScanOpaqueData * GinScanOpaque
Definition: gin_private.h:364
FmgrInfo comparePartialFn[INDEX_MAX_KEYS]
Definition: gin_private.h:78
void tbm_free(TIDBitmap *tbm)
Definition: tidbitmap.c:272
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: tidbitmap.h:44
#define FirstOffsetNumber
Definition: off.h:27
IndexTupleData * IndexTuple
Definition: itup.h:53
MemoryContext keyCtx
Definition: gin_private.h:359
GinBtreeStack * ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot)
Definition: gindatapage.c:1915
static bool collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
Definition: ginget.c:1488
Buffer ginStepRight(Buffer buffer, Relation index, int lockmode)
Definition: ginbtree.c:165
static void scanPostingTree(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree, Snapshot snapshot)
Definition: ginget.c:65
static void entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advancePast, Snapshot snapshot)
Definition: ginget.c:588
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:184
unsigned int uint32
Definition: c.h:265
OffsetNumber firstOffset
Definition: ginget.c:29
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
#define GinPageIsLeaf(page)
Definition: ginblock.h:111
OffsetNumber attnum
Definition: gin_private.h:299
Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, GinNullCategory *category)
Definition: ginutil.c:246
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ItemPointerIsMin(p)
Definition: ginblock.h:152
void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, Datum key, GinNullCategory category, GinState *ginstate)
Definition: ginentrypage.c:745
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:128
#define ItemPointerSetMin(p)
Definition: ginblock.h:150
#define GIN_SHARE
Definition: gin_private.h:44
bool * hasMatchKey
Definition: ginget.c:32
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
TIDBitmap * matchBitmap
Definition: gin_private.h:332
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
GinScanKey keys
Definition: gin_private.h:352
MemoryContext tempCtx
Definition: gin_private.h:349
static bool scanGetItem(IndexScanDesc scan, ItemPointerData advancePast, ItemPointerData *item, bool *recheck)
Definition: ginget.c:1182
GinScanEntry * requiredEntries
Definition: gin_private.h:278
static int entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
Definition: ginget.c:435
uintptr_t Datum
Definition: postgres.h:374
struct IndexTupleData IndexTupleData
StrategyNumber strategy
Definition: gin_private.h:321
bool tbm_is_empty(const TIDBitmap *tbm)
Definition: tidbitmap.c:583
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3529
#define GIN_CAT_NULL_ITEM
Definition: ginblock.h:195
int work_mem
Definition: globals.c:112
Relation index
Definition: gin_private.h:160
static void keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key, ItemPointerData advancePast, Snapshot snapshot)
Definition: ginget.c:904
TBMIterateResult * matchResult
Definition: gin_private.h:334
#define InvalidOffsetNumber
Definition: off.h:26
void ginNewScanKey(IndexScanDesc scan)
Definition: ginscan.c:262
static FormData_pg_attribute a1
Definition: heap.c:142
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:680
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
GinNullCategory queryCategory
Definition: gin_private.h:318
#define GinIsVoidRes(s)
Definition: ginget.c:1787
#define GIN_FALSE
Definition: gin.h:59
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
TBMIterator * tbm_begin_iterate(TIDBitmap *tbm)
Definition: tidbitmap.c:602
#define InvalidBlockNumber
Definition: block.h:33
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, int *nitems)
Definition: ginentrypage.c:163
BlockNumber blkno
Definition: gin_private.h:124
ItemPointerData curItem
Definition: gin_private.h:308
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:29
TIDBitmap * tbm_create(long maxbytes)
Definition: tidbitmap.c:210
#define GinGetPostingTree(itup)
Definition: ginblock.h:217
#define ItemPointerSetMax(p)
Definition: ginblock.h:155
#define DatumGetPointer(X)
Definition: postgres.h:557
bool(* boolConsistentFn)(GinScanKey key)
Definition: gin_private.h:285
static StringInfoData tmpbuf
Definition: walsender.c:153
static int ginCompareItemPointers(ItemPointer a, ItemPointer b)
Definition: gin_private.h:461
uint32 predictNumber
Definition: gin_private.h:129
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2588
void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
Definition: tidbitmap.c:356
static bool collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry, Snapshot snapshot)
Definition: ginget.c:116
#define ItemPointerSetInvalid(pointer)
Definition: itemptr.h:131
int GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
Definition: gindatapage.c:181
#define GinDataPageGetRightBound(page)
Definition: ginblock.h:272
GinScanEntry * entries
Definition: gin_private.h:355
#define GinIsPostingTree(itup)
Definition: ginblock.h:215
void * palloc(Size size)
Definition: mcxt.c:891
void freeGinBtreeStack(GinBtreeStack *stack)
Definition: ginbtree.c:193
GinTernaryValue(* triConsistentFn)(GinScanKey key)
Definition: gin_private.h:286
ItemPointerData itemptr
Definition: gin_private.h:172
GinScanEntry * additionalEntries
Definition: gin_private.h:280
int i
void * arg
ItemPointer GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
Definition: gindatapage.c:134
TupleDesc origTupdesc
Definition: gin_private.h:67
static bool moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
Definition: ginget.c:40
#define TRUE
Definition: c.h:214
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
#define GinPageGetMeta(p)
Definition: ginblock.h:103
#define elog
Definition: elog.h:219
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:66
static bool scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
Definition: ginget.c:1332
ItemPointerData item
Definition: ginget.c:31
struct pendingPosition pendingPosition
#define GinItemPointerGetBlockNumber(pointer)
Definition: ginblock.h:134
int Buffer
Definition: buf.h:23
OffsetNumber ip_posid
Definition: itemptr.h:39
#define UInt16GetDatum(X)
Definition: postgres.h:473
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:3330
OffsetNumber lastOffset
Definition: ginget.c:30
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:86
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742
static FormData_pg_attribute a2
Definition: heap.c:148
void ginFreeScanKeys(GinScanOpaque so)
Definition: ginscan.c:232