/*-------------------------------------------------------------------------
 *
 * nodeResultCache.c
 *      Routines to handle caching of results from parameterized nodes
 *
 * Portions Copyright (c) 2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      src/backend/executor/nodeResultCache.c
 *
 * ResultCache nodes are intended to sit above parameterized nodes in the plan
 * tree in order to cache results from them.  The intention here is that a
 * repeat scan with a parameter value that has already been seen by the node
 * can fetch tuples from the cache rather than having to re-scan the outer
 * node all over again.  The query planner may choose to make use of one of
 * these when it thinks rescans for previously seen values are likely enough
 * to warrant adding the additional node.
 *
 * The method of cache we use is a hash table.  When the cache fills, we never
 * spill tuples to disk, instead, we choose to evict the least recently used
 * cache entry from the cache.  We remember the least recently used entry by
 * always pushing new entries and entries we look for onto the tail of a
 * doubly linked list.  This means that older items always bubble to the top
 * of this LRU list.
 *
 * Sometimes our callers won't run their scans to completion.  For example a
 * semi-join only needs to run until it finds a matching tuple, and once it
 * does, the join operator skips to the next outer tuple and does not execute
 * the inner side again on that scan.  Because of this, we must keep track of
 * when a cache entry is complete, and by default, we know it is when we run
 * out of tuples to read during the scan.  However, there are cases where we
 * can mark the cache entry as complete without exhausting the scan of all
 * tuples.  One case is unique joins, where the join operator knows that there
 * will only be at most one match for any given outer tuple.  In order to
 * support such cases we allow the "singlerow" option to be set for the cache.
 * This option marks the cache entry as complete after we read the first tuple
 * from the subnode.
 *
 * It's possible when we're filling the cache for a given set of parameters
 * that we're unable to free enough memory to store any more tuples.  If this
 * happens then we'll have already evicted all other cache entries.  When
 * caching another tuple would cause us to exceed our memory budget, we must
 * free the entry that we're currently populating and move the state machine
 * into RC_CACHE_BYPASS_MODE.  This means that we'll not attempt to cache any
 * further tuples for this particular scan.  We don't have the memory for it.
 * The state machine will be reset again on the next rescan.  If the memory
 * requirements to cache the next parameter's tuples are less demanding, then
 * that may allow us to start putting useful entries back into the cache
 * again.
 *
 *
 * INTERFACE ROUTINES
 *      ExecResultCache         - lookup cache, exec subplan when not found
 *      ExecInitResultCache     - initialize node and subnodes
 *      ExecEndResultCache      - shutdown node and subnodes
 *      ExecReScanResultCache   - rescan the result cache
 *
 *      ExecResultCacheEstimate     estimates DSM space needed for parallel plan
 *      ExecResultCacheInitializeDSM    initialize DSM for parallel plan
 *      ExecResultCacheInitializeWorker attach to DSM info in parallel worker
 *      ExecResultCacheRetrieveInstrumentation  get instrumentation from worker
 *-------------------------------------------------------------------------
 */
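
/*
 * For illustration: the kind of plan shape this node is built for.  Given a
 * query like
 *
 *      SELECT * FROM t1 JOIN t2 ON t1.x = t2.y;
 *
 * the planner may produce something along these lines (table and index names
 * here are hypothetical):
 *
 *      Nested Loop
 *        ->  Seq Scan on t1
 *        ->  Result Cache
 *              Cache Key: t1.x
 *              ->  Index Scan using t2_y_idx on t2
 *                    Index Cond: (y = t1.x)
 *
 * Repeated values of t1.x are then answered from the cache instead of
 * re-executing the parameterized index scan.
 */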

#include "postgres.h"

#include "common/hashfn.h"
#include "executor/executor.h"
#include "executor/nodeResultCache.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "utils/lsyscache.h"

/* States of the ExecResultCache state machine */
#define RC_CACHE_LOOKUP             1   /* Attempt to perform a cache lookup */
#define RC_CACHE_FETCH_NEXT_TUPLE   2   /* Get another tuple from the cache */
#define RC_FILLING_CACHE            3   /* Read outer node to fill cache */
#define RC_CACHE_BYPASS_MODE        4   /* Bypass mode.  Just read from our
                                         * subplan without caching anything */
#define RC_END_OF_SCAN              5   /* Ready for rescan */


/* Helper macros for memory accounting */
#define EMPTY_ENTRY_MEMORY_BYTES(e) (sizeof(ResultCacheEntry) + \
                                     sizeof(ResultCacheKey) + \
                                     (e)->key->params->t_len)
#define CACHE_TUPLE_BYTES(t)        (sizeof(ResultCacheTuple) + \
                                     (t)->mintuple->t_len)

/* ResultCacheTuple Stores an individually cached tuple */
typedef struct ResultCacheTuple
{
    MinimalTuple mintuple;          /* Cached tuple */
    struct ResultCacheTuple *next;  /* The next tuple with the same parameter
                                     * values or NULL if it's the last one */
} ResultCacheTuple;

/*
 * ResultCacheKey
 *      The hash table key for cached entries plus the LRU list link
 */
typedef struct ResultCacheKey
{
    MinimalTuple params;
    dlist_node  lru_node;           /* Pointer to next/prev key in LRU list */
} ResultCacheKey;

/*
 * ResultCacheEntry
 *      The data struct that the cache hash table stores
 */
typedef struct ResultCacheEntry
{
    ResultCacheKey *key;            /* Hash key for hash table lookups */
    ResultCacheTuple *tuplehead;    /* Pointer to the first tuple or NULL if
                                     * no tuples are cached for this entry */
    uint32      hash;               /* Hash value (cached) */
    char        status;             /* Hash status */
    bool        complete;           /* Did we read the outer plan to completion? */
} ResultCacheEntry;


#define SH_PREFIX resultcache
#define SH_ELEMENT_TYPE ResultCacheEntry
#define SH_KEY_TYPE ResultCacheKey *
#define SH_SCOPE static inline
#define SH_DECLARE
#include "lib/simplehash.h"

static uint32 ResultCacheHash_hash(struct resultcache_hash *tb,
                                   const ResultCacheKey *key);
static int  ResultCacheHash_equal(struct resultcache_hash *tb,
                                  const ResultCacheKey *params1,
                                  const ResultCacheKey *params2);

#define SH_PREFIX resultcache
#define SH_ELEMENT_TYPE ResultCacheEntry
#define SH_KEY_TYPE ResultCacheKey *
#define SH_KEY key
#define SH_HASH_KEY(tb, key) ResultCacheHash_hash(tb, key)
#define SH_EQUAL(tb, a, b) (ResultCacheHash_equal(tb, a, b) == 0)
#define SH_SCOPE static inline
#define SH_STORE_HASH
#define SH_GET_HASH(tb, a) a->hash
#define SH_DEFINE
#include "lib/simplehash.h"

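/*
 * The two inclusions of simplehash.h above generate the hash table type and
 * the "static inline" functions used below; roughly (signatures abridged):
 *
 *      resultcache_create()        - make an empty table
 *      resultcache_insert()        - add an entry, setting *found on conflict
 *      resultcache_lookup()        - probe for an existing entry
 *      resultcache_delete_item()   - remove a given entry
 *      resultcache_start_iterate() / resultcache_iterate() - scan all entries
 *
 * The SH_DECLARE pass emits only declarations; the SH_DEFINE pass emits the
 * bodies, with SH_HASH_KEY and SH_EQUAL routed to the two functions
 * prototyped in between.
 */
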
/*
 * ResultCacheHash_hash
 *      Hash function for simplehash hashtable.  'key' is unused here as we
 *      require that all table lookups first populate the ResultCacheState's
 *      probeslot with the key values to be looked up.
 */
static uint32
ResultCacheHash_hash(struct resultcache_hash *tb, const ResultCacheKey *key)
{
    ResultCacheState *rcstate = (ResultCacheState *) tb->private_data;
    TupleTableSlot *pslot = rcstate->probeslot;
    uint32      hashkey = 0;
    int         numkeys = rcstate->nkeys;
    FmgrInfo   *hashfunctions = rcstate->hashfunctions;
    Oid        *collations = rcstate->collations;

    for (int i = 0; i < numkeys; i++)
    {
        /* rotate hashkey left 1 bit at each step */
        hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);

        if (!pslot->tts_isnull[i])  /* treat nulls as having hash key 0 */
        {
            uint32      hkey;

            hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
                                                    collations[i], pslot->tts_values[i]));
            hashkey ^= hkey;
        }
    }

    return murmurhash32(hashkey);
}
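
/*
 * In effect, for a two-column cache key the loop above computes (rotl1() and
 * hash() are shorthand here, not real functions):
 *
 *      hashkey = rotl1(0) ^ hash(key column 0);
 *      hashkey = rotl1(hashkey) ^ hash(key column 1);
 *      return murmurhash32(hashkey);
 *
 * The 1-bit rotation makes the result sensitive to column order, NULLs
 * contribute zero, and the final murmurhash32() call scrambles the combined
 * value so that simplehash sees well-distributed bits.
 */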

/*
 * ResultCacheHash_equal
 *      Equality function for confirming hash value matches during a hash
 *      table lookup.  'key2' is never used.  Instead the ResultCacheState's
 *      probeslot is always populated with details of what's being looked up.
 */
static int
ResultCacheHash_equal(struct resultcache_hash *tb, const ResultCacheKey *key1,
                      const ResultCacheKey *key2)
{
    ResultCacheState *rcstate = (ResultCacheState *) tb->private_data;
    ExprContext *econtext = rcstate->ss.ps.ps_ExprContext;
    TupleTableSlot *tslot = rcstate->tableslot;
    TupleTableSlot *pslot = rcstate->probeslot;

    /* probeslot should have already been prepared by prepare_probe_slot() */

    ExecStoreMinimalTuple(key1->params, tslot, false);

    econtext->ecxt_innertuple = tslot;
    econtext->ecxt_outertuple = pslot;
    return !ExecQualAndReset(rcstate->cache_eq_expr, econtext);
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(ResultCacheState *rcstate, uint32 size)
{
    /* Make a guess at a good size when we're not given a valid size. */
    if (size == 0)
        size = 1024;

    /* resultcache_create will convert the size to a power of 2 */
    rcstate->hashtable = resultcache_create(rcstate->tableContext, size,
                                            rcstate);
}

/*
 * prepare_probe_slot
 *      Populate rcstate's probeslot with the values from the tuple stored
 *      in 'key'.  If 'key' is NULL, then perform the population by evaluating
 *      rcstate's param_exprs.
 */
static inline void
prepare_probe_slot(ResultCacheState *rcstate, ResultCacheKey *key)
{
    TupleTableSlot *pslot = rcstate->probeslot;
    TupleTableSlot *tslot = rcstate->tableslot;
    int         numKeys = rcstate->nkeys;

    ExecClearTuple(pslot);

    if (key == NULL)
    {
        /* Set the probeslot's values based on the current parameter values */
        for (int i = 0; i < numKeys; i++)
            pslot->tts_values[i] = ExecEvalExpr(rcstate->param_exprs[i],
                                                rcstate->ss.ps.ps_ExprContext,
                                                &pslot->tts_isnull[i]);
    }
    else
    {
        /* Process the key's MinimalTuple and store the values in probeslot */
        ExecStoreMinimalTuple(key->params, tslot, false);
        slot_getallattrs(tslot);
        memcpy(pslot->tts_values, tslot->tts_values, sizeof(Datum) * numKeys);
        memcpy(pslot->tts_isnull, tslot->tts_isnull, sizeof(bool) * numKeys);
    }

    ExecStoreVirtualTuple(pslot);
}
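
/*
 * Both call patterns appear below: cache_lookup() passes key == NULL to
 * probe with the scan's current parameter values, while cache_reduce_memory()
 * passes the LRU victim's key so that the corresponding entry can be
 * re-found in the hash table before it is removed.
 */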

/*
 * entry_purge_tuples
 *      Remove all tuples from the cache entry pointed to by 'entry'.  This
 *      leaves an empty cache entry.  Also, update the memory accounting to
 *      reflect the removal of the tuples.
 */
static inline void
entry_purge_tuples(ResultCacheState *rcstate, ResultCacheEntry *entry)
{
    ResultCacheTuple *tuple = entry->tuplehead;
    uint64      freed_mem = 0;

    while (tuple != NULL)
    {
        ResultCacheTuple *next = tuple->next;

        freed_mem += CACHE_TUPLE_BYTES(tuple);

        /* Free memory used for this tuple */
        pfree(tuple->mintuple);
        pfree(tuple);

        tuple = next;
    }

    entry->complete = false;
    entry->tuplehead = NULL;

    /* Update the memory accounting */
    rcstate->mem_used -= freed_mem;
}

/*
 * remove_cache_entry
 *      Remove 'entry' from the cache and free memory used by it.
 */
static void
remove_cache_entry(ResultCacheState *rcstate, ResultCacheEntry *entry)
{
    ResultCacheKey *key = entry->key;

    dlist_delete(&entry->key->lru_node);

    /* Remove all of the tuples from this entry */
    entry_purge_tuples(rcstate, entry);

    /*
     * Update memory accounting.  entry_purge_tuples should have already
     * subtracted the memory used for each cached tuple.  Here we just update
     * the amount used by the entry itself.
     */
    rcstate->mem_used -= EMPTY_ENTRY_MEMORY_BYTES(entry);

    /* Remove the entry from the cache */
    resultcache_delete_item(rcstate->hashtable, entry);

    pfree(key->params);
    pfree(key);
}

/*
 * cache_reduce_memory
 *      Evict older and less recently used items from the cache in order to
 *      reduce the memory consumption back to something below the
 *      ResultCacheState's mem_limit.
 *
 * 'specialkey', if not NULL, causes the function to return false if the entry
 * which the key belongs to is removed from the cache.
 */
static bool
cache_reduce_memory(ResultCacheState *rcstate, ResultCacheKey *specialkey)
{
    bool        specialkey_intact = true;   /* for now */
    dlist_mutable_iter iter;
    uint64      evictions = 0;

    /* Update peak memory usage */
    if (rcstate->mem_used > rcstate->stats.mem_peak)
        rcstate->stats.mem_peak = rcstate->mem_used;

    /* We expect only to be called when we've gone over budget on memory */
    Assert(rcstate->mem_used > rcstate->mem_limit);

    /* Start the eviction process starting at the head of the LRU list. */
    dlist_foreach_modify(iter, &rcstate->lru_list)
    {
        ResultCacheKey *key = dlist_container(ResultCacheKey, lru_node,
                                              iter.cur);
        ResultCacheEntry *entry;

        /*
         * Populate the hash probe slot in preparation for looking up this LRU
         * entry.
         */
        prepare_probe_slot(rcstate, key);

        /*
         * Ideally the LRU list pointers would be stored in the entry itself
         * rather than in the key.  Unfortunately, we can't do that as the
         * simplehash.h code may resize the table and allocate new memory for
         * entries which would result in those pointers pointing to the old
         * buckets.  However, it's fine to use the key to store this as that's
         * only referenced by a pointer in the entry, which of course follows
         * the entry whenever the hash table is resized.  Since we only have a
         * pointer to the key here, we must perform a hash table lookup to
         * find the entry that the key belongs to.
         */
        entry = resultcache_lookup(rcstate->hashtable, NULL);

        /* A good spot to check for corruption of the table and LRU list. */
        Assert(entry != NULL);
        Assert(entry->key == key);

        /*
         * If we're being called to free memory while the cache is being
         * populated with new tuples, then we'd better take some care as we
         * could end up freeing the entry which 'specialkey' belongs to.
         * Generally callers will pass 'specialkey' as the key for the cache
         * entry which is currently being populated, so we must set
         * 'specialkey_intact' to false to inform the caller the specialkey
         * entry has been removed.
         */
        if (key == specialkey)
            specialkey_intact = false;

        /*
         * Finally remove the entry.  This will remove from the LRU list too.
         */
        remove_cache_entry(rcstate, entry);

        evictions++;

        /* Exit if we've freed enough memory */
        if (rcstate->mem_used <= rcstate->mem_limit)
            break;
    }

    rcstate->stats.cache_evictions += evictions;    /* Update Stats */

    return specialkey_intact;
}
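
/*
 * A minimal sketch of the LRU discipline used here and in cache_lookup()
 * below, all built from lib/ilist.h primitives:
 *
 *      dlist_push_tail(&rcstate->lru_list, &key->lru_node);    (new entry)
 *      dlist_move_tail(&rcstate->lru_list, &key->lru_node);    (cache hit)
 *      dlist_foreach_modify(iter, &rcstate->lru_list) ...      (evict from head)
 *
 * Entries therefore age towards the head of the list, which is where the
 * loop above starts evicting.
 */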

/*
 * cache_lookup
 *      Perform a lookup to see if we've already cached results based on the
 *      scan's current parameters.  If we find an existing entry we move it to
 *      the end of the LRU list, set *found to true then return it.  If we
 *      don't find an entry then we create a new one and add it to the end of
 *      the LRU list.  We also update cache memory accounting and remove older
 *      entries if we go over the memory budget.  If we managed to free enough
 *      memory we return the new entry, else we return NULL.
 *
 * Callers can assume we'll never return NULL when *found is true.
 */
static ResultCacheEntry *
cache_lookup(ResultCacheState *rcstate, bool *found)
{
    ResultCacheKey *key;
    ResultCacheEntry *entry;
    MemoryContext oldcontext;

    /* prepare the probe slot with the current scan parameters */
    prepare_probe_slot(rcstate, NULL);

    /*
     * Add the new entry to the cache.  No need to pass a valid key since the
     * hash function uses rcstate's probeslot, which we populated above.
     */
    entry = resultcache_insert(rcstate->hashtable, NULL, found);

    if (*found)
    {
        /*
         * Move existing entry to the tail of the LRU list to mark it as the
         * most recently used item.
         */
        dlist_move_tail(&rcstate->lru_list, &entry->key->lru_node);

        return entry;
    }

    oldcontext = MemoryContextSwitchTo(rcstate->tableContext);

    /* Allocate a new key */
    entry->key = key = (ResultCacheKey *) palloc(sizeof(ResultCacheKey));
    key->params = ExecCopySlotMinimalTuple(rcstate->probeslot);

    /* Update the total cache memory utilization */
    rcstate->mem_used += EMPTY_ENTRY_MEMORY_BYTES(entry);

    /* Initialize this entry */
    entry->complete = false;
    entry->tuplehead = NULL;

    /*
     * Since this is the most recently used entry, push this entry onto the
     * end of the LRU list.
     */
    dlist_push_tail(&rcstate->lru_list, &entry->key->lru_node);

    rcstate->last_tuple = NULL;

    MemoryContextSwitchTo(oldcontext);

    /*
     * If we've gone over our memory budget, then we'll free up some space in
     * the cache.
     */
    if (rcstate->mem_used > rcstate->mem_limit)
    {
        /*
         * Try to free up some memory.  It's highly unlikely that we'll fail
         * to do so here since the entry we've just added is yet to contain
         * any tuples and we're able to remove any other entry to reduce the
         * memory consumption.
         */
        if (unlikely(!cache_reduce_memory(rcstate, key)))
            return NULL;

        /*
         * The process of removing entries from the cache may have caused the
         * code in simplehash.h to shuffle elements to earlier buckets in the
         * hash table.  If it has, we'll need to find the entry again by
         * performing a lookup.  Fortunately, we can detect if this has
         * happened by seeing if the entry is still in use and that the key
         * pointer matches our expected key.
         */
        if (entry->status != resultcache_SH_IN_USE || entry->key != key)
        {
            /*
             * We need to repopulate the probeslot as lookups performed during
             * the cache evictions above will have stored some other key.
             */
            prepare_probe_slot(rcstate, key);

            /* Re-find the newly added entry */
            entry = resultcache_lookup(rcstate->hashtable, NULL);
            Assert(entry != NULL);
        }
    }

    return entry;
}

/*
 * cache_store_tuple
 *      Add the tuple stored in 'slot' to the rcstate's current cache entry.
 *      The cache entry must have already been made with cache_lookup().
 *      rcstate's last_tuple field must point to the tail of rcstate->entry's
 *      list of tuples.
 */
static bool
cache_store_tuple(ResultCacheState *rcstate, TupleTableSlot *slot)
{
    ResultCacheTuple *tuple;
    ResultCacheEntry *entry = rcstate->entry;
    MemoryContext oldcontext;

    Assert(slot != NULL);
    Assert(entry != NULL);

    oldcontext = MemoryContextSwitchTo(rcstate->tableContext);

    tuple = (ResultCacheTuple *) palloc(sizeof(ResultCacheTuple));
    tuple->mintuple = ExecCopySlotMinimalTuple(slot);
    tuple->next = NULL;

    /* Account for the memory we just consumed */
    rcstate->mem_used += CACHE_TUPLE_BYTES(tuple);

    if (entry->tuplehead == NULL)
    {
        /*
         * This is the first tuple for this entry, so just point the list head
         * to it.
         */
        entry->tuplehead = tuple;
    }
    else
    {
        /* push this tuple onto the tail of the list */
        rcstate->last_tuple->next = tuple;
    }

    rcstate->last_tuple = tuple;
    MemoryContextSwitchTo(oldcontext);

    /*
     * If we've gone over our memory budget then free up some space in the
     * cache.
     */
    if (rcstate->mem_used > rcstate->mem_limit)
    {
        ResultCacheKey *key = entry->key;

        if (!cache_reduce_memory(rcstate, key))
            return false;

        /*
         * The process of removing entries from the cache may have caused the
         * code in simplehash.h to shuffle elements to earlier buckets in the
         * hash table.  If it has, we'll need to find the entry again by
         * performing a lookup.  Fortunately, we can detect if this has
         * happened by seeing if the entry is still in use and that the key
         * pointer matches our expected key.
         */
        if (entry->status != resultcache_SH_IN_USE || entry->key != key)
        {
            /*
             * We need to repopulate the probeslot as lookups performed during
             * the cache evictions above will have stored some other key.
             */
            prepare_probe_slot(rcstate, key);

            /* Re-find the entry */
            rcstate->entry = entry = resultcache_lookup(rcstate->hashtable,
                                                        NULL);
            Assert(entry != NULL);
        }
    }

    return true;
}

static TupleTableSlot *
ExecResultCache(PlanState *pstate)
{
    ResultCacheState *node = castNode(ResultCacheState, pstate);
    PlanState  *outerNode;
    TupleTableSlot *slot;

    switch (node->rc_status)
    {
        case RC_CACHE_LOOKUP:
            {
                ResultCacheEntry *entry;
                TupleTableSlot *outerslot;
                bool        found;

                Assert(node->entry == NULL);

                /*
                 * We're only ever in this state for the first call of the
                 * scan.  Here we have a look to see if we've already seen the
                 * current parameters before and if we have already cached a
                 * complete set of records that the outer plan will return for
                 * these parameters.
                 *
                 * When we find a valid cache entry, we'll return the first
                 * tuple from it.  If not found, we'll create a cache entry and
                 * then try to fetch a tuple from the outer scan.  If we find
                 * one there, we'll try to cache it.
                 */

                /* see if we've got anything cached for the current parameters */
                entry = cache_lookup(node, &found);

                if (found && entry->complete)
                {
                    node->stats.cache_hits += 1;    /* stats update */

                    /*
                     * Set last_tuple and entry so that the state
                     * RC_CACHE_FETCH_NEXT_TUPLE can easily find the next
                     * tuple for these parameters.
                     */
                    node->last_tuple = entry->tuplehead;
                    node->entry = entry;

                    /* Fetch the first cached tuple, if there is one */
                    if (entry->tuplehead)
                    {
                        node->rc_status = RC_CACHE_FETCH_NEXT_TUPLE;

                        slot = node->ss.ps.ps_ResultTupleSlot;
                        ExecStoreMinimalTuple(entry->tuplehead->mintuple,
                                              slot, false);

                        return slot;
                    }

                    /* The cache entry is void of any tuples. */
                    node->rc_status = RC_END_OF_SCAN;
                    return NULL;
                }

                /* Handle cache miss */
                node->stats.cache_misses += 1;  /* stats update */

                if (found)
                {
                    /*
                     * A cache entry was found, but the scan for that entry
                     * did not run to completion.  We'll just remove all
                     * tuples and start again.  It might be tempting to
                     * continue where we left off, but there's no guarantee
                     * the outer node will produce the tuples in the same
                     * order as it did last time.
                     */
                    entry_purge_tuples(node, entry);
                }

                /* Scan the outer node for a tuple to cache */
                outerNode = outerPlanState(node);
                outerslot = ExecProcNode(outerNode);
                if (TupIsNull(outerslot))
                {
                    /*
                     * cache_lookup may have returned NULL due to failure to
                     * free enough cache space, so ensure we don't do anything
                     * here that assumes it worked.  There's no need to go into
                     * bypass mode here as we're setting rc_status to end of
                     * scan.
                     */
                    if (likely(entry))
                        entry->complete = true;

                    node->rc_status = RC_END_OF_SCAN;
                    return NULL;
                }

                node->entry = entry;

                /*
                 * If we failed to create the entry or failed to store the
                 * tuple in the entry, then go into bypass mode.
                 */
                if (unlikely(entry == NULL ||
                             !cache_store_tuple(node, outerslot)))
                {
                    node->stats.cache_overflows += 1;   /* stats update */

                    node->rc_status = RC_CACHE_BYPASS_MODE;

                    /*
                     * No need to clear out last_tuple as we'll stay in bypass
                     * mode until the end of the scan.
                     */
                }
                else
                {
                    /*
                     * If we only expect a single row from this scan then we
                     * can mark that we're not expecting more.  This allows
                     * cache lookups to work even when the scan has not been
                     * executed to completion.
                     */
                    entry->complete = node->singlerow;
                    node->rc_status = RC_FILLING_CACHE;
                }

                slot = node->ss.ps.ps_ResultTupleSlot;
                ExecCopySlot(slot, outerslot);
                return slot;
            }

        case RC_CACHE_FETCH_NEXT_TUPLE:
            {
                /* We shouldn't be in this state if these are not set */
                Assert(node->entry != NULL);
                Assert(node->last_tuple != NULL);

                /* Skip to the next tuple to output */
                node->last_tuple = node->last_tuple->next;

                /* No more tuples in the cache */
                if (node->last_tuple == NULL)
                {
                    node->rc_status = RC_END_OF_SCAN;
                    return NULL;
                }

                slot = node->ss.ps.ps_ResultTupleSlot;
                ExecStoreMinimalTuple(node->last_tuple->mintuple, slot,
                                      false);

                return slot;
            }

        case RC_FILLING_CACHE:
            {
                TupleTableSlot *outerslot;
                ResultCacheEntry *entry = node->entry;

                /* entry should already have been set by RC_CACHE_LOOKUP */
                Assert(entry != NULL);

                /*
                 * When in the RC_FILLING_CACHE state, we've just had a cache
                 * miss and are populating the cache with the current scan
                 * tuples.
                 */
                outerNode = outerPlanState(node);
                outerslot = ExecProcNode(outerNode);
                if (TupIsNull(outerslot))
                {
                    /* No more tuples.  Mark it as complete */
                    entry->complete = true;
                    node->rc_status = RC_END_OF_SCAN;
                    return NULL;
                }

                /*
                 * Validate if the planner properly set the singlerow flag.  It
                 * should only set that if each cache entry can, at most,
                 * return 1 row.  XXX maybe this should be an Assert?
                 */
                if (unlikely(entry->complete))
                    elog(ERROR, "cache entry already complete");

                /* Record the tuple in the current cache entry */
                if (unlikely(!cache_store_tuple(node, outerslot)))
                {
                    /* Couldn't store it?  Handle overflow */
                    node->stats.cache_overflows += 1;   /* stats update */

                    node->rc_status = RC_CACHE_BYPASS_MODE;

                    /*
                     * No need to clear out entry or last_tuple as we'll stay
                     * in bypass mode until the end of the scan.
                     */
                }

                slot = node->ss.ps.ps_ResultTupleSlot;
                ExecCopySlot(slot, outerslot);
                return slot;
            }

        case RC_CACHE_BYPASS_MODE:
            {
                TupleTableSlot *outerslot;

                /*
                 * When in bypass mode we just continue to read tuples without
                 * caching.  We need to wait until the next rescan before we
                 * can come out of this mode.
                 */
                outerNode = outerPlanState(node);
                outerslot = ExecProcNode(outerNode);
                if (TupIsNull(outerslot))
                {
                    node->rc_status = RC_END_OF_SCAN;
                    return NULL;
                }

                slot = node->ss.ps.ps_ResultTupleSlot;
                ExecCopySlot(slot, outerslot);
                return slot;
            }

        case RC_END_OF_SCAN:

            /*
             * We've already returned NULL for this scan, but just in case
             * something calls us again by mistake.
             */
            return NULL;

        default:
            elog(ERROR, "unrecognized resultcache state: %d",
                 (int) node->rc_status);
            return NULL;
    }                           /* switch */
}
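
/*
 * A sketch of the state transitions implemented above (the code is
 * authoritative):
 *
 *      RC_CACHE_LOOKUP           -- hit, entry complete ----> RC_CACHE_FETCH_NEXT_TUPLE
 *      RC_CACHE_LOOKUP           -- miss, tuple cached -----> RC_FILLING_CACHE
 *      RC_CACHE_LOOKUP           -- entry/store failed -----> RC_CACHE_BYPASS_MODE
 *      RC_FILLING_CACHE          -- store failed -----------> RC_CACHE_BYPASS_MODE
 *      RC_CACHE_FETCH_NEXT_TUPLE -- cached list exhausted --> RC_END_OF_SCAN
 *
 * The remaining states move to RC_END_OF_SCAN once the subplan stops
 * returning tuples, and ExecReScanResultCache() resets the machine to
 * RC_CACHE_LOOKUP for the next parameter set.
 */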

ResultCacheState *
ExecInitResultCache(ResultCache *node, EState *estate, int eflags)
{
    ResultCacheState *rcstate = makeNode(ResultCacheState);
    Plan       *outerNode;
    int         i;
    int         nkeys;
    Oid        *eqfuncoids;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    rcstate->ss.ps.plan = (Plan *) node;
    rcstate->ss.ps.state = estate;
    rcstate->ss.ps.ExecProcNode = ExecResultCache;

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &rcstate->ss.ps);

    outerNode = outerPlan(node);
    outerPlanState(rcstate) = ExecInitNode(outerNode, estate, eflags);

    /*
     * Initialize return slot and type.  No need to initialize projection info
     * because this node doesn't do projections.
     */
    ExecInitResultTupleSlotTL(&rcstate->ss.ps, &TTSOpsMinimalTuple);
    rcstate->ss.ps.ps_ProjInfo = NULL;

    /*
     * Initialize scan slot and type.
     */
    ExecCreateScanSlotFromOuterPlan(estate, &rcstate->ss, &TTSOpsMinimalTuple);

    /*
     * Set the state machine to lookup the cache.  We won't find anything
     * until we cache something, but this saves a special case to create the
     * first entry.
     */
    rcstate->rc_status = RC_CACHE_LOOKUP;

    rcstate->nkeys = nkeys = node->numKeys;
    rcstate->hashkeydesc = ExecTypeFromExprList(node->param_exprs);
    rcstate->tableslot = MakeSingleTupleTableSlot(rcstate->hashkeydesc,
                                                  &TTSOpsMinimalTuple);
    rcstate->probeslot = MakeSingleTupleTableSlot(rcstate->hashkeydesc,
                                                  &TTSOpsVirtual);

    rcstate->param_exprs = (ExprState **) palloc(nkeys * sizeof(ExprState *));
    rcstate->collations = node->collations; /* Just point directly to the plan
                                             * data */
    rcstate->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));

    eqfuncoids = palloc(nkeys * sizeof(Oid));

    for (i = 0; i < nkeys; i++)
    {
        Oid         hashop = node->hashOperators[i];
        Oid         left_hashfn;
        Oid         right_hashfn;
        Expr       *param_expr = (Expr *) list_nth(node->param_exprs, i);

        if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
            elog(ERROR, "could not find hash function for hash operator %u",
                 hashop);

        fmgr_info(left_hashfn, &rcstate->hashfunctions[i]);

        rcstate->param_exprs[i] = ExecInitExpr(param_expr, (PlanState *) rcstate);
        eqfuncoids[i] = get_opcode(hashop);
    }

    rcstate->cache_eq_expr = ExecBuildParamSetEqual(rcstate->hashkeydesc,
                                                    &TTSOpsMinimalTuple,
                                                    &TTSOpsVirtual,
                                                    eqfuncoids,
                                                    node->collations,
                                                    node->param_exprs,
                                                    (PlanState *) rcstate);

    pfree(eqfuncoids);
    rcstate->mem_used = 0;

    /* Limit the total memory consumed by the cache to this */
    rcstate->mem_limit = get_hash_mem() * 1024L;

    /* A memory context dedicated for the cache */
    rcstate->tableContext = AllocSetContextCreate(CurrentMemoryContext,
                                                  "ResultCacheHashTable",
                                                  ALLOCSET_DEFAULT_SIZES);

    dlist_init(&rcstate->lru_list);
    rcstate->last_tuple = NULL;
    rcstate->entry = NULL;

    /*
     * Mark if we can assume the cache entry is completed after we get the
     * first record for it.  Some callers might not call us again after
     * getting the first match.  e.g. a join operator performing a unique join
     * is able to skip to the next outer tuple after getting the first
     * matching inner tuple.  In this case, the cache entry is complete after
     * getting the first tuple.  This allows us to mark it as so.
     */
    rcstate->singlerow = node->singlerow;

    /* Zero the statistics counters */
    memset(&rcstate->stats, 0, sizeof(ResultCacheInstrumentation));

    /* Allocate and set up the actual cache */
    build_hash_table(rcstate, node->est_entries);

    return rcstate;
}

void
ExecEndResultCache(ResultCacheState *node)
{
#ifdef USE_ASSERT_CHECKING
    /* Validate the memory accounting code is correct in assert builds. */
    {
        int         count;
        uint64      mem = 0;
        resultcache_iterator i;
        ResultCacheEntry *entry;

        resultcache_start_iterate(node->hashtable, &i);

        count = 0;
        while ((entry = resultcache_iterate(node->hashtable, &i)) != NULL)
        {
            ResultCacheTuple *tuple = entry->tuplehead;

            mem += EMPTY_ENTRY_MEMORY_BYTES(entry);
            while (tuple != NULL)
            {
                mem += CACHE_TUPLE_BYTES(tuple);
                tuple = tuple->next;
            }
            count++;
        }

        Assert(count == node->hashtable->members);
        Assert(mem == node->mem_used);
    }
#endif

    /*
     * When ending a parallel worker, copy the statistics gathered by the
     * worker back into shared memory so that it can be picked up by the main
     * process to report in EXPLAIN ANALYZE.
     */
    if (node->shared_info != NULL && IsParallelWorker())
    {
        ResultCacheInstrumentation *si;

        /* Make mem_peak available for EXPLAIN */
        if (node->stats.mem_peak == 0)
            node->stats.mem_peak = node->mem_used;

        Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
        si = &node->shared_info->sinstrument[ParallelWorkerNumber];
        memcpy(si, &node->stats, sizeof(ResultCacheInstrumentation));
    }

    /* Remove the cache context */
    MemoryContextDelete(node->tableContext);

    ExecClearTuple(node->ss.ss_ScanTupleSlot);
    /* must drop pointer to cache result tuple */
    ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);

    /*
     * free exprcontext
     */
    ExecFreeExprContext(&node->ss.ps);

    /*
     * shut down the subplan
     */
    ExecEndNode(outerPlanState(node));
}

void
ExecReScanResultCache(ResultCacheState *node)
{
    PlanState  *outerPlan = outerPlanState(node);

    /* Mark that we must lookup the cache for a new set of parameters */
    node->rc_status = RC_CACHE_LOOKUP;

    /* nullify pointers used for the last scan */
    node->entry = NULL;
    node->last_tuple = NULL;

    /*
     * if chgParam of subnode is not null then plan will be re-scanned by
     * first ExecProcNode.
     */
    if (outerPlan->chgParam == NULL)
        ExecReScan(outerPlan);
}

/*
 * ExecEstimateCacheEntryOverheadBytes
 *      For use in the query planner to help it estimate the amount of memory
 *      required to store a single entry in the cache.
 */
double
ExecEstimateCacheEntryOverheadBytes(double ntuples)
{
    return sizeof(ResultCacheEntry) + sizeof(ResultCacheKey) +
        sizeof(ResultCacheTuple) * ntuples;
}

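/*
 * For example, with hypothetical struct sizes of 40, 24 and 16 bytes
 * respectively, an entry expected to hold 10 tuples would be costed at
 * 40 + 24 + 10 * 16 = 224 bytes of overhead.  Note that this excludes the
 * cached tuple data itself, which the caller is expected to estimate
 * separately.
 */
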
/* ----------------------------------------------------------------
 *                      Parallel Query Support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *      ExecResultCacheEstimate
 *
 *      Estimate space required to propagate result cache statistics.
 * ----------------------------------------------------------------
 */
void
ExecResultCacheEstimate(ResultCacheState *node, ParallelContext *pcxt)
{
    Size        size;

    /* don't need this if not instrumenting or no workers */
    if (!node->ss.ps.instrument || pcxt->nworkers == 0)
        return;

    size = mul_size(pcxt->nworkers, sizeof(ResultCacheInstrumentation));
    size = add_size(size, offsetof(SharedResultCacheInfo, sinstrument));
    shm_toc_estimate_chunk(&pcxt->estimator, size);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *      ExecResultCacheInitializeDSM
 *
 *      Initialize DSM space for result cache statistics.
 * ----------------------------------------------------------------
 */
void
ExecResultCacheInitializeDSM(ResultCacheState *node, ParallelContext *pcxt)
{
    Size        size;

    /* don't need this if not instrumenting or no workers */
    if (!node->ss.ps.instrument || pcxt->nworkers == 0)
        return;

    size = offsetof(SharedResultCacheInfo, sinstrument)
        + pcxt->nworkers * sizeof(ResultCacheInstrumentation);
    node->shared_info = shm_toc_allocate(pcxt->toc, size);
    /* ensure any unfilled slots will contain zeroes */
    memset(node->shared_info, 0, size);
    node->shared_info->num_workers = pcxt->nworkers;
    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
                   node->shared_info);
}

/* ----------------------------------------------------------------
 *      ExecResultCacheInitializeWorker
 *
 *      Attach worker to DSM space for result cache statistics.
 * ----------------------------------------------------------------
 */
void
ExecResultCacheInitializeWorker(ResultCacheState *node, ParallelWorkerContext *pwcxt)
{
    node->shared_info =
        shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
}

/* ----------------------------------------------------------------
 *      ExecResultCacheRetrieveInstrumentation
 *
 *      Transfer result cache statistics from DSM to private memory.
 * ----------------------------------------------------------------
 */
void
ExecResultCacheRetrieveInstrumentation(ResultCacheState *node)
{
    Size        size;
    SharedResultCacheInfo *si;

    if (node->shared_info == NULL)
        return;

    size = offsetof(SharedResultCacheInfo, sinstrument)
        + node->shared_info->num_workers * sizeof(ResultCacheInstrumentation);
    si = palloc(size);
    memcpy(si, node->shared_info, size);
    node->shared_info = si;
}