PostgreSQL Source Code git master
Loading...
Searching...
No Matches
syncscan.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * syncscan.c
4 * scan synchronization support
5 *
6 * When multiple backends run a sequential scan on the same table, we try
7 * to keep them synchronized to reduce the overall I/O needed. The goal is
8 * to read each page into shared buffer cache only once, and let all backends
9 * that take part in the shared scan process the page before it falls out of
10 * the cache.
11 *
12 * Since the "leader" in a pack of backends doing a seqscan will have to wait
13 * for I/O, while the "followers" don't, there is a strong self-synchronizing
14 * effect once we can get the backends examining approximately the same part
15 * of the table at the same time. Hence all that is really needed is to get
16 * a new backend beginning a seqscan to begin it close to where other backends
17 * are reading. We can scan the table circularly, from block X up to the
18 * end and then from block 0 to X-1, to ensure we visit all rows while still
19 * participating in the common scan.
20 *
21 * To accomplish that, we keep track of the scan position of each table, and
22 * start new scans close to where the previous scan(s) are. We don't try to
23 * do any extra synchronization to keep the scans together afterwards; some
24 * scans might progress much more slowly than others, for example if the
25 * results need to be transferred to the client over a slow network, and we
26 * don't want such queries to slow down others.
27 *
28 * There can realistically only be a few large sequential scans on different
29 * tables in progress at any time. Therefore we just keep the scan positions
30 * in a small LRU list which we scan every time we need to look up or update a
31 * scan position. The whole mechanism is only applied for tables exceeding
32 * a threshold size (but that is not the concern of this module).
33 *
34 * INTERFACE ROUTINES
35 * ss_get_location - return current scan location of a relation
36 * ss_report_location - update current scan location
37 *
38 *
39 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
40 * Portions Copyright (c) 1994, Regents of the University of California
41 *
42 * IDENTIFICATION
43 * src/backend/access/common/syncscan.c
44 *
45 *-------------------------------------------------------------------------
46 */
47#include "postgres.h"
48
49#include "access/syncscan.h"
50#include "miscadmin.h"
51#include "storage/lwlock.h"
52#include "storage/shmem.h"
53#include "storage/subsystems.h"
54#include "utils/rel.h"
55
56
57/* GUC variables */
58#ifdef TRACE_SYNCSCAN
59bool trace_syncscan = false;
60#endif
61
62
63/*
64 * Size of the LRU list.
65 *
66 * Note: the code assumes that SYNC_SCAN_NELEM > 1.
67 *
68 * XXX: What's a good value? It should be large enough to hold the
69 * maximum number of large tables scanned simultaneously. But a larger value
70 * means more traversing of the LRU list when starting a new scan.
71 */
72#define SYNC_SCAN_NELEM 20
73
74/*
75 * Interval between reports of the location of the current scan, in pages.
76 *
77 * Note: This should be smaller than the ring size (see buffer/freelist.c)
78 * we use for bulk reads. Otherwise a scan joining other scans might start
79 * from a page that's no longer in the buffer cache. This is a bit fuzzy;
80 * there's no guarantee that the new scan will read the page before it leaves
81 * the buffer cache anyway, and on the other hand the page is most likely
82 * still in the OS cache.
83 */
84#define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ)
85
86
87/*
88 * The scan locations structure is essentially a doubly-linked LRU with head
89 * and tail pointer, but designed to hold a fixed maximum number of elements in
90 * fixed-size shared memory.
91 */
92typedef struct ss_scan_location_t
93{
94 RelFileLocator relfilelocator; /* identity of a relation */
95 BlockNumber location; /* last-reported location in the relation */
97
104
111
112#define SizeOfScanLocations(N) \
113 (offsetof(ss_scan_locations_t, items) + (N) * sizeof(ss_lru_item_t))
114
115static void SyncScanShmemRequest(void *arg);
116static void SyncScanShmemInit(void *arg);
117
122
123/* Pointer to struct in shared memory */
125
126/* prototypes for internal functions */
127static BlockNumber ss_search(RelFileLocator relfilelocator,
128 BlockNumber location, bool set);
129
130
131/*
132 * SyncScanShmemRequest --- register this module's shared memory
133 */
134static void
136{
137 ShmemRequestStruct(.name = "Sync Scan Locations List",
139 .ptr = (void **) &scan_locations,
140 );
141}
142
143/*
144 * SyncScanShmemInit --- initialize this module's shared memory
145 */
146static void
148{
149 int i;
150
153
154 for (i = 0; i < SYNC_SCAN_NELEM; i++)
155 {
157
158 /*
159 * Initialize all slots with invalid values. As scans are started,
160 * these invalid entries will fall off the LRU list and get replaced
161 * with real entries.
162 */
167
168 item->prev = (i > 0) ?
169 (&scan_locations->items[i - 1]) : NULL;
170 item->next = (i < SYNC_SCAN_NELEM - 1) ?
171 (&scan_locations->items[i + 1]) : NULL;
172 }
173}
174
175/*
176 * ss_search --- search the scan_locations structure for an entry with the
177 * given relfilelocator.
178 *
179 * If "set" is true, the location is updated to the given location. If no
180 * entry for the given relfilelocator is found, it will be created at the head
181 * of the list with the given location, even if "set" is false.
182 *
183 * In any case, the location after possible update is returned.
184 *
185 * Caller is responsible for having acquired suitable lock on the shared
186 * data structure.
187 */
188static BlockNumber
189ss_search(RelFileLocator relfilelocator, BlockNumber location, bool set)
190{
191 ss_lru_item_t *item;
192
193 item = scan_locations->head;
194 for (;;)
195 {
196 bool match;
197
199 relfilelocator);
200
201 if (match || item->next == NULL)
202 {
203 /*
204 * If we reached the end of list and no match was found, take over
205 * the last entry
206 */
207 if (!match)
208 {
209 item->location.relfilelocator = relfilelocator;
210 item->location.location = location;
211 }
212 else if (set)
213 item->location.location = location;
214
215 /* Move the entry to the front of the LRU list */
216 if (item != scan_locations->head)
217 {
218 /* unlink */
219 if (item == scan_locations->tail)
220 scan_locations->tail = item->prev;
221 item->prev->next = item->next;
222 if (item->next)
223 item->next->prev = item->prev;
224
225 /* link */
226 item->prev = NULL;
227 item->next = scan_locations->head;
228 scan_locations->head->prev = item;
229 scan_locations->head = item;
230 }
231
232 return item->location.location;
233 }
234
235 item = item->next;
236 }
237
238 /* not reached */
239}
240
241/*
242 * ss_get_location --- get the optimal starting location for scan
243 *
244 * Returns the last-reported location of a sequential scan on the
245 * relation, or 0 if no valid location is found.
246 *
247 * We expect the caller has just done RelationGetNumberOfBlocks(), and
248 * so that number is passed in rather than computing it again. The result
249 * is guaranteed less than relnblocks (assuming that's > 0).
250 */
253{
255
257 startloc = ss_search(rel->rd_locator, 0, false);
259
260 /*
261 * If the location is not a valid block number for this scan, start at 0.
262 *
263 * This can happen if for instance a VACUUM truncated the table since the
264 * location was saved.
265 */
266 if (startloc >= relnblocks)
267 startloc = 0;
268
269#ifdef TRACE_SYNCSCAN
270 if (trace_syncscan)
271 elog(LOG,
272 "SYNC_SCAN: start \"%s\" (size %u) at %u",
274#endif
275
276 return startloc;
277}
278
279/*
280 * ss_report_location --- update the current scan location
281 *
282 * Writes an entry into the shared Sync Scan state of the form
283 * (relfilelocator, blocknumber), overwriting any existing entry for the
284 * same relfilelocator.
285 */
286void
288{
289#ifdef TRACE_SYNCSCAN
290 if (trace_syncscan)
291 {
292 if ((location % 1024) == 0)
293 elog(LOG,
294 "SYNC_SCAN: scanning \"%s\" at %u",
295 RelationGetRelationName(rel), location);
296 }
297#endif
298
299 /*
300 * To reduce lock contention, only report scan progress every N pages. For
301 * the same reason, don't block if the lock isn't immediately available.
302 * Missing a few updates isn't critical, it just means that a new scan
303 * that wants to join the pack will start a little bit behind the head of
304 * the scan. Hopefully the pages are still in OS cache and the scan
305 * catches up quickly.
306 */
307 if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0)
308 {
310 {
311 (void) ss_search(rel->rd_locator, location, true);
313 }
314#ifdef TRACE_SYNCSCAN
315 else if (trace_syncscan)
316 elog(LOG,
317 "SYNC_SCAN: missed update for \"%s\" at %u",
318 RelationGetRelationName(rel), location);
319#endif
320 }
321}
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:558
Datum arg
Definition elog.c:1322
#define LOG
Definition elog.h:32
#define elog(elevel,...)
Definition elog.h:228
int i
Definition isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1321
@ LW_EXCLUSIVE
Definition lwlock.h:104
#define InvalidOid
static int fb(int x)
#define RelationGetRelationName(relation)
Definition rel.h:550
#define RelFileLocatorEquals(locator1, locator2)
#define InvalidRelFileNumber
Definition relpath.h:26
#define ShmemRequestStruct(...)
Definition shmem.h:176
RelFileNumber relNumber
RelFileLocator rd_locator
Definition rel.h:57
ShmemRequestCallback request_fn
Definition shmem.h:133
ss_scan_location_t location
Definition syncscan.c:102
struct ss_lru_item_t * next
Definition syncscan.c:101
struct ss_lru_item_t * prev
Definition syncscan.c:100
RelFileLocator relfilelocator
Definition syncscan.c:94
BlockNumber location
Definition syncscan.c:95
ss_lru_item_t * head
Definition syncscan.c:107
ss_lru_item_t items[FLEXIBLE_ARRAY_MEMBER]
Definition syncscan.c:109
ss_lru_item_t * tail
Definition syncscan.c:108
void ss_report_location(Relation rel, BlockNumber location)
Definition syncscan.c:287
static void SyncScanShmemInit(void *arg)
Definition syncscan.c:147
static ss_scan_locations_t * scan_locations
Definition syncscan.c:124
static void SyncScanShmemRequest(void *arg)
Definition syncscan.c:135
BlockNumber ss_get_location(Relation rel, BlockNumber relnblocks)
Definition syncscan.c:252
const ShmemCallbacks SyncScanShmemCallbacks
Definition syncscan.c:118
static BlockNumber ss_search(RelFileLocator relfilelocator, BlockNumber location, bool set)
Definition syncscan.c:189
#define SYNC_SCAN_NELEM
Definition syncscan.c:72
#define SizeOfScanLocations(N)
Definition syncscan.c:112
#define SYNC_SCAN_REPORT_INTERVAL
Definition syncscan.c:84
const char * name