PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
queryjumblefuncs.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * queryjumblefuncs.c
4 * Query normalization and fingerprinting.
5 *
6 * Normalization is a process whereby similar queries, typically differing only
7 * in their constants (though the exact rules are somewhat more subtle than
8 * that) are recognized as equivalent, and are tracked as a single entry. This
9 * is particularly useful for non-prepared queries.
10 *
11 * Normalization is implemented by fingerprinting queries, selectively
12 * serializing those fields of each query tree's nodes that are judged to be
13 * essential to the query. This is referred to as a query jumble. This is
14 * distinct from a regular serialization in that various extraneous
15 * information is ignored as irrelevant or not essential to the query, such
16 * as the collations of Vars and, most notably, the values of constants.
17 *
18 * This jumble is acquired at the end of parse analysis of each query, and
19 * a 64-bit hash of it is stored into the query's Query.queryId field.
20 * The server then copies this value around, making it available in plan
21 * tree(s) generated from the query. The executor can then use this value
22 * to blame query costs on the proper queryId.
23 *
24 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
25 * Portions Copyright (c) 1994, Regents of the University of California
26 *
27 *
28 * IDENTIFICATION
29 * src/backend/nodes/queryjumblefuncs.c
30 *
31 *-------------------------------------------------------------------------
32 */
33#include "postgres.h"
34
35#include "common/hashfn.h"
36#include "miscadmin.h"
37#include "nodes/queryjumble.h"
38#include "parser/scansup.h"
39
40#define JUMBLE_SIZE 1024 /* query serialization buffer size */
41
42/* GUC parameters */
44
45/*
46 * True when compute_query_id is ON or AUTO, and a module requests them.
47 *
48 * Note that IsQueryIdEnabled() should be used instead of checking
49 * query_id_enabled or compute_query_id directly when we want to know
50 * whether query identifiers are computed in the core or not.
51 */
52bool query_id_enabled = false;
53
54static void AppendJumble(JumbleState *jstate,
55 const unsigned char *item, Size size);
56static void RecordConstLocation(JumbleState *jstate, int location);
57static void _jumbleNode(JumbleState *jstate, Node *node);
58static void _jumbleA_Const(JumbleState *jstate, Node *node);
59static void _jumbleList(JumbleState *jstate, Node *node);
60static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
61
62/*
63 * Given a possibly multi-statement source string, confine our attention to the
64 * relevant part of the string.
65 */
66const char *
67CleanQuerytext(const char *query, int *location, int *len)
68{
69 int query_location = *location;
70 int query_len = *len;
71
72 /* First apply starting offset, unless it's -1 (unknown). */
73 if (query_location >= 0)
74 {
75 Assert(query_location <= strlen(query));
76 query += query_location;
77 /* Length of 0 (or -1) means "rest of string" */
78 if (query_len <= 0)
79 query_len = strlen(query);
80 else
81 Assert(query_len <= strlen(query));
82 }
83 else
84 {
85 /* If query location is unknown, distrust query_len as well */
86 query_location = 0;
87 query_len = strlen(query);
88 }
89
90 /*
91 * Discard leading and trailing whitespace, too. Use scanner_isspace()
92 * not libc's isspace(), because we want to match the lexer's behavior.
93 *
94 * Note: the parser now strips leading comments and whitespace from the
95 * reported stmt_location, so this first loop will only iterate in the
96 * unusual case that the location didn't propagate to here. But the
97 * statement length will extend to the end-of-string or terminating
98 * semicolon, so the second loop often does something useful.
99 */
100 while (query_len > 0 && scanner_isspace(query[0]))
101 query++, query_location++, query_len--;
102 while (query_len > 0 && scanner_isspace(query[query_len - 1]))
103 query_len--;
104
105 *location = query_location;
106 *len = query_len;
107
108 return query;
109}
110
113{
114 JumbleState *jstate = NULL;
115
117
118 jstate = (JumbleState *) palloc(sizeof(JumbleState));
119
120 /* Set up workspace for query jumbling */
121 jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
122 jstate->jumble_len = 0;
123 jstate->clocations_buf_size = 32;
124 jstate->clocations = (LocationLen *)
125 palloc(jstate->clocations_buf_size * sizeof(LocationLen));
126 jstate->clocations_count = 0;
127 jstate->highest_extern_param_id = 0;
128
129 /* Compute query ID and mark the Query node with it */
130 _jumbleNode(jstate, (Node *) query);
131 query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
132 jstate->jumble_len,
133 0));
134
135 /*
136 * If we are unlucky enough to get a hash of zero, use 1 instead for
137 * normal statements and 2 for utility queries.
138 */
139 if (query->queryId == UINT64CONST(0))
140 {
141 if (query->utilityStmt)
142 query->queryId = UINT64CONST(2);
143 else
144 query->queryId = UINT64CONST(1);
145 }
146
147 return jstate;
148}
149
150/*
151 * Enables query identifier computation.
152 *
153 * Third-party plugins can use this function to inform core that they require
154 * a query identifier to be computed.
155 */
156void
158{
160 query_id_enabled = true;
161}
162
163/*
164 * AppendJumble: Append a value that is substantive in a given query to
165 * the current jumble.
166 */
167static void
168AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
169{
170 unsigned char *jumble = jstate->jumble;
171 Size jumble_len = jstate->jumble_len;
172
173 /*
174 * Whenever the jumble buffer is full, we hash the current contents and
175 * reset the buffer to contain just that hash value, thus relying on the
176 * hash to summarize everything so far.
177 */
178 while (size > 0)
179 {
180 Size part_size;
181
182 if (jumble_len >= JUMBLE_SIZE)
183 {
184 uint64 start_hash;
185
186 start_hash = DatumGetUInt64(hash_any_extended(jumble,
187 JUMBLE_SIZE, 0));
188 memcpy(jumble, &start_hash, sizeof(start_hash));
189 jumble_len = sizeof(start_hash);
190 }
191 part_size = Min(size, JUMBLE_SIZE - jumble_len);
192 memcpy(jumble + jumble_len, item, part_size);
193 jumble_len += part_size;
194 item += part_size;
195 size -= part_size;
196 }
197 jstate->jumble_len = jumble_len;
198}
199
200/*
201 * Record location of constant within query string of query tree
202 * that is currently being walked.
203 */
204static void
205RecordConstLocation(JumbleState *jstate, int location)
206{
207 /* -1 indicates unknown or undefined location */
208 if (location >= 0)
209 {
210 /* enlarge array if needed */
211 if (jstate->clocations_count >= jstate->clocations_buf_size)
212 {
213 jstate->clocations_buf_size *= 2;
214 jstate->clocations = (LocationLen *)
215 repalloc(jstate->clocations,
216 jstate->clocations_buf_size *
217 sizeof(LocationLen));
218 }
219 jstate->clocations[jstate->clocations_count].location = location;
220 /* initialize lengths to -1 to simplify third-party module usage */
221 jstate->clocations[jstate->clocations_count].length = -1;
222 jstate->clocations_count++;
223 }
224}
225
226#define JUMBLE_NODE(item) \
227 _jumbleNode(jstate, (Node *) expr->item)
228#define JUMBLE_LOCATION(location) \
229 RecordConstLocation(jstate, expr->location)
230#define JUMBLE_FIELD(item) \
231 AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
232#define JUMBLE_FIELD_SINGLE(item) \
233 AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
234#define JUMBLE_STRING(str) \
235do { \
236 if (expr->str) \
237 AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
238} while(0)
239
240#include "queryjumblefuncs.funcs.c"
241
242static void
244{
245 Node *expr = node;
246
247 if (expr == NULL)
248 return;
249
250 /* Guard against stack overflow due to overly complex expressions */
252
253 /*
254 * We always emit the node's NodeTag, then any additional fields that are
255 * considered significant, and then we recurse to any child nodes.
256 */
258
259 switch (nodeTag(expr))
260 {
261#include "queryjumblefuncs.switch.c"
262
263 case T_List:
264 case T_IntList:
265 case T_OidList:
266 case T_XidList:
267 _jumbleList(jstate, expr);
268 break;
269
270 default:
271 /* Only a warning, since we can stumble along anyway */
272 elog(WARNING, "unrecognized node type: %d",
273 (int) nodeTag(expr));
274 break;
275 }
276
277 /* Special cases to handle outside the automated code */
278 switch (nodeTag(expr))
279 {
280 case T_Param:
281 {
282 Param *p = (Param *) node;
283
284 /*
285 * Update the highest Param id seen, in order to start
286 * normalization correctly.
287 */
288 if (p->paramkind == PARAM_EXTERN &&
289 p->paramid > jstate->highest_extern_param_id)
290 jstate->highest_extern_param_id = p->paramid;
291 }
292 break;
293 default:
294 break;
295 }
296}
297
298static void
300{
301 List *expr = (List *) node;
302 ListCell *l;
303
304 switch (expr->type)
305 {
306 case T_List:
307 foreach(l, expr)
308 _jumbleNode(jstate, lfirst(l));
309 break;
310 case T_IntList:
311 foreach(l, expr)
313 break;
314 case T_OidList:
315 foreach(l, expr)
317 break;
318 case T_XidList:
319 foreach(l, expr)
321 break;
322 default:
323 elog(ERROR, "unrecognized list node type: %d",
324 (int) expr->type);
325 return;
326 }
327}
328
329static void
331{
332 A_Const *expr = (A_Const *) node;
333
334 JUMBLE_FIELD(isnull);
335 if (!expr->isnull)
336 {
337 JUMBLE_FIELD(val.node.type);
338 switch (nodeTag(&expr->val))
339 {
340 case T_Integer:
341 JUMBLE_FIELD(val.ival.ival);
342 break;
343 case T_Float:
344 JUMBLE_STRING(val.fval.fval);
345 break;
346 case T_Boolean:
347 JUMBLE_FIELD(val.boolval.boolval);
348 break;
349 case T_String:
350 JUMBLE_STRING(val.sval.sval);
351 break;
352 case T_BitString:
353 JUMBLE_STRING(val.bsval.bsval);
354 break;
355 default:
356 elog(ERROR, "unrecognized node type: %d",
357 (int) nodeTag(&expr->val));
358 break;
359 }
360 }
361}
362
363static void
365{
366 VariableSetStmt *expr = (VariableSetStmt *) node;
367
368 JUMBLE_FIELD(kind);
370
371 /*
372 * Account for the list of arguments in query jumbling only if told by the
373 * parser.
374 */
375 if (expr->jumble_args)
377 JUMBLE_FIELD(is_local);
378 JUMBLE_LOCATION(location);
379}
#define Min(x, y)
Definition: c.h:958
#define Assert(condition)
Definition: c.h:812
uint64_t uint64
Definition: c.h:486
#define UINT64CONST(x)
Definition: c.h:500
size_t Size
Definition: c.h:559
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
long val
Definition: informix.c:689
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define nodeTag(nodeptr)
Definition: nodes.h:133
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_int(lc)
Definition: pg_list.h:173
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define lfirst_xid(lc)
Definition: pg_list.h:175
static uint64 DatumGetUInt64(Datum X)
Definition: postgres.h:419
@ PARAM_EXTERN
Definition: primnodes.h:367
@ COMPUTE_QUERY_ID_AUTO
Definition: queryjumble.h:58
@ COMPUTE_QUERY_ID_OFF
Definition: queryjumble.h:56
static bool IsQueryIdEnabled(void)
Definition: queryjumble.h:77
#define JUMBLE_NODE(item)
JumbleState * JumbleQuery(Query *query)
bool query_id_enabled
static void _jumbleNode(JumbleState *jstate, Node *node)
static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
#define JUMBLE_SIZE
int compute_query_id
#define JUMBLE_LOCATION(location)
const char * CleanQuerytext(const char *query, int *location, int *len)
static void RecordConstLocation(JumbleState *jstate, int location)
static void _jumbleList(JumbleState *jstate, Node *node)
#define JUMBLE_STRING(str)
#define JUMBLE_FIELD_SINGLE(item)
static void _jumbleA_Const(JumbleState *jstate, Node *node)
void EnableQueryId(void)
#define JUMBLE_FIELD(item)
bool scanner_isspace(char ch)
Definition: scansup.c:117
static pg_noinline void Size size
Definition: slab.c:607
void check_stack_depth(void)
Definition: stack_depth.c:95
bool isnull
Definition: parsenodes.h:365
union ValUnion val
Definition: parsenodes.h:364
unsigned char * jumble
Definition: queryjumble.h:35
int clocations_buf_size
Definition: queryjumble.h:44
Size jumble_len
Definition: queryjumble.h:38
int highest_extern_param_id
Definition: queryjumble.h:50
LocationLen * clocations
Definition: queryjumble.h:41
int clocations_count
Definition: queryjumble.h:47
Definition: pg_list.h:54
NodeTag type
Definition: pg_list.h:55
Definition: nodes.h:129
int paramid
Definition: primnodes.h:377
ParamKind paramkind
Definition: primnodes.h:376
Node * utilityStmt
Definition: parsenodes.h:136
const char * type
const char * name