PostgreSQL Source Code  git master
queryjumblefuncs.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * queryjumblefuncs.c
4  * Query normalization and fingerprinting.
5  *
6  * Normalization is a process whereby similar queries, typically differing only
7  * in their constants (though the exact rules are somewhat more subtle than
8  * that) are recognized as equivalent, and are tracked as a single entry. This
9  * is particularly useful for non-prepared queries.
10  *
11  * Normalization is implemented by fingerprinting queries, selectively
12  * serializing those fields of each query tree's nodes that are judged to be
13  * essential to the query. This is referred to as a query jumble. This is
14  * distinct from a regular serialization in that various extraneous
15  * information is ignored as irrelevant or not essential to the query, such
16  * as the collations of Vars and, most notably, the values of constants.
17  *
18  * This jumble is acquired at the end of parse analysis of each query, and
19  * a 64-bit hash of it is stored into the query's Query.queryId field.
20  * The server then copies this value around, making it available in plan
21  * tree(s) generated from the query. The executor can then use this value
22  * to blame query costs on the proper queryId.
23  *
24  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  * src/backend/nodes/queryjumblefuncs.c
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34 
35 #include "common/hashfn.h"
36 #include "miscadmin.h"
37 #include "nodes/queryjumble.h"
38 #include "parser/scansup.h"
39 
40 #define JUMBLE_SIZE 1024 /* query serialization buffer size */
41 
42 /* GUC parameters */
44 
45 /*
46  * True when compute_query_id is ON or AUTO, and a module requests them.
47  *
48  * Note that IsQueryIdEnabled() should be used instead of checking
49  * query_id_enabled or compute_query_id directly when we want to know
50  * whether query identifiers are computed in the core or not.
51  */
52 bool query_id_enabled = false;
53 
54 static void AppendJumble(JumbleState *jstate,
55  const unsigned char *item, Size size);
56 static void RecordConstLocation(JumbleState *jstate, int location);
57 static void _jumbleNode(JumbleState *jstate, Node *node);
58 static void _jumbleA_Const(JumbleState *jstate, Node *node);
59 static void _jumbleList(JumbleState *jstate, Node *node);
60 static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
61 
62 /*
63  * Given a possibly multi-statement source string, confine our attention to the
64  * relevant part of the string.
65  */
66 const char *
67 CleanQuerytext(const char *query, int *location, int *len)
68 {
69  int query_location = *location;
70  int query_len = *len;
71 
72  /* First apply starting offset, unless it's -1 (unknown). */
73  if (query_location >= 0)
74  {
75  Assert(query_location <= strlen(query));
76  query += query_location;
77  /* Length of 0 (or -1) means "rest of string" */
78  if (query_len <= 0)
79  query_len = strlen(query);
80  else
81  Assert(query_len <= strlen(query));
82  }
83  else
84  {
85  /* If query location is unknown, distrust query_len as well */
86  query_location = 0;
87  query_len = strlen(query);
88  }
89 
90  /*
91  * Discard leading and trailing whitespace, too. Use scanner_isspace()
92  * not libc's isspace(), because we want to match the lexer's behavior.
93  */
94  while (query_len > 0 && scanner_isspace(query[0]))
95  query++, query_location++, query_len--;
96  while (query_len > 0 && scanner_isspace(query[query_len - 1]))
97  query_len--;
98 
99  *location = query_location;
100  *len = query_len;
101 
102  return query;
103 }
104 
105 JumbleState *
107 {
108  JumbleState *jstate = NULL;
109 
111 
112  jstate = (JumbleState *) palloc(sizeof(JumbleState));
113 
114  /* Set up workspace for query jumbling */
115  jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
116  jstate->jumble_len = 0;
117  jstate->clocations_buf_size = 32;
118  jstate->clocations = (LocationLen *)
119  palloc(jstate->clocations_buf_size * sizeof(LocationLen));
120  jstate->clocations_count = 0;
121  jstate->highest_extern_param_id = 0;
122 
123  /* Compute query ID and mark the Query node with it */
124  _jumbleNode(jstate, (Node *) query);
125  query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
126  jstate->jumble_len,
127  0));
128 
129  /*
130  * If we are unlucky enough to get a hash of zero, use 1 instead for
131  * normal statements and 2 for utility queries.
132  */
133  if (query->queryId == UINT64CONST(0))
134  {
135  if (query->utilityStmt)
136  query->queryId = UINT64CONST(2);
137  else
138  query->queryId = UINT64CONST(1);
139  }
140 
141  return jstate;
142 }
143 
144 /*
145  * Enables query identifier computation.
146  *
147  * Third-party plugins can use this function to inform core that they require
148  * a query identifier to be computed.
149  */
150 void
152 {
154  query_id_enabled = true;
155 }
156 
157 /*
158  * AppendJumble: Append a value that is substantive in a given query to
159  * the current jumble.
160  */
161 static void
162 AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
163 {
164  unsigned char *jumble = jstate->jumble;
165  Size jumble_len = jstate->jumble_len;
166 
167  /*
168  * Whenever the jumble buffer is full, we hash the current contents and
169  * reset the buffer to contain just that hash value, thus relying on the
170  * hash to summarize everything so far.
171  */
172  while (size > 0)
173  {
174  Size part_size;
175 
176  if (jumble_len >= JUMBLE_SIZE)
177  {
178  uint64 start_hash;
179 
180  start_hash = DatumGetUInt64(hash_any_extended(jumble,
181  JUMBLE_SIZE, 0));
182  memcpy(jumble, &start_hash, sizeof(start_hash));
183  jumble_len = sizeof(start_hash);
184  }
185  part_size = Min(size, JUMBLE_SIZE - jumble_len);
186  memcpy(jumble + jumble_len, item, part_size);
187  jumble_len += part_size;
188  item += part_size;
189  size -= part_size;
190  }
191  jstate->jumble_len = jumble_len;
192 }
193 
194 /*
195  * Record location of constant within query string of query tree
196  * that is currently being walked.
197  */
198 static void
199 RecordConstLocation(JumbleState *jstate, int location)
200 {
201  /* -1 indicates unknown or undefined location */
202  if (location >= 0)
203  {
204  /* enlarge array if needed */
205  if (jstate->clocations_count >= jstate->clocations_buf_size)
206  {
207  jstate->clocations_buf_size *= 2;
208  jstate->clocations = (LocationLen *)
209  repalloc(jstate->clocations,
210  jstate->clocations_buf_size *
211  sizeof(LocationLen));
212  }
213  jstate->clocations[jstate->clocations_count].location = location;
214  /* initialize lengths to -1 to simplify third-party module usage */
215  jstate->clocations[jstate->clocations_count].length = -1;
216  jstate->clocations_count++;
217  }
218 }
219 
220 #define JUMBLE_NODE(item) \
221  _jumbleNode(jstate, (Node *) expr->item)
222 #define JUMBLE_LOCATION(location) \
223  RecordConstLocation(jstate, expr->location)
224 #define JUMBLE_FIELD(item) \
225  AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
226 #define JUMBLE_FIELD_SINGLE(item) \
227  AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
228 #define JUMBLE_STRING(str) \
229 do { \
230  if (expr->str) \
231  AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
232 } while(0)
233 
234 #include "queryjumblefuncs.funcs.c"
235 
236 static void
237 _jumbleNode(JumbleState *jstate, Node *node)
238 {
239  Node *expr = node;
240 
241  if (expr == NULL)
242  return;
243 
244  /* Guard against stack overflow due to overly complex expressions */
246 
247  /*
248  * We always emit the node's NodeTag, then any additional fields that are
249  * considered significant, and then we recurse to any child nodes.
250  */
252 
253  switch (nodeTag(expr))
254  {
255 #include "queryjumblefuncs.switch.c"
256 
257  case T_List:
258  case T_IntList:
259  case T_OidList:
260  case T_XidList:
261  _jumbleList(jstate, expr);
262  break;
263 
264  default:
265  /* Only a warning, since we can stumble along anyway */
266  elog(WARNING, "unrecognized node type: %d",
267  (int) nodeTag(expr));
268  break;
269  }
270 
271  /* Special cases to handle outside the automated code */
272  switch (nodeTag(expr))
273  {
274  case T_Param:
275  {
276  Param *p = (Param *) node;
277 
278  /*
279  * Update the highest Param id seen, in order to start
280  * normalization correctly.
281  */
282  if (p->paramkind == PARAM_EXTERN &&
283  p->paramid > jstate->highest_extern_param_id)
284  jstate->highest_extern_param_id = p->paramid;
285  }
286  break;
287  default:
288  break;
289  }
290 }
291 
292 static void
293 _jumbleList(JumbleState *jstate, Node *node)
294 {
295  List *expr = (List *) node;
296  ListCell *l;
297 
298  switch (expr->type)
299  {
300  case T_List:
301  foreach(l, expr)
302  _jumbleNode(jstate, lfirst(l));
303  break;
304  case T_IntList:
305  foreach(l, expr)
307  break;
308  case T_OidList:
309  foreach(l, expr)
311  break;
312  case T_XidList:
313  foreach(l, expr)
315  break;
316  default:
317  elog(ERROR, "unrecognized list node type: %d",
318  (int) expr->type);
319  return;
320  }
321 }
322 
323 static void
325 {
326  A_Const *expr = (A_Const *) node;
327 
328  JUMBLE_FIELD(isnull);
329  if (!expr->isnull)
330  {
331  JUMBLE_FIELD(val.node.type);
332  switch (nodeTag(&expr->val))
333  {
334  case T_Integer:
335  JUMBLE_FIELD(val.ival.ival);
336  break;
337  case T_Float:
338  JUMBLE_STRING(val.fval.fval);
339  break;
340  case T_Boolean:
341  JUMBLE_FIELD(val.boolval.boolval);
342  break;
343  case T_String:
344  JUMBLE_STRING(val.sval.sval);
345  break;
346  case T_BitString:
347  JUMBLE_STRING(val.bsval.bsval);
348  break;
349  default:
350  elog(ERROR, "unrecognized node type: %d",
351  (int) nodeTag(&expr->val));
352  break;
353  }
354  }
355 }
356 
357 static void
359 {
360  RangeTblEntry *expr = (RangeTblEntry *) node;
361 
362  JUMBLE_FIELD(rtekind);
363  switch (expr->rtekind)
364  {
365  case RTE_RELATION:
366  JUMBLE_FIELD(relid);
367  JUMBLE_NODE(tablesample);
368  JUMBLE_FIELD(inh);
369  break;
370  case RTE_SUBQUERY:
371  JUMBLE_NODE(subquery);
372  break;
373  case RTE_JOIN:
374  JUMBLE_FIELD(jointype);
375  break;
376  case RTE_FUNCTION:
378  break;
379  case RTE_TABLEFUNC:
380  JUMBLE_NODE(tablefunc);
381  break;
382  case RTE_VALUES:
383  JUMBLE_NODE(values_lists);
384  break;
385  case RTE_CTE:
386 
387  /*
388  * Depending on the CTE name here isn't ideal, but it's the only
389  * info we have to identify the referenced WITH item.
390  */
391  JUMBLE_STRING(ctename);
392  JUMBLE_FIELD(ctelevelsup);
393  break;
394  case RTE_NAMEDTUPLESTORE:
395  JUMBLE_STRING(enrname);
396  break;
397  case RTE_RESULT:
398  break;
399  default:
400  elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind);
401  break;
402  }
403 }
#define Min(x, y)
Definition: c.h:993
size_t Size
Definition: c.h:594
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
long val
Definition: informix.c:664
Assert(fmt[strlen(fmt) - 1] !='\n')
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1451
void * palloc(Size size)
Definition: mcxt.c:1201
#define nodeTag(nodeptr)
Definition: nodes.h:133
@ RTE_JOIN
Definition: parsenodes.h:1008
@ RTE_CTE
Definition: parsenodes.h:1012
@ RTE_NAMEDTUPLESTORE
Definition: parsenodes.h:1013
@ RTE_VALUES
Definition: parsenodes.h:1011
@ RTE_SUBQUERY
Definition: parsenodes.h:1007
@ RTE_RESULT
Definition: parsenodes.h:1014
@ RTE_FUNCTION
Definition: parsenodes.h:1009
@ RTE_TABLEFUNC
Definition: parsenodes.h:1010
@ RTE_RELATION
Definition: parsenodes.h:1006
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_int(lc)
Definition: pg_list.h:173
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define lfirst_xid(lc)
Definition: pg_list.h:175
void check_stack_depth(void)
Definition: postgres.c:3523
static uint64 DatumGetUInt64(Datum X)
Definition: postgres.h:419
@ PARAM_EXTERN
Definition: primnodes.h:353
@ COMPUTE_QUERY_ID_AUTO
Definition: queryjumble.h:58
@ COMPUTE_QUERY_ID_OFF
Definition: queryjumble.h:56
static bool IsQueryIdEnabled(void)
Definition: queryjumble.h:77
JumbleState * JumbleQuery(Query *query)
#define JUMBLE_NODE(item)
bool query_id_enabled
static void _jumbleNode(JumbleState *jstate, Node *node)
static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
#define JUMBLE_SIZE
int compute_query_id
static void RecordConstLocation(JumbleState *jstate, int location)
static void _jumbleList(JumbleState *jstate, Node *node)
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node)
#define JUMBLE_STRING(str)
const char * CleanQuerytext(const char *query, int *location, int *len)
#define JUMBLE_FIELD_SINGLE(item)
static void _jumbleA_Const(JumbleState *jstate, Node *node)
void EnableQueryId(void)
#define JUMBLE_FIELD(item)
static const struct fns functions
Definition: regcomp.c:356
bool scanner_isspace(char ch)
Definition: scansup.c:117
bool isnull
Definition: parsenodes.h:353
union ValUnion val
Definition: parsenodes.h:352
unsigned char * jumble
Definition: queryjumble.h:35
int clocations_buf_size
Definition: queryjumble.h:44
Size jumble_len
Definition: queryjumble.h:38
int highest_extern_param_id
Definition: queryjumble.h:50
LocationLen * clocations
Definition: queryjumble.h:41
int clocations_count
Definition: queryjumble.h:47
Definition: pg_list.h:54
NodeTag type
Definition: pg_list.h:55
Definition: nodes.h:129
int paramid
Definition: primnodes.h:363
ParamKind paramkind
Definition: primnodes.h:362
Node * utilityStmt
Definition: parsenodes.h:135
RTEKind rtekind
Definition: parsenodes.h:1025
const char * type