PostgreSQL Source Code  git master
queryjumblefuncs.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * queryjumblefuncs.c
4  * Query normalization and fingerprinting.
5  *
6  * Normalization is a process whereby similar queries, typically differing only
7  * in their constants (though the exact rules are somewhat more subtle than
8  * that) are recognized as equivalent, and are tracked as a single entry. This
9  * is particularly useful for non-prepared queries.
10  *
11  * Normalization is implemented by fingerprinting queries, selectively
12  * serializing those fields of each query tree's nodes that are judged to be
13  * essential to the query. This is referred to as a query jumble. This is
14  * distinct from a regular serialization in that various extraneous
15  * information is ignored as irrelevant or not essential to the query, such
16  * as the collations of Vars and, most notably, the values of constants.
17  *
18  * This jumble is acquired at the end of parse analysis of each query, and
19  * a 64-bit hash of it is stored into the query's Query.queryId field.
20  * The server then copies this value around, making it available in plan
21  * tree(s) generated from the query. The executor can then use this value
22  * to blame query costs on the proper queryId.
23  *
24  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  * src/backend/nodes/queryjumblefuncs.c
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34 
35 #include "common/hashfn.h"
36 #include "miscadmin.h"
37 #include "nodes/queryjumble.h"
38 #include "parser/scansup.h"
39 
40 #define JUMBLE_SIZE 1024 /* query serialization buffer size */
41 
42 /* GUC parameters */
44 
45 /*
46  * True when compute_query_id is ON or AUTO, and a module requests them.
47  *
48  * Note that IsQueryIdEnabled() should be used instead of checking
49  * query_id_enabled or compute_query_id directly when we want to know
50  * whether query identifiers are computed in the core or not.
51  */
52 bool query_id_enabled = false;
53 
54 static void AppendJumble(JumbleState *jstate,
55  const unsigned char *item, Size size);
56 static void RecordConstLocation(JumbleState *jstate, int location);
57 static void _jumbleNode(JumbleState *jstate, Node *node);
58 static void _jumbleA_Const(JumbleState *jstate, Node *node);
59 static void _jumbleList(JumbleState *jstate, Node *node);
60 
61 /*
62  * Given a possibly multi-statement source string, confine our attention to the
63  * relevant part of the string.
64  */
65 const char *
66 CleanQuerytext(const char *query, int *location, int *len)
67 {
68  int query_location = *location;
69  int query_len = *len;
70 
71  /* First apply starting offset, unless it's -1 (unknown). */
72  if (query_location >= 0)
73  {
74  Assert(query_location <= strlen(query));
75  query += query_location;
76  /* Length of 0 (or -1) means "rest of string" */
77  if (query_len <= 0)
78  query_len = strlen(query);
79  else
80  Assert(query_len <= strlen(query));
81  }
82  else
83  {
84  /* If query location is unknown, distrust query_len as well */
85  query_location = 0;
86  query_len = strlen(query);
87  }
88 
89  /*
90  * Discard leading and trailing whitespace, too. Use scanner_isspace()
91  * not libc's isspace(), because we want to match the lexer's behavior.
92  */
93  while (query_len > 0 && scanner_isspace(query[0]))
94  query++, query_location++, query_len--;
95  while (query_len > 0 && scanner_isspace(query[query_len - 1]))
96  query_len--;
97 
98  *location = query_location;
99  *len = query_len;
100 
101  return query;
102 }
103 
104 JumbleState *
106 {
107  JumbleState *jstate = NULL;
108 
110 
111  jstate = (JumbleState *) palloc(sizeof(JumbleState));
112 
113  /* Set up workspace for query jumbling */
114  jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
115  jstate->jumble_len = 0;
116  jstate->clocations_buf_size = 32;
117  jstate->clocations = (LocationLen *)
118  palloc(jstate->clocations_buf_size * sizeof(LocationLen));
119  jstate->clocations_count = 0;
120  jstate->highest_extern_param_id = 0;
121 
122  /* Compute query ID and mark the Query node with it */
123  _jumbleNode(jstate, (Node *) query);
124  query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
125  jstate->jumble_len,
126  0));
127 
128  /*
129  * If we are unlucky enough to get a hash of zero, use 1 instead for
130  * normal statements and 2 for utility queries.
131  */
132  if (query->queryId == UINT64CONST(0))
133  {
134  if (query->utilityStmt)
135  query->queryId = UINT64CONST(2);
136  else
137  query->queryId = UINT64CONST(1);
138  }
139 
140  return jstate;
141 }
142 
143 /*
144  * Enables query identifier computation.
145  *
146  * Third-party plugins can use this function to inform core that they require
147  * a query identifier to be computed.
148  */
149 void
151 {
153  query_id_enabled = true;
154 }
155 
156 /*
157  * AppendJumble: Append a value that is substantive in a given query to
158  * the current jumble.
159  */
160 static void
161 AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
162 {
163  unsigned char *jumble = jstate->jumble;
164  Size jumble_len = jstate->jumble_len;
165 
166  /*
167  * Whenever the jumble buffer is full, we hash the current contents and
168  * reset the buffer to contain just that hash value, thus relying on the
169  * hash to summarize everything so far.
170  */
171  while (size > 0)
172  {
173  Size part_size;
174 
175  if (jumble_len >= JUMBLE_SIZE)
176  {
177  uint64 start_hash;
178 
179  start_hash = DatumGetUInt64(hash_any_extended(jumble,
180  JUMBLE_SIZE, 0));
181  memcpy(jumble, &start_hash, sizeof(start_hash));
182  jumble_len = sizeof(start_hash);
183  }
184  part_size = Min(size, JUMBLE_SIZE - jumble_len);
185  memcpy(jumble + jumble_len, item, part_size);
186  jumble_len += part_size;
187  item += part_size;
188  size -= part_size;
189  }
190  jstate->jumble_len = jumble_len;
191 }
192 
193 /*
194  * Record location of constant within query string of query tree
195  * that is currently being walked.
196  */
197 static void
198 RecordConstLocation(JumbleState *jstate, int location)
199 {
200  /* -1 indicates unknown or undefined location */
201  if (location >= 0)
202  {
203  /* enlarge array if needed */
204  if (jstate->clocations_count >= jstate->clocations_buf_size)
205  {
206  jstate->clocations_buf_size *= 2;
207  jstate->clocations = (LocationLen *)
208  repalloc(jstate->clocations,
209  jstate->clocations_buf_size *
210  sizeof(LocationLen));
211  }
212  jstate->clocations[jstate->clocations_count].location = location;
213  /* initialize lengths to -1 to simplify third-party module usage */
214  jstate->clocations[jstate->clocations_count].length = -1;
215  jstate->clocations_count++;
216  }
217 }
218 
219 #define JUMBLE_NODE(item) \
220  _jumbleNode(jstate, (Node *) expr->item)
221 #define JUMBLE_LOCATION(location) \
222  RecordConstLocation(jstate, expr->location)
223 #define JUMBLE_FIELD(item) \
224  AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
225 #define JUMBLE_FIELD_SINGLE(item) \
226  AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
227 #define JUMBLE_STRING(str) \
228 do { \
229  if (expr->str) \
230  AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
231 } while(0)
232 
233 #include "queryjumblefuncs.funcs.c"
234 
235 static void
236 _jumbleNode(JumbleState *jstate, Node *node)
237 {
238  Node *expr = node;
239 
240  if (expr == NULL)
241  return;
242 
243  /* Guard against stack overflow due to overly complex expressions */
245 
246  /*
247  * We always emit the node's NodeTag, then any additional fields that are
248  * considered significant, and then we recurse to any child nodes.
249  */
251 
252  switch (nodeTag(expr))
253  {
254 #include "queryjumblefuncs.switch.c"
255 
256  case T_List:
257  case T_IntList:
258  case T_OidList:
259  case T_XidList:
260  _jumbleList(jstate, expr);
261  break;
262 
263  default:
264  /* Only a warning, since we can stumble along anyway */
265  elog(WARNING, "unrecognized node type: %d",
266  (int) nodeTag(expr));
267  break;
268  }
269 
270  /* Special cases to handle outside the automated code */
271  switch (nodeTag(expr))
272  {
273  case T_Param:
274  {
275  Param *p = (Param *) node;
276 
277  /*
278  * Update the highest Param id seen, in order to start
279  * normalization correctly.
280  */
281  if (p->paramkind == PARAM_EXTERN &&
282  p->paramid > jstate->highest_extern_param_id)
283  jstate->highest_extern_param_id = p->paramid;
284  }
285  break;
286  default:
287  break;
288  }
289 }
290 
291 static void
292 _jumbleList(JumbleState *jstate, Node *node)
293 {
294  List *expr = (List *) node;
295  ListCell *l;
296 
297  switch (expr->type)
298  {
299  case T_List:
300  foreach(l, expr)
301  _jumbleNode(jstate, lfirst(l));
302  break;
303  case T_IntList:
304  foreach(l, expr)
306  break;
307  case T_OidList:
308  foreach(l, expr)
310  break;
311  case T_XidList:
312  foreach(l, expr)
314  break;
315  default:
316  elog(ERROR, "unrecognized list node type: %d",
317  (int) expr->type);
318  return;
319  }
320 }
321 
322 static void
324 {
325  A_Const *expr = (A_Const *) node;
326 
327  JUMBLE_FIELD(isnull);
328  if (!expr->isnull)
329  {
330  JUMBLE_FIELD(val.node.type);
331  switch (nodeTag(&expr->val))
332  {
333  case T_Integer:
334  JUMBLE_FIELD(val.ival.ival);
335  break;
336  case T_Float:
337  JUMBLE_STRING(val.fval.fval);
338  break;
339  case T_Boolean:
340  JUMBLE_FIELD(val.boolval.boolval);
341  break;
342  case T_String:
343  JUMBLE_STRING(val.sval.sval);
344  break;
345  case T_BitString:
346  JUMBLE_STRING(val.bsval.bsval);
347  break;
348  default:
349  elog(ERROR, "unrecognized node type: %d",
350  (int) nodeTag(&expr->val));
351  break;
352  }
353  }
354 }
#define Min(x, y)
Definition: c.h:1004
#define Assert(condition)
Definition: c.h:858
size_t Size
Definition: c.h:605
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
long val
Definition: informix.c:670
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define nodeTag(nodeptr)
Definition: nodes.h:133
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_int(lc)
Definition: pg_list.h:173
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define lfirst_xid(lc)
Definition: pg_list.h:175
void check_stack_depth(void)
Definition: postgres.c:3530
static uint64 DatumGetUInt64(Datum X)
Definition: postgres.h:419
@ PARAM_EXTERN
Definition: primnodes.h:367
@ COMPUTE_QUERY_ID_AUTO
Definition: queryjumble.h:58
@ COMPUTE_QUERY_ID_OFF
Definition: queryjumble.h:56
static bool IsQueryIdEnabled(void)
Definition: queryjumble.h:77
JumbleState * JumbleQuery(Query *query)
bool query_id_enabled
static void _jumbleNode(JumbleState *jstate, Node *node)
static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
#define JUMBLE_SIZE
int compute_query_id
static void RecordConstLocation(JumbleState *jstate, int location)
static void _jumbleList(JumbleState *jstate, Node *node)
#define JUMBLE_STRING(str)
const char * CleanQuerytext(const char *query, int *location, int *len)
#define JUMBLE_FIELD_SINGLE(item)
static void _jumbleA_Const(JumbleState *jstate, Node *node)
void EnableQueryId(void)
#define JUMBLE_FIELD(item)
bool scanner_isspace(char ch)
Definition: scansup.c:117
static pg_noinline void Size size
Definition: slab.c:607
bool isnull
Definition: parsenodes.h:363
union ValUnion val
Definition: parsenodes.h:362
unsigned char * jumble
Definition: queryjumble.h:35
int clocations_buf_size
Definition: queryjumble.h:44
Size jumble_len
Definition: queryjumble.h:38
int highest_extern_param_id
Definition: queryjumble.h:50
LocationLen * clocations
Definition: queryjumble.h:41
int clocations_count
Definition: queryjumble.h:47
Definition: pg_list.h:54
NodeTag type
Definition: pg_list.h:55
Definition: nodes.h:129
int paramid
Definition: primnodes.h:377
ParamKind paramkind
Definition: primnodes.h:376
Node * utilityStmt
Definition: parsenodes.h:136
const char * type