PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_cpu_x86.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_cpu_x86.c
4 * Runtime CPU feature detection for x86
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/port/pg_cpu_x86.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16#ifndef FRONTEND
17#include "postgres.h"
18#else
19#include "postgres_fe.h"
20#endif
21
22#if defined(USE_SSE2) || defined(__i386__)
23
24#ifdef _MSC_VER
25#include <intrin.h>
26#else
27#include <cpuid.h>
28#endif
29
30#ifdef HAVE_XSAVE_INTRINSICS
31#include <immintrin.h>
32#endif
33
34#include "port/pg_cpu.h"
35
/*
 * XSAVE state component bits that we need
 *
 * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
 * Chapter "MANAGING STATE USING THE XSAVE FEATURE SET"
 *
 * Each macro is a single-bit mask; several can be OR'ed together to test for
 * a set of components at once.
 */
#define XMM			(1 << 1)	/* bit 1 */
#define YMM			(1 << 2)	/* bit 2 */
#define OPMASK		(1 << 5)	/* bit 5 */
#define ZMM0_15		(1 << 6)	/* bit 6 */
#define ZMM16_31	(1 << 7)	/* bit 7 */
47
48
49/* array indexed by enum X86FeatureId */
51
52static bool
54{
55 return (value & mask) == mask;
56}
57
/*
 * Named indexes for CPUID register array
 *
 * The CPUID helpers in this file fill a four-element unsigned int array;
 * these names select the slot that corresponds to each output register.
 */
#define EAX		0
#define EBX		1
#define ECX		2
#define EDX		3
63
/*
 * Request CPUID information for the specified leaf.
 *
 * "reg" must point to an array of four unsigned ints, indexed by
 * EAX/EBX/ECX/EDX.  The array is cleared before the query is issued, so it
 * reads as all zeroes whenever the intrinsic leaves it untouched, or when
 * neither intrinsic is available in this build.
 */
static inline void
pg_cpuid(int leaf, unsigned int *reg)
{
	/* start from a known state: all four output registers zeroed */
	for (int i = 0; i < 4; i++)
		reg[i] = 0;

#if defined(HAVE__GET_CPUID)
	__get_cpuid(leaf, reg + EAX, reg + EBX, reg + ECX, reg + EDX);
#elif defined(HAVE__CPUID)
	__cpuid((int *) reg, leaf);
#endif
}
77
/*
 * Request CPUID information for the specified leaf and subleaf.
 *
 * "reg" must point to an array of four unsigned ints, indexed by
 * EAX/EBX/ECX/EDX; it is cleared before the query is issued.
 *
 * Returns true if the CPUID leaf/subleaf is supported, false otherwise.
 * With __get_cpuid_count() that is whatever the intrinsic reports; with
 * __cpuidex() the query is issued unconditionally and true is returned;
 * without either intrinsic the result is always false.
 */
static inline bool
pg_cpuid_subleaf(int leaf, int subleaf, unsigned int *reg)
{
	bool		supported = false;

	/* start from a known state: all four output registers zeroed */
	for (int i = 0; i < 4; i++)
		reg[i] = 0;

#if defined(HAVE__GET_CPUID_COUNT)
	supported = (__get_cpuid_count(leaf, subleaf,
								   reg + EAX, reg + EBX,
								   reg + ECX, reg + EDX) == 1);
#elif defined(HAVE__CPUIDEX)
	__cpuidex((int *) reg, leaf, subleaf);
	supported = true;
#endif

	return supported;
}
96
97/*
98 * Parse the CPU ID info for runtime checks.
99 */
100#ifdef HAVE_XSAVE_INTRINSICS
101pg_attribute_target("xsave")
102#endif
103void
105{
106 unsigned int reg[4] = {0};
107 bool have_osxsave;
108
109 pg_cpuid(0x01, reg);
110
111 X86Features[PG_SSE4_2] = reg[ECX] >> 20 & 1;
112 X86Features[PG_POPCNT] = reg[ECX] >> 23 & 1;
113 X86Features[PG_HYPERVISOR] = reg[ECX] >> 31 & 1;
114 have_osxsave = reg[ECX] >> 27 & 1;
115
116 pg_cpuid_subleaf(0x07, 0, reg);
117
118 X86Features[PG_TSC_ADJUST] = reg[EBX] >> 1 & 1;
119
120 /* leaf 7 features that depend on OSXSAVE */
121 if (have_osxsave)
122 {
123 uint32 xcr0_val = 0;
124
125#ifdef HAVE_XSAVE_INTRINSICS
126 /* get value of Extended Control Register */
127 xcr0_val = _xgetbv(0);
128#endif
129
130 /* Are YMM registers enabled? */
132 X86Features[PG_AVX2] = reg[EBX] >> 5 & 1;
133
134 /* Are ZMM registers enabled? */
137 {
138 X86Features[PG_AVX512_BW] = reg[EBX] >> 30 & 1;
139 X86Features[PG_AVX512_VL] = reg[EBX] >> 31 & 1;
140
141 X86Features[PG_AVX512_VPCLMULQDQ] = reg[ECX] >> 10 & 1;
142 X86Features[PG_AVX512_VPOPCNTDQ] = reg[ECX] >> 14 & 1;
143 }
144 }
145
146 /* Check for other TSC related flags */
147 pg_cpuid(0x80000001, reg);
148 X86Features[PG_RDTSCP] = reg[EDX] >> 27 & 1;
149
150 pg_cpuid(0x80000007, reg);
151 X86Features[PG_TSC_INVARIANT] = reg[EDX] >> 8 & 1;
152
153 X86Features[INIT_PG_X86] = true;
154}
155
156/* TSC (Time-stamp Counter) handling code */
157
159
160/*
161 * Determine the TSC frequency of the CPU through CPUID, where supported.
162 *
163 * Needed to interpret the tick value returned by RDTSC/RDTSCP. Return value of
164 * 0 indicates the frequency information was not accessible via CPUID.
165 *
166 * The optional source argument may contain a pre-allocated string of capacity
167 * source_size that will be concatenated with info on the TSC frequency source.
168 */
169uint32
170x86_tsc_frequency_khz(char *source, size_t source_size)
171{
172 unsigned int reg[4] = {0};
173
174 /*
175 * If we're inside a virtual machine, try to fetch the TSC frequency from
176 * the hypervisor, using a hypervisor specific method.
177 *
178 * Note it is not safe to utilize the regular 0x15/0x16 CPUID registers
179 * (i.e. the logic below) in virtual machines, as they have been observed
180 * to be wildly incorrect when virtualized.
181 */
183 {
185
186 if (source)
187 {
188 strlcat(source, ", hypervisor", source_size);
189 if (freq > 0)
190 strlcat(source, ", cpuid 0x40000010", source_size);
191 }
192 return freq;
193 }
194
195 /*
196 * On modern Intel CPUs, the TSC is implemented by invariant timekeeping
197 * hardware, also called "Always Running Timer", or ART. The ART stays
198 * consistent even if the CPU changes frequency due to changing power
199 * levels.
200 *
201 * As documented in "Determining the Processor Base Frequency" in the
202 * "Intel® 64 and IA-32 Architectures Software Developer's Manual",
203 * February 2026 Edition, we can get the TSC frequency as follows:
204 *
205 * Nominal TSC frequency = ( CPUID.15H:ECX[31:0] * CPUID.15H:EBX[31:0] ) /
206 * CPUID.15H:EAX[31:0]
207 *
208 * With CPUID.15H:ECX representing the nominal core crystal clock
209 * frequency, and EAX/EBX representing values used to translate the TSC
210 * value to that frequency, see Chapter 20.17 "Time-Stamp Counter" of
211 * that manual.
212 *
213 * Older Intel CPUs, and other vendors do not set CPUID.15H:ECX, and as
214 * such we fall back to alternate approaches.
215 */
216 pg_cpuid(0x15, reg);
217 if (reg[ECX] > 0)
218 {
219 /*
220 * EBX not being set indicates invariant TSC is not available. Require
221 * EAX being non-zero too, to avoid a theoretical divide by zero.
222 */
223 if (reg[EAX] == 0 || reg[EBX] == 0)
224 return 0;
225
226 if (source)
227 strlcat(source, ", cpuid 0x15", source_size);
228
229 return reg[ECX] / 1000 * reg[EBX] / reg[EAX];
230 }
231
232 /*
233 * When CPUID.15H is not available/incomplete, we can instead try to get
234 * the processor base frequency in MHz from CPUID.16H:EAX, the "Processor
235 * Frequency Information Leaf".
236 */
237 pg_cpuid(0x16, reg);
238 if (reg[EAX] > 0)
239 {
240 if (source)
241 strlcat(source, ", cpuid 0x16", source_size);
242
243 return reg[EAX] * 1000;
244 }
245
246 return 0;
247}
248
249/*
250 * Support for reading TSC frequency for hypervisors passing it to a guest VM.
251 *
252 * Two Hypervisors (VMware and KVM) are known to make TSC frequency in KHz
253 * available at the vendor-specific 0x40000010 leaf in the EAX register.
254 *
255 * For some other Hypervisors that have an invariant TSC, e.g. HyperV, we would
256 * need to access a model-specific register (MSR) to get the frequency. MSRs are
257 * separate from CPUID and typically not available for unprivileged processes,
258 * so we can't get the frequency this way.
259 */
/*
 * Match the hypervisor vendor signature held in the EBX/ECX/EDX slots of a
 * CPUID register array "r".  Each constant is four ASCII characters of the
 * signature string, lowest byte first.
 *
 * The argument is parenthesized so that any pointer expression can be passed
 * (e.g. "base + 4"); note that it is evaluated three times, so it must not
 * have side effects.
 */
#define CPUID_HYPERVISOR_VMWARE(r) \
	((r)[EBX] == 0x61774d56 && (r)[ECX] == 0x4d566572 && (r)[EDX] == 0x65726177)	/* VMwareVMware */
#define CPUID_HYPERVISOR_KVM(r) \
	((r)[EBX] == 0x4b4d564b && (r)[ECX] == 0x564b4d56 && (r)[EDX] == 0x0000004d)	/* KVMKVMKVM */
262static uint32
264{
265#if defined(HAVE__CPUIDEX)
266 unsigned int reg[4] = {0};
267
268 /*
269 * The hypervisor is determined using the 0x40000000 Hypervisor
270 * information leaf, which requires use of __cpuidex to set ECX to 0 to
271 * access it.
272 *
273 * The similar __get_cpuid_count function does not work as expected since
274 * it contains a check for __get_cpuid_max, which has been observed to be
275 * lower than the special Hypervisor leaf, despite it being available.
276 */
277 __cpuidex((int *) reg, 0x40000000, 0);
278
279 if (reg[EAX] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(reg) || CPUID_HYPERVISOR_KVM(reg)))
280 {
281 __cpuidex((int *) reg, 0x40000010, 0);
282 if (reg[EAX] > 0)
283 return reg[EAX];
284 }
285#endif /* HAVE__CPUIDEX */
286
287 return 0;
288}
289
290#else /* defined(USE_SSE2) || defined(__i386__) */
291
292/* prevent linker complaints about empty module */
295
296#endif /* ! (USE_SSE2 || __i386__) */
uint32_t uint32
Definition c.h:624
#define pg_attribute_target(...)
Definition c.h:238
static struct @177 value
int pg_cpu_x86_dummy_variable
Definition pg_cpu_x86.c:294
static rewind_source * source
Definition pg_rewind.c:89
size_t strlcat(char *dst, const char *src, size_t siz)
Definition strlcat.c:34
static int fb(int x)