arch-x86.h
/*-------------------------------------------------------------------------
 *
 * arch-x86.h
 *    Atomic operation considerations specific to Intel x86
 *
 * Note that we actually require a 486 or newer, because the 386 doesn't
 * support xadd and cmpxchg.  Given that the 386 isn't supported anywhere
 * anymore, that's fortunately not much of a restriction.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-x86.h
 *
 *-------------------------------------------------------------------------
 */

/*
 * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads,
 * or stores to be reordered with other stores, but a load can be reordered
 * ahead of an earlier store (i.e., store-load reordering is allowed).
 *
 * Technically, some x86-ish chips support uncached memory access and/or
 * special instructions that are weakly ordered.  In those cases we'd need
 * the read and write barriers to be lfence and sfence.  But since we don't
 * do those things, a compiler barrier should be enough.
 *
 * "lock; addl" has worked for longer than "mfence".  It's also rumored to be
 * faster in many scenarios.
 */

#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#if defined(__i386__) || defined(__i386)
#define pg_memory_barrier_impl()		\
	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
#elif defined(__x86_64__)
#define pg_memory_barrier_impl()		\
	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
#endif
#endif							/* defined(__GNUC__) || defined(__INTEL_COMPILER) */

#define pg_read_barrier_impl()		pg_compiler_barrier_impl()
#define pg_write_barrier_impl()		pg_compiler_barrier_impl()
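The read and write barriers above therefore cost nothing at run time beyond
inhibiting compiler reordering; only the full memory barrier emits a locked
instruction. As a minimal sketch of where that is sufficient, here is the
classic message-passing pattern written against the generic wrappers from
port/atomics.h (which resolve to the _impl macros defined here on x86); the
SharedMsg type and the publish/consume functions are illustrative, not part
of PostgreSQL.

#include "postgres.h"
#include "port/atomics.h"

typedef struct SharedMsg
{
	int			payload;		/* written before 'ready' is set */
	volatile int ready;			/* 0 = not yet published */
} SharedMsg;

static void
publish(SharedMsg *msg, int val)
{
	msg->payload = val;
	pg_write_barrier();			/* order the payload store before the flag store */
	msg->ready = 1;
}

static int
consume(SharedMsg *msg)
{
	while (msg->ready == 0)
		;						/* real code would call pg_spin_delay() here */
	pg_read_barrier();			/* order the flag load before the payload load */
	return msg->payload;
}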

/*
 * Provide an implementation for atomics using inline assembly on x86 gcc.
 * It's nice to support older gccs, and the compare/exchange implementation
 * here is actually more efficient than the __sync variant.
 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER)

#define PG_HAVE_ATOMIC_FLAG_SUPPORT
typedef struct pg_atomic_flag
{
	volatile char value;
} pg_atomic_flag;

#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
	volatile uint32 value;
} pg_atomic_uint32;

/*
 * It's too complicated to write inline asm for 64-bit types on 32-bit, and
 * the 486 can't do it anyway.
 */
#ifdef __x86_64__
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
	/* alignment guaranteed due to being on a 64bit platform */
	volatile uint64 value;
} pg_atomic_uint64;
#endif							/* __x86_64__ */

#endif							/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
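These structs deliberately contain nothing but a volatile integer; callers
never touch the value field directly and instead go through the generic
pg_atomic_* accessors in port/atomics.h, which dispatch to the _impl
functions below. As a hedged sketch of the usual pattern, a counter of this
kind embedded in a shared-memory struct and initialized exactly once (the
CounterShmemState type and its field names are invented for illustration):

#include "postgres.h"
#include "port/atomics.h"

typedef struct CounterShmemState
{
	pg_atomic_uint32 nrequests;	/* updated concurrently by many backends */
} CounterShmemState;

static void
counter_state_init(CounterShmemState *state)
{
	/* must run exactly once, before any concurrent access to nrequests */
	pg_atomic_init_u32(&state->nrequests, 0);
}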

#if !defined(PG_HAVE_SPIN_DELAY)
/*
 * This sequence is equivalent to the PAUSE instruction ("rep" is
 * ignored by old IA32 processors if the following instruction is
 * not a string operation); the IA-32 Architecture Software
 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
 * PAUSE in the inner loop of a spin lock is necessary for good
 * performance:
 *
 *     The PAUSE instruction improves the performance of IA-32
 *     processors supporting Hyper-Threading Technology when
 *     executing spin-wait loops and other routines where one
 *     thread is accessing a shared lock or semaphore in a tight
 *     polling loop. When executing a spin-wait loop, the
 *     processor can suffer a severe performance penalty when
 *     exiting the loop because it detects a possible memory order
 *     violation and flushes the core processor's pipeline. The
 *     PAUSE instruction provides a hint to the processor that the
 *     code sequence is a spin-wait loop. The processor uses this
 *     hint to avoid the memory order violation and prevent the
 *     pipeline flush. In addition, the PAUSE instruction
 *     de-pipelines the spin-wait loop to prevent it from
 *     consuming execution resources excessively.
 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define PG_HAVE_SPIN_DELAY
static __inline__ void
pg_spin_delay_impl(void)
{
	__asm__ __volatile__(" rep; nop			\n");
}
#elif defined(_MSC_VER) && defined(__x86_64__)
#define PG_HAVE_SPIN_DELAY
static __forceinline void
pg_spin_delay_impl(void)
{
	_mm_pause();
}
#elif defined(_MSC_VER)
#define PG_HAVE_SPIN_DELAY
static __forceinline void
pg_spin_delay_impl(void)
{
	/* See comment for gcc code. Same code, MASM syntax */
	__asm rep nop;
}
#endif
#endif							/* !defined(PG_HAVE_SPIN_DELAY) */
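The generic pg_spin_delay() macro resolves to pg_spin_delay_impl() above and
is meant to be issued on every iteration of a busy-wait loop. A minimal
sketch, assuming the waited-on value lives in shared memory and is eventually
set by another process (the function name is invented; PostgreSQL's real spin
loops in s_lock.c additionally back off and eventually sleep):

#include "postgres.h"
#include "port/atomics.h"

/*
 * Busy-wait until *state becomes nonzero, returning the observed value.
 * pg_spin_delay() expands to the PAUSE sequence above, which avoids the
 * pipeline flush described in the quoted manual text and keeps a
 * hyper-threaded sibling from being starved.
 */
static uint32
spin_until_set(volatile pg_atomic_uint32 *state)
{
	uint32		val;

	while ((val = pg_atomic_read_u32(state)) == 0)
		pg_spin_delay();

	return val;
}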

#if defined(__GNUC__) || defined(__INTEL_COMPILER)

#define PG_HAVE_ATOMIC_TEST_SET_FLAG
static inline bool
pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
{
	char		_res = 1;

	__asm__ __volatile__(
		"	lock			\n"
		"	xchgb	%0,%1	\n"
:		"+q"(_res), "+m"(ptr->value)
:
:		"memory");
	return _res == 0;
}

#define PG_HAVE_ATOMIC_CLEAR_FLAG
static inline void
pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
{
	/*
	 * On a TSO architecture like x86 it's sufficient to use a compiler
	 * barrier to achieve release semantics.
	 */
	__asm__ __volatile__("" ::: "memory");
	ptr->value = 0;
}
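Taken together, these two functions implement a test-and-set flag with the
usual acquire/release pairing: the locked xchgb acts as a full barrier on
acquisition, while releasing only needs the compiler barrier plus plain store
shown above, because x86's TSO model never reorders a store ahead of earlier
loads or stores. A hedged sketch of the pairing through the generic wrappers
(the lock-style function names are invented for illustration):

#include "postgres.h"
#include "port/atomics.h"

static void
flag_lock_acquire(volatile pg_atomic_flag *lock)
{
	/* pg_atomic_test_set_flag() returns true if the flag was previously clear */
	while (!pg_atomic_test_set_flag(lock))
		pg_spin_delay();
}

static void
flag_lock_release(volatile pg_atomic_flag *lock)
{
	/* release semantics: on x86 this is just a compiler barrier and a store */
	pg_atomic_clear_flag(lock);
}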

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	char		ret;

	/*
	 * Perform cmpxchg and use the zero flag which it implicitly sets when
	 * equal to measure the success.
	 */
	__asm__ __volatile__(
		"	lock				\n"
		"	cmpxchgl	%4,%5	\n"
		"	setz		%2		\n"
:		"=a" (*expected), "=m"(ptr->value), "=q" (ret)
:		"a" (*expected), "r" (newval), "m"(ptr->value)
:		"memory", "cc");
	return (bool) ret;
}
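On failure the "=a" (*expected) output leaves the value actually found in
*expected, which is the contract of the generic
pg_atomic_compare_exchange_u32() wrapper and what makes compact retry loops
possible. A minimal sketch of such a loop, here a saturating increment that
never pushes the counter past a caller-supplied limit (the function itself is
invented for illustration):

#include "postgres.h"
#include "port/atomics.h"

/*
 * Atomically increment *counter, but never beyond 'limit'.
 * Returns the value that ended up stored.
 */
static uint32
saturating_inc(volatile pg_atomic_uint32 *counter, uint32 limit)
{
	uint32		old = pg_atomic_read_u32(counter);

	for (;;)
	{
		uint32		newval = (old < limit) ? old + 1 : limit;

		/* on failure, 'old' is refreshed with the value currently stored */
		if (pg_atomic_compare_exchange_u32(counter, &old, newval))
			return newval;
	}
}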

#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	uint32		res;
	__asm__ __volatile__(
		"	lock				\n"
		"	xaddl	%0,%1		\n"
:		"=q"(res), "=m"(ptr->value)
:		"0" (add_), "m"(ptr->value)
:		"memory", "cc");
	return res;
}
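xaddl leaves the previous value of the target in the source register, so the
generic pg_atomic_fetch_add_u32() reports the counter as it was before the
addition. A short usage sketch (the id-counter variable is assumed to sit in
shared memory and to have been initialized with pg_atomic_init_u32()):

#include "postgres.h"
#include "port/atomics.h"

static uint32
next_request_id(volatile pg_atomic_uint32 *id_counter)
{
	/* fetch-and-add returns the old value; add 1 to get this call's id */
	return pg_atomic_fetch_add_u32(id_counter, 1) + 1;
}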

#ifdef __x86_64__

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
									uint64 *expected, uint64 newval)
{
	char		ret;

	AssertPointerAlignment(expected, 8);

	/*
	 * Perform cmpxchg and use the zero flag which it implicitly sets when
	 * equal to measure the success.
	 */
	__asm__ __volatile__(
		"	lock				\n"
		"	cmpxchgq	%4,%5	\n"
		"	setz		%2		\n"
:		"=a" (*expected), "=m"(ptr->value), "=q" (ret)
:		"a" (*expected), "r" (newval), "m"(ptr->value)
:		"memory", "cc");
	return (bool) ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
	uint64		res;
	__asm__ __volatile__(
		"	lock				\n"
		"	xaddq	%0,%1		\n"
:		"=q"(res), "=m"(ptr->value)
:		"0" (add_), "m"(ptr->value)
:		"memory", "cc");
	return res;
}

#endif							/* __x86_64__ */
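The 64-bit variants mirror the 32-bit ones, with an extra
AssertPointerAlignment() check that the caller's 'expected' variable is
8-byte aligned. As a hedged sketch of a use that needs 64-bit
compare-exchange specifically, a monotonic high-water mark that only ever
moves forward (the function is invented for illustration):

#include "postgres.h"
#include "port/atomics.h"

/*
 * Advance *mark to 'candidate' if candidate is larger; never move it back.
 */
static void
advance_high_water_mark(volatile pg_atomic_uint64 *mark, uint64 candidate)
{
	uint64		cur = pg_atomic_read_u64(mark);

	while (cur < candidate)
	{
		/* on failure 'cur' is refreshed with the value currently stored */
		if (pg_atomic_compare_exchange_u64(mark, &cur, candidate))
			break;
	}
}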

#endif							/* defined(__GNUC__) || defined(__INTEL_COMPILER) */

/*
 * 8-byte reads and writes have single-copy atomicity on 32-bit x86 platforms
 * since at least the 586, as well as on all x86-64 cpus.
 */
#if defined(__i568__) || defined(__i668__) || /* gcc i586+ */	\
	(defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
	defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
#endif							/* 8 byte single-copy atomicity */
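Single-copy atomicity means an aligned 8-byte load or store happens as one
indivisible memory access, so concurrent readers can never observe a torn
value. When PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY is set, the generic 64-bit
read and write paths in the atomics layer can rely on plain loads and stores
instead of a locked compare-exchange. A hedged sketch of a reader that
depends on this, assuming the counter is naturally aligned and updated
elsewhere with the pg_atomic_* write or fetch-add operations:

#include "postgres.h"
#include "port/atomics.h"

/*
 * Sample a 64-bit counter that other processes update concurrently.
 * With 8-byte single-copy atomicity this is a plain aligned load and
 * still can never return a half-updated value.
 */
static uint64
sample_counter(volatile pg_atomic_uint64 *counter)
{
	return pg_atomic_read_u64(counter);
}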