arch-x86.h
1 /*-------------------------------------------------------------------------
2  *
3  * arch-x86.h
4  * Atomic operations considerations specific to intel x86
5  *
6  * Note that we actually require a 486 or newer, because the 386 doesn't
7  * support xadd and cmpxchg. Given that the 386 isn't supported anywhere
8  * anymore, that's luckily not much of a restriction.
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * NOTES:
14  *
15  * src/include/port/atomics/arch-x86.h
16  *
17  *-------------------------------------------------------------------------
18  */
19 
20 /*
21  * Neither 32 nor 64 bit x86 reorders loads with other loads, or stores with
22  * other stores; the only reordering allowed is that a load may be performed
23  * before an earlier store has become visible (store/load reordering).
24  *
25  * Technically, some x86-ish chips support uncached memory access and/or
26  * special instructions that are weakly ordered. In those cases we'd need
27  * the read and write barriers to be lfence and sfence. But since we don't
28  * do those things, a compiler barrier should be enough.
29  *
30  * "lock; addl" has worked for longer than "mfence". It's also rumored to be
31  * faster in many scenarios.
32  */
33 
34 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
35 #if defined(__i386__) || defined(__i386)
36 #define pg_memory_barrier_impl() \
37  __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
38 #elif defined(__x86_64__)
39 #define pg_memory_barrier_impl() \
40  __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
41 #endif
42 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
43 
44 #define pg_read_barrier_impl() pg_compiler_barrier_impl()
45 #define pg_write_barrier_impl() pg_compiler_barrier_impl()
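/*
 * Illustrative sketch (not part of this header): the classic
 * store-data-then-set-flag handoff needs only these compiler-barrier
 * implementations on x86, because the hardware already keeps stores ordered
 * with stores and loads ordered with loads.  pg_compiler_barrier_impl() is
 * assumed to be supplied by the surrounding atomics headers; the variables
 * and functions below are hypothetical.
 */
static int	example_payload;
static volatile int example_ready;

static void
example_producer(void)
{
	example_payload = 42;		/* write the data ... */
	pg_write_barrier_impl();	/* ... keep the two stores in program order ... */
	example_ready = 1;			/* ... then publish the flag */
}

static void
example_consumer(void)
{
	if (example_ready)			/* observe the flag ... */
	{
		pg_read_barrier_impl();	/* ... keep the two loads in program order */
		int			v = example_payload;	/* guaranteed to be 42 here on x86 */

		(void) v;
	}
}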
46 
47 /*
48  * Provide implementation for atomics using inline assembly on x86 gcc. It's
49  * nice to support older gcc versions, and the compare/exchange implementation
50  * here is actually more efficient than the __sync variant.
51  */
52 #if defined(HAVE_ATOMICS)
53 
54 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
55 
56 #define PG_HAVE_ATOMIC_FLAG_SUPPORT
57 typedef struct pg_atomic_flag
58 {
59  volatile char value;
60 } pg_atomic_flag;
61 
62 #define PG_HAVE_ATOMIC_U32_SUPPORT
63 typedef struct pg_atomic_uint32
64 {
65  volatile uint32 value;
66 } pg_atomic_uint32;
67 
68 /*
69  * It's too complicated to write inline asm for 64bit types on 32bit
70  * platforms, and the 486 can't do it anyway.
71  */
72 #ifdef __x86_64__
73 #define PG_HAVE_ATOMIC_U64_SUPPORT
74 typedef struct pg_atomic_uint64
75 {
76  /* alignment guaranteed due to being on a 64bit platform */
77  volatile uint64 value;
78 } pg_atomic_uint64;
79 #endif /* __x86_64__ */
80 
81 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
82 
83 #endif /* defined(HAVE_ATOMICS) */
84 
85 #if !defined(PG_HAVE_SPIN_DELAY)
86 /*
87  * This sequence is equivalent to the PAUSE instruction ("rep" is
88  * ignored by old IA32 processors if the following instruction is
89  * not a string operation); the IA-32 Architecture Software
90  * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
91  * PAUSE in the inner loop of a spin lock is necessary for good
92  * performance:
93  *
94  * The PAUSE instruction improves the performance of IA-32
95  * processors supporting Hyper-Threading Technology when
96  * executing spin-wait loops and other routines where one
97  * thread is accessing a shared lock or semaphore in a tight
98  * polling loop. When executing a spin-wait loop, the
99  * processor can suffer a severe performance penalty when
100  * exiting the loop because it detects a possible memory order
101  * violation and flushes the core processor's pipeline. The
102  * PAUSE instruction provides a hint to the processor that the
103  * code sequence is a spin-wait loop. The processor uses this
104  * hint to avoid the memory order violation and prevent the
105  * pipeline flush. In addition, the PAUSE instruction
106  * de-pipelines the spin-wait loop to prevent it from
107  * consuming execution resources excessively.
108  */
109 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
110 #define PG_HAVE_SPIN_DELAY
111 static __inline__ void
112 pg_spin_delay_impl(void)
113 {
114  __asm__ __volatile__(" rep; nop \n");
115 }
116 #elif defined(_MSC_VER) && defined(__x86_64__)
117 #define PG_HAVE_SPIN_DELAY
118 static __forceinline void
119 pg_spin_delay_impl(void)
120 {
121  _mm_pause();
122 }
123 #elif defined(_MSC_VER)
124 #define PG_HAVE_SPIN_DELAY
125 static __forceinline void
126 pg_spin_delay_impl(void)
127 {
128  /* See comment for gcc code. Same code, MASM syntax */
129  __asm rep nop;
130 }
131 #endif
132 #endif /* !defined(PG_HAVE_SPIN_DELAY) */
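
/*
 * Illustrative sketch (not part of this header): the kind of tight polling
 * loop the PAUSE hint is meant for.  The flag and function are hypothetical.
 */
static volatile int example_stop_waiting;

static void
example_wait(void)
{
	while (!example_stop_waiting)
		pg_spin_delay_impl();	/* PAUSE: cheap, and friendly to the sibling hyperthread */
}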
133 
134 
135 #if defined(HAVE_ATOMICS)
136 
137 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
138 
139 #define PG_HAVE_ATOMIC_TEST_SET_FLAG
140 static inline bool
141 pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
142 {
143  char _res = 1;
144 
145  __asm__ __volatile__(
146  " lock \n"
147  " xchgb %0,%1 \n"
148 : "+q"(_res), "+m"(ptr->value)
149 :
150 : "memory");
151  return _res == 0;
152 }
153 
154 #define PG_HAVE_ATOMIC_CLEAR_FLAG
155 static inline void
156 pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
157 {
158  /*
159  * On a TSO architecture like x86 it's sufficient to use a compiler
160  * barrier to achieve release semantics.
161  */
162  __asm__ __volatile__("" ::: "memory");
163  ptr->value = 0;
164 }
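
/*
 * Illustrative sketch (not part of this header): the two flag primitives
 * above compose into a simple test-and-set spinlock.  The xchgb in
 * pg_atomic_test_set_flag_impl() acts as a full barrier, giving acquire
 * semantics; the compiler barrier plus plain store in
 * pg_atomic_clear_flag_impl() suffices for release under x86's TSO model.
 * The lock variable and functions are hypothetical; PostgreSQL's real
 * spinlocks live in s_lock.h.
 */
static pg_atomic_flag example_lock = {0};	/* 0 == unlocked */

static void
example_lock_acquire(void)
{
	/* test-and-set returns true if we changed 0 -> 1, i.e. got the lock */
	while (!pg_atomic_test_set_flag_impl(&example_lock))
		pg_spin_delay_impl();	/* someone else holds it; PAUSE and retry */
}

static void
example_lock_release(void)
{
	pg_atomic_clear_flag_impl(&example_lock);	/* compiler barrier + store of 0 */
}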
165 
166 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
167 static inline bool
168 pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
169  uint32 *expected, uint32 newval)
170 {
171  char ret;
172 
173  /*
174  * Perform cmpxchg and use the zero flag, which it sets implicitly on a
175  * successful comparison, to detect success.
176  */
177  __asm__ __volatile__(
178  " lock \n"
179  " cmpxchgl %4,%5 \n"
180  " setz %2 \n"
181 : "=a" (*expected), "=m"(ptr->value), "=q" (ret)
182 : "a" (*expected), "r" (newval), "m"(ptr->value)
183 : "memory", "cc");
184  return (bool) ret;
185 }
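
/*
 * Illustrative sketch (not part of this header): a typical CAS retry loop.
 * Note a useful property of the constraints above: on failure, *expected is
 * overwritten with the value cmpxchg found (the "=a" output), so the loop
 * needs no separate re-read.  The function below is hypothetical.
 */
static void
example_fetch_max_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
{
	uint32		old = ptr->value;	/* initial unsynchronized read is fine */

	while (old < val)
	{
		/* on failure, 'old' has been refreshed with the current value */
		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, val))
			break;
	}
}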
186 
187 #define PG_HAVE_ATOMIC_FETCH_ADD_U32
188 static inline uint32
189 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
190 {
191  uint32 res;
192  __asm__ __volatile__(
193  " lock \n"
194  " xaddl %0,%1 \n"
195 : "=q"(res), "=m"(ptr->value)
196 : "0" (add_), "m"(ptr->value)
197 : "memory", "cc");
198  return res;
199 }
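
/*
 * Illustrative sketch (not part of this header): xadd leaves the
 * pre-increment value in the register tied to add_ (the "0" constraint), so
 * this is fetch-then-add.  A hypothetical use: handing out unique tickets.
 */
static uint32
example_take_ticket(volatile pg_atomic_uint32 *counter)
{
	/* each caller gets the value the counter had *before* its increment */
	return pg_atomic_fetch_add_u32_impl(counter, 1);
}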
200 
201 #ifdef __x86_64__
202 
203 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
204 static inline bool
205 pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
206  uint64 *expected, uint64 newval)
207 {
208  char ret;
209 
210  /*
211  * Perform cmpxchg and use the zero flag, which it sets implicitly on a
212  * successful comparison, to detect success.
213  */
214  __asm__ __volatile__(
215  " lock \n"
216  " cmpxchgq %4,%5 \n"
217  " setz %2 \n"
218 : "=a" (*expected), "=m"(ptr->value), "=q" (ret)
219 : "a" (*expected), "r" (newval), "m"(ptr->value)
220 : "memory", "cc");
221  return (bool) ret;
222 }
223 
224 #define PG_HAVE_ATOMIC_FETCH_ADD_U64
225 static inline uint64
226 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
227 {
228  uint64 res;
229  __asm__ __volatile__(
230  " lock \n"
231  " xaddq %0,%1 \n"
232 : "=q"(res), "=m"(ptr->value)
233 : "0" (add_), "m"(ptr->value)
234 : "memory", "cc");
235  return res;
236 }
237 
238 #endif /* __x86_64__ */
239 
240 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
241 
242 /*
243  * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
244  * since at least the 586, as well as on all x86-64 CPUs.
245  */
246 #if defined(__i586__) || defined(__i686__) || /* gcc i586+ */ \
247  (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
248  defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
249 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
250 #endif /* 8 byte single-copy atomicity */
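
/*
 * Illustrative sketch (not part of this header): with
 * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY defined, a plain aligned 8-byte load
 * can never observe a half-written ("torn") value, so generic code may read
 * such a variable without taking a lock.  The function is hypothetical.
 */
#ifdef PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
static uint64
example_read_u64_unlocked(volatile uint64 *p)
{
	return *p;					/* a single aligned 8-byte load, never torn */
}
#endif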
251 
252 #endif /* HAVE_ATOMICS */