PostgreSQL Source Code (git master)
arch-x86.h
/*-------------------------------------------------------------------------
 *
 * arch-x86.h
 *    Atomic operations considerations specific to Intel x86
 *
 * Note that we actually require a 486 upwards because the 386 doesn't have
 * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere
 * anymore, that's not much of a restriction, luckily.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-x86.h
 *
 *-------------------------------------------------------------------------
 */

/*
 * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads,
 * or stores to be reordered with other stores, but a load can be performed
 * before a subsequent store.
 *
 * Technically, some x86-ish chips support uncached memory access and/or
 * special instructions that are weakly ordered. In those cases we'd need
 * the read and write barriers to be lfence and sfence. But since we don't
 * do those things, a compiler barrier should be enough.
 *
 * "lock; addl" has worked for longer than "mfence". It's also rumored to be
 * faster in many scenarios.
 */

#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#if defined(__i386__) || defined(__i386)
#define pg_memory_barrier_impl() \
    __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
#elif defined(__x86_64__)
#define pg_memory_barrier_impl() \
    __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
#endif
#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */

#define pg_read_barrier_impl()  pg_compiler_barrier_impl()
#define pg_write_barrier_impl() pg_compiler_barrier_impl()

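/*
 * Illustrative sketch (not part of this header): because x86 is a TSO
 * architecture, the read/write barriers above only have to stop the
 * compiler from reordering.  A minimal producer/consumer handoff, assuming
 * the pg_write_barrier()/pg_read_barrier() wrappers from atomics.h (which
 * resolve to the _impl macros above); the variable and function names are
 * made up for illustration.
 */
static volatile int example_payload;
static volatile int example_ready;

static void
example_producer(void)
{
    example_payload = 42;       /* store the data first */
    pg_write_barrier();         /* compiler barrier: keep the store order */
    example_ready = 1;          /* then publish it */
}

static void
example_consumer(void)
{
    if (example_ready)          /* load the flag first */
    {
        pg_read_barrier();      /* compiler barrier: keep the load order */
        /* example_payload is guaranteed to read as 42 here */
    }
}
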
/*
 * Provide implementation for atomics using inline assembly on x86 gcc. It's
 * nice to support older gcc versions, and the compare/exchange implementation
 * here is actually more efficient than the __sync variant.
 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER)

#define PG_HAVE_ATOMIC_FLAG_SUPPORT
typedef struct pg_atomic_flag
{
    volatile char value;
} pg_atomic_flag;

#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
    volatile uint32 value;
} pg_atomic_uint32;

/*
 * It's too complicated to write inline asm for 64bit types on 32bit and the
 * 486 can't do it anyway.
 */
#ifdef __x86_64__
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
    /* alignment guaranteed due to being on a 64bit platform */
    volatile uint64 value;
} pg_atomic_uint64;
#endif /* __x86_64__ */

#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */

#if !defined(PG_HAVE_SPIN_DELAY)
/*
 * This sequence is equivalent to the PAUSE instruction ("rep" is
 * ignored by old IA32 processors if the following instruction is
 * not a string operation); the IA-32 Architecture Software
 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
 * PAUSE in the inner loop of a spin lock is necessary for good
 * performance:
 *
 *     The PAUSE instruction improves the performance of IA-32
 *     processors supporting Hyper-Threading Technology when
 *     executing spin-wait loops and other routines where one
 *     thread is accessing a shared lock or semaphore in a tight
 *     polling loop. When executing a spin-wait loop, the
 *     processor can suffer a severe performance penalty when
 *     exiting the loop because it detects a possible memory order
 *     violation and flushes the core processor's pipeline. The
 *     PAUSE instruction provides a hint to the processor that the
 *     code sequence is a spin-wait loop. The processor uses this
 *     hint to avoid the memory order violation and prevent the
 *     pipeline flush. In addition, the PAUSE instruction
 *     de-pipelines the spin-wait loop to prevent it from
 *     consuming execution resources excessively.
 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define PG_HAVE_SPIN_DELAY
static __inline__ void
pg_spin_delay_impl(void)
{
    __asm__ __volatile__(" rep; nop \n");
}
#elif defined(_MSC_VER) && defined(__x86_64__)
#define PG_HAVE_SPIN_DELAY
static __forceinline void
pg_spin_delay_impl(void)
{
    _mm_pause();
}
#elif defined(_MSC_VER)
#define PG_HAVE_SPIN_DELAY
static __forceinline void
pg_spin_delay_impl(void)
{
    /* See comment for gcc code. Same code, MASM syntax */
    __asm rep nop;
}
#endif
#endif /* !defined(PG_HAVE_SPIN_DELAY) */
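
/*
 * Illustrative sketch (not part of this header): pg_spin_delay_impl() is
 * intended to be called once per iteration of a tight polling loop, the kind
 * of inner loop PostgreSQL's spinlock code runs while waiting for a lock to
 * become free.  The function and variable names below are made up.
 */
static void
example_poll_until_set(volatile int *flag)
{
    while (*flag == 0)
        pg_spin_delay_impl();   /* PAUSE: avoids the pipeline flush on loop
                                 * exit and yields resources to a
                                 * hyper-thread sibling while we wait */
}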


#if defined(__GNUC__) || defined(__INTEL_COMPILER)

#define PG_HAVE_ATOMIC_TEST_SET_FLAG
static inline bool
pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
{
    char        _res = 1;

    __asm__ __volatile__(
        " lock          \n"
        " xchgb %0,%1   \n"
        : "+q"(_res), "+m"(ptr->value)
        :
        : "memory");
    return _res == 0;
}

#define PG_HAVE_ATOMIC_CLEAR_FLAG
static inline void
pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
{
    /*
     * On a TSO architecture like x86 it's sufficient to use a compiler
     * barrier to achieve release semantics.
     */
    __asm__ __volatile__("" ::: "memory");
    ptr->value = 0;
}
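
/*
 * Illustrative sketch (not part of this header): together the two flag
 * primitives above form a minimal test-and-set lock.
 * pg_atomic_test_set_flag_impl() returns true when this caller swapped a 0
 * out of the flag, i.e. obtained it; clearing the flag releases it with
 * release semantics.  The function names below are made up.
 */
static void
example_flag_acquire(volatile pg_atomic_flag *lock)
{
    /* spin until the xchgb above observes the flag as clear */
    while (!pg_atomic_test_set_flag_impl(lock))
        pg_spin_delay_impl();
}

static void
example_flag_release(volatile pg_atomic_flag *lock)
{
    pg_atomic_clear_flag_impl(lock);    /* compiler barrier, then plain store */
}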

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
                                    uint32 *expected, uint32 newval)
{
    char        ret;

    /*
     * Perform cmpxchg and use the zero flag which it implicitly sets when
     * equal to measure the success.
     */
    __asm__ __volatile__(
        " lock              \n"
        " cmpxchgl %4,%5    \n"
        " setz     %2       \n"
        : "=a" (*expected), "=m"(ptr->value), "=q" (ret)
        : "a" (*expected), "r" (newval), "m"(ptr->value)
        : "memory", "cc");
    return (bool) ret;
}
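
/*
 * Illustrative sketch (not part of this header): primitives that x86 has no
 * single instruction for are typically built as a compare-and-exchange retry
 * loop.  On failure, cmpxchg has already reloaded *expected with the current
 * value, so the loop just recomputes and retries.  The function name below is
 * made up; PostgreSQL's generic atomics fallbacks build such loops similarly.
 */
static inline uint32
example_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
{
    uint32      old = ptr->value;   /* ordinary read to seed the loop */

    while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
    {
        /* 'old' now holds the freshly observed value; recompute and retry */
    }
    return old;
}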

#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
    uint32      res;
    __asm__ __volatile__(
        " lock          \n"
        " xaddl %0,%1   \n"
        : "=q"(res), "=m"(ptr->value)
        : "0" (add_), "m"(ptr->value)
        : "memory", "cc");
    return res;
}
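
/*
 * Illustrative sketch (not part of this header): xadd returns the value the
 * counter held before the addition, so related operations can be derived
 * from it.  These wrappers are made up for illustration; the generic atomics
 * layer derives its variants in much the same way.
 */
static inline uint32
example_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
{
    /* value after the addition = value before it + add_ */
    return pg_atomic_fetch_add_u32_impl(ptr, add_) + add_;
}

static inline uint32
example_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
{
    /* subtraction is just addition of the negated amount */
    return pg_atomic_fetch_add_u32_impl(ptr, -sub_);
}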

#ifdef __x86_64__

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
                                    uint64 *expected, uint64 newval)
{
    char        ret;

    AssertPointerAlignment(expected, 8);

    /*
     * Perform cmpxchg and use the zero flag which it implicitly sets when
     * equal to measure the success.
     */
    __asm__ __volatile__(
        " lock              \n"
        " cmpxchgq %4,%5    \n"
        " setz     %2       \n"
        : "=a" (*expected), "=m"(ptr->value), "=q" (ret)
        : "a" (*expected), "r" (newval), "m"(ptr->value)
        : "memory", "cc");
    return (bool) ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
    uint64      res;
    __asm__ __volatile__(
        " lock          \n"
        " xaddq %0,%1   \n"
        : "=q"(res), "=m"(ptr->value)
        : "0" (add_), "m"(ptr->value)
        : "memory", "cc");
    return res;
}

#endif /* __x86_64__ */

#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */

/*
 * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
 * since at least the 586, as well as on all x86-64 cpus.
 */
#if defined(__i586__) || defined(__i686__) || /* gcc i586+ */ \
    (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
    defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
#endif /* 8 byte single-copy atomicity */
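
/*
 * Illustrative sketch (not part of this header): when
 * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY is defined, an aligned 8-byte load or
 * store can never observe a torn value, so an unlocked read of a 64-bit
 * atomic can be a plain dereference instead of a locked read-modify-write.
 * The function name below is made up; PostgreSQL's generic 64-bit read/write
 * fallbacks rely on this property in a similar way.
 */
#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \
    defined(PG_HAVE_ATOMIC_U64_SUPPORT)
static inline uint64
example_read_u64(volatile pg_atomic_uint64 *ptr)
{
    return ptr->value;          /* single aligned 8-byte load, never torn */
}
#endif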