/*-------------------------------------------------------------------------
 *
 * arch-ppc.h
 *	  Atomic operations considerations specific to PowerPC
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-ppc.h
 *
 *-------------------------------------------------------------------------
 */

#if defined(__GNUC__)

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier_impl()	__asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
#endif

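/*
 * Why the full barrier needs "sync": lwsync permits a store to be reordered
 * past a later load.  A hypothetical Dekker-style sketch (my_flag/their_flag
 * are illustrative variables, not part of this file):
 *
 *		my_flag = 1;
 *		pg_memory_barrier_impl();	// "sync"; with only "lwsync" the load
 *									// below could be performed before the
 *									// store above becomes visible
 *		if (their_flag == 0)
 *			... proceed, knowing the peer will observe my_flag ...
 */
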
#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
	volatile uint32 value;
} pg_atomic_uint32;

/* 64bit atomics are only supported in 64bit mode */
#if SIZEOF_VOID_P >= 8
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
	volatile uint64 value pg_attribute_aligned(8);
} pg_atomic_uint64;

#endif

/*
 * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
 * code generation differs at the end.  __atomic_compare_exchange_n():
 *  100:	isync
 *  104:	mfcr    r3
 *  108:	rlwinm  r3,r3,3,31,31
 *  10c:	bne     120 <.eb+0x10>
 *  110:	clrldi  r3,r3,63
 *  114:	addi    r1,r1,112
 *  118:	blr
 *  11c:	nop
 *  120:	clrldi  r3,r3,63
 *  124:	stw     r9,0(r4)
 *  128:	addi    r1,r1,112
 *  12c:	blr
 *
 * This:
 *   f0:	isync
 *   f4:	mfcr    r9
 *   f8:	rldicl. r3,r9,35,63
 *   fc:	bne     104 <.eb>
 *  100:	stw     r10,0(r4)
 *  104:	addi    r1,r1,112
 *  108:	blr
 *
 * This implementation may or may not have materially different performance.
 * It's not exploiting the fact that cr0 still holds the relevant comparison
 * bits, set during the __asm__.  One could fix that by moving more code into
 * the __asm__.  (That would remove the freedom to eliminate dead stores when
 * the caller ignores "expected", but few callers do.)
 *
 * Recognizing constant "newval" would be superfluous, because there's no
 * immediate-operand version of stwcx.
 */
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	uint32		found;
	uint32		condition_register;
	bool		ret;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int32) *expected <= PG_INT16_MAX &&
		(int32) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx   %0,0,%5,1	\n"
			"	cmpwi   %0,%3		\n"
			"	bne     $+12		\n" /* branch to lwsync */
			"	stwcx.  %4,0,%5		\n"
			"	bne     $-16		\n" /* branch to lwarx */
			"	lwsync				\n"
			"	mfcr    %1			\n"
:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:			"i"(*expected), "r"(newval), "r"(&ptr->value)
:			"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx   %0,0,%5,1	\n"
			"	cmpw    %0,%3		\n"
			"	bne     $+12		\n" /* branch to lwsync */
			"	stwcx.  %4,0,%5		\n"
			"	bne     $-16		\n" /* branch to lwarx */
			"	lwsync				\n"
			"	mfcr    %1			\n"
:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:			"r"(*expected), "r"(newval), "r"(&ptr->value)
:			"memory", "cc");

	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}
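
/*
 * Example caller pattern (hypothetical; real callers go through the generic
 * pg_atomic_* API rather than this _impl function): on failure the value
 * actually found is written back into *expected, so a retry loop can reuse
 * it without re-reading the atomic:
 *
 *		uint32 old = ptr->value;
 *		while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | flag))
 *			;	// "old" now holds the value seen by the failed attempt
 */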

/*
 * This mirrors gcc __sync_fetch_and_add().
 *
 * Like tas(), use constraint "=&b" to avoid allocating r0.
 */
#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	uint32		_t;
	uint32		res;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx   %1,0,%4,1	\n"
			"	addi    %0,%1,%3	\n"
			"	stwcx.  %0,0,%4		\n"
			"	bne     $-12		\n" /* branch to lwarx */
			"	lwsync				\n"
:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
:			"i"(add_), "r"(&ptr->value)
:			"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx   %1,0,%4,1	\n"
			"	add     %0,%1,%3	\n"
			"	stwcx.  %0,0,%4		\n"
			"	bne     $-12		\n" /* branch to lwarx */
			"	lwsync				\n"
:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
:			"r"(add_), "r"(&ptr->value)
:			"memory", "cc");

	return res;
}

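/*
 * Example (hypothetical caller): as with __sync_fetch_and_add(), the value
 * returned is the one observed before the addition:
 *
 *		uint32 ticket = pg_atomic_fetch_add_u32_impl(&counter, 1);
 *		// counter.value is now ticket + 1, absent concurrent updates
 */
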
#ifdef PG_HAVE_ATOMIC_U64_SUPPORT

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
									uint64 *expected, uint64 newval)
{
	uint64		found;
	uint32		condition_register;
	bool		ret;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int64) *expected <= PG_INT16_MAX &&
		(int64) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx   %0,0,%5,1	\n"
			"	cmpdi   %0,%3		\n"
			"	bne     $+12		\n" /* branch to lwsync */
			"	stdcx.  %4,0,%5		\n"
			"	bne     $-16		\n" /* branch to ldarx */
			"	lwsync				\n"
			"	mfcr    %1			\n"
:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:			"i"(*expected), "r"(newval), "r"(&ptr->value)
:			"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx   %0,0,%5,1	\n"
			"	cmpd    %0,%3		\n"
			"	bne     $+12		\n" /* branch to lwsync */
			"	stdcx.  %4,0,%5		\n"
			"	bne     $-16		\n" /* branch to ldarx */
			"	lwsync				\n"
			"	mfcr    %1			\n"
:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:			"r"(*expected), "r"(newval), "r"(&ptr->value)
:			"memory", "cc");

	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
	uint64		_t;
	uint64		res;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx   %1,0,%4,1	\n"
			"	addi    %0,%1,%3	\n"
			"	stdcx.  %0,0,%4		\n"
			"	bne     $-12		\n" /* branch to ldarx */
			"	lwsync				\n"
:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
:			"i"(add_), "r"(&ptr->value)
:			"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx   %1,0,%4,1	\n"
			"	add     %0,%1,%3	\n"
			"	stdcx.  %0,0,%4		\n"
			"	bne     $-12		\n" /* branch to ldarx */
			"	lwsync				\n"
:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
:			"r"(add_), "r"(&ptr->value)
:			"memory", "cc");

	return res;
}

#endif							/* PG_HAVE_ATOMIC_U64_SUPPORT */

/* per architecture manual doubleword accesses have single copy atomicity */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY