/*-------------------------------------------------------------------------
 *
 * arch-ppc.h
 *    Atomic operations considerations specific to PowerPC
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-ppc.h
 *
 *-------------------------------------------------------------------------
 */

#if defined(__GNUC__)

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier_impl()    __asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier_impl()      __asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier_impl()     __asm__ __volatile__ ("lwsync" : : : "memory")
#endif

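/*
 * Editor's illustrative sketch (not part of the upstream header), assuming a
 * hypothetical PG_ILLUSTRATIVE_EXAMPLES guard: message passing built on the
 * barriers above.  The writer orders its payload store before the flag store
 * with pg_write_barrier_impl() (lwsync); the reader orders the flag load
 * before the payload load with pg_read_barrier_impl().  Ordering a store
 * before a later load still requires the full pg_memory_barrier_impl()
 * ("sync").
 */
#ifdef PG_ILLUSTRATIVE_EXAMPLES
static inline void
example_publish(volatile int *payload, volatile int *flag, int val)
{
    *payload = val;
    pg_write_barrier_impl();    /* lwsync: keeps the two stores in order */
    *flag = 1;
}

static inline int
example_consume(volatile int *payload, volatile int *flag)
{
    while (*flag == 0)
        ;                       /* spin until the writer sets the flag */
    pg_read_barrier_impl();     /* lwsync: keeps the two loads in order */
    return *payload;
}
#endif                          /* PG_ILLUSTRATIVE_EXAMPLES */
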
#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
    volatile uint32 value;
} pg_atomic_uint32;

/* 64bit atomics are only supported in 64bit mode */
#if SIZEOF_VOID_P >= 8
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
    volatile uint64 value pg_attribute_aligned(8);
} pg_atomic_uint64;

#endif
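
/*
 * Editor's illustrative sketch (not part of the upstream header), guarded by
 * the hypothetical PG_ILLUSTRATIVE_EXAMPLES macro: each struct above wraps a
 * single volatile integer, so a zero-initialized atomic is just a brace
 * initializer.  All modifications should go through the _impl functions
 * below.  example_counter is a made-up name, not part of this file.
 */
#ifdef PG_ILLUSTRATIVE_EXAMPLES
static pg_atomic_uint32 example_counter = {0};
#endif                          /* PG_ILLUSTRATIVE_EXAMPLES */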

/*
 * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
 * code generation differs at the end.  __atomic_compare_exchange_n():
 *  100:   isync
 *  104:   mfcr    r3
 *  108:   rlwinm  r3,r3,3,31,31
 *  10c:   bne     120 <.eb+0x10>
 *  110:   clrldi  r3,r3,63
 *  114:   addi    r1,r1,112
 *  118:   blr
 *  11c:   nop
 *  120:   clrldi  r3,r3,63
 *  124:   stw     r9,0(r4)
 *  128:   addi    r1,r1,112
 *  12c:   blr
 *
 * This:
 *   f0:   isync
 *   f4:   mfcr    r9
 *   f8:   rldicl. r3,r9,35,63
 *   fc:   bne     104 <.eb>
 *  100:   stw     r10,0(r4)
 *  104:   addi    r1,r1,112
 *  108:   blr
 *
 * This implementation may or may not have materially different performance.
 * It's not exploiting the fact that cr0 still holds the relevant comparison
 * bits, set during the __asm__.  One could fix that by moving more code into
 * the __asm__.  (That would remove the freedom to eliminate dead stores when
 * the caller ignores "expected", but few callers do.)
 *
 * Recognizing constant "newval" would be superfluous, because there's no
 * immediate-operand version of stwcx.
 */
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
                                    uint32 *expected, uint32 newval)
{
    uint32      found;
    uint32      condition_register;
    bool        ret;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
    if (__builtin_constant_p(*expected) &&
        (int32) *expected <= PG_INT16_MAX &&
        (int32) *expected >= PG_INT16_MIN)
        __asm__ __volatile__(
            "   sync                \n"
            "   lwarx   %0,0,%5,1   \n"
            "   cmpwi   %0,%3       \n"
            "   bne     $+12        \n" /* branch to lwsync */
            "   stwcx.  %4,0,%5     \n"
            "   bne     $-16        \n" /* branch to lwarx */
            "   lwsync              \n"
            "   mfcr    %1          \n"
:           "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:           "i"(*expected), "r"(newval), "r"(&ptr->value)
:           "memory", "cc");
    else
#endif
        __asm__ __volatile__(
            "   sync                \n"
            "   lwarx   %0,0,%5,1   \n"
            "   cmpw    %0,%3       \n"
            "   bne     $+12        \n" /* branch to lwsync */
            "   stwcx.  %4,0,%5     \n"
            "   bne     $-16        \n" /* branch to lwarx */
            "   lwsync              \n"
            "   mfcr    %1          \n"
:           "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:           "r"(*expected), "r"(newval), "r"(&ptr->value)
:           "memory", "cc");

    ret = (condition_register >> 29) & 1;   /* test eq bit of cr0 */
    if (!ret)
        *expected = found;
    return ret;
}
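
/*
 * Editor's illustrative sketch (not part of the upstream header), guarded by
 * the hypothetical PG_ILLUSTRATIVE_EXAMPLES macro: a typical CAS loop built
 * on the primitive above.  On failure the primitive writes the value it
 * found back into "expected", so the loop does not need to re-read the
 * target.  example_fetch_or_u32() is a made-up helper, not part of this
 * file.
 */
#ifdef PG_ILLUSTRATIVE_EXAMPLES
static inline uint32
example_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
{
    uint32      old = ptr->value;

    /* retry until the compare-and-exchange succeeds */
    while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
        ;
    return old;
}
#endif                          /* PG_ILLUSTRATIVE_EXAMPLES */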

/*
 * This mirrors gcc __sync_fetch_and_add().
 *
 * Like tas(), use constraint "=&b" to avoid allocating r0.
 */
#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
    uint32      _t;
    uint32      res;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
    if (__builtin_constant_p(add_) &&
        add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
        __asm__ __volatile__(
            "   sync                \n"
            "   lwarx   %1,0,%4,1   \n"
            "   addi    %0,%1,%3    \n"
            "   stwcx.  %0,0,%4     \n"
            "   bne     $-12        \n" /* branch to lwarx */
            "   lwsync              \n"
:           "=&r"(_t), "=&b"(res), "+m"(ptr->value)
:           "i"(add_), "r"(&ptr->value)
:           "memory", "cc");
    else
#endif
        __asm__ __volatile__(
            "   sync                \n"
            "   lwarx   %1,0,%4,1   \n"
            "   add     %0,%1,%3    \n"
            "   stwcx.  %0,0,%4     \n"
            "   bne     $-12        \n" /* branch to lwarx */
            "   lwsync              \n"
:           "=&r"(_t), "=&r"(res), "+m"(ptr->value)
:           "r"(add_), "r"(&ptr->value)
:           "memory", "cc");

    return res;
}
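
/*
 * Editor's illustrative sketch (not part of the upstream header), guarded by
 * the hypothetical PG_ILLUSTRATIVE_EXAMPLES macro: fetch-add returns the
 * value observed *before* the addition, so a reference-count drop can test
 * whether it released the last reference.  example_release_ref() is a
 * made-up helper, not part of this file.
 */
#ifdef PG_ILLUSTRATIVE_EXAMPLES
static inline bool
example_release_ref(volatile pg_atomic_uint32 *refcount)
{
    /* subtract one; true when this call dropped the count from 1 to 0 */
    return pg_atomic_fetch_add_u32_impl(refcount, -1) == 1;
}
#endif                          /* PG_ILLUSTRATIVE_EXAMPLES */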

#ifdef PG_HAVE_ATOMIC_U64_SUPPORT

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
                                    uint64 *expected, uint64 newval)
{
    uint64      found;
    uint32      condition_register;
    bool        ret;

    AssertPointerAlignment(expected, 8);

    /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
    if (__builtin_constant_p(*expected) &&
        (int64) *expected <= PG_INT16_MAX &&
        (int64) *expected >= PG_INT16_MIN)
        __asm__ __volatile__(
            "   sync                \n"
            "   ldarx   %0,0,%5,1   \n"
            "   cmpdi   %0,%3       \n"
            "   bne     $+12        \n" /* branch to lwsync */
            "   stdcx.  %4,0,%5     \n"
            "   bne     $-16        \n" /* branch to ldarx */
            "   lwsync              \n"
            "   mfcr    %1          \n"
:           "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:           "i"(*expected), "r"(newval), "r"(&ptr->value)
:           "memory", "cc");
    else
#endif
        __asm__ __volatile__(
            "   sync                \n"
            "   ldarx   %0,0,%5,1   \n"
            "   cmpd    %0,%3       \n"
            "   bne     $+12        \n" /* branch to lwsync */
            "   stdcx.  %4,0,%5     \n"
            "   bne     $-16        \n" /* branch to ldarx */
            "   lwsync              \n"
            "   mfcr    %1          \n"
:           "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:           "r"(*expected), "r"(newval), "r"(&ptr->value)
:           "memory", "cc");

    ret = (condition_register >> 29) & 1;   /* test eq bit of cr0 */
    if (!ret)
        *expected = found;
    return ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
    uint64      _t;
    uint64      res;

    /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
    if (__builtin_constant_p(add_) &&
        add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
        __asm__ __volatile__(
            "   sync                \n"
            "   ldarx   %1,0,%4,1   \n"
            "   addi    %0,%1,%3    \n"
            "   stdcx.  %0,0,%4     \n"
            "   bne     $-12        \n" /* branch to ldarx */
            "   lwsync              \n"
:           "=&r"(_t), "=&b"(res), "+m"(ptr->value)
:           "i"(add_), "r"(&ptr->value)
:           "memory", "cc");
    else
#endif
        __asm__ __volatile__(
            "   sync                \n"
            "   ldarx   %1,0,%4,1   \n"
            "   add     %0,%1,%3    \n"
            "   stdcx.  %0,0,%4     \n"
            "   bne     $-12        \n" /* branch to ldarx */
            "   lwsync              \n"
:           "=&r"(_t), "=&r"(res), "+m"(ptr->value)
:           "r"(add_), "r"(&ptr->value)
:           "memory", "cc");

    return res;
}

#endif                          /* PG_HAVE_ATOMIC_U64_SUPPORT */

/* per architecture manual doubleword accesses have single copy atomicity */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
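
/*
 * Editor's illustrative sketch (not part of the upstream header), guarded by
 * the hypothetical PG_ILLUSTRATIVE_EXAMPLES macro: because aligned
 * doubleword accesses are single-copy atomic, an unlocked 64-bit read can be
 * a plain load; no ldarx/stdcx. retry loop is needed just to avoid observing
 * a torn value.  example_read_u64() is a made-up helper, not part of this
 * file.
 */
#if defined(PG_ILLUSTRATIVE_EXAMPLES) && defined(PG_HAVE_ATOMIC_U64_SUPPORT)
static inline uint64
example_read_u64(volatile pg_atomic_uint64 *ptr)
{
    /* one aligned doubleword load; atomic per the architecture manual */
    return ptr->value;
}
#endif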