/*-------------------------------------------------------------------------
 *
 * arch-ppc.h
 *	  Atomic operations considerations specific to PowerPC
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-ppc.h
 *
 *-------------------------------------------------------------------------
 */

#if defined(__GNUC__)

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier_impl()	__asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
#endif

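/*
 * Why the full barrier needs "sync": lwsync permits a store to be reordered
 * past a later load.  A hypothetical Dekker-style sketch (my_flag/their_flag
 * are illustrative variables, not part of this file):
 *
 *		my_flag = 1;
 *		pg_memory_barrier_impl();	// "sync"; with only "lwsync" the load
 *									// below could be performed before the
 *									// store above becomes visible
 *		if (their_flag == 0)
 *			... proceed, knowing the peer will observe my_flag ...
 */
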
#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
	volatile uint32 value;
} pg_atomic_uint32;

/* 64bit atomics are only supported in 64bit mode */
#if SIZEOF_VOID_P >= 8
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
	volatile uint64 value pg_attribute_aligned(8);
} pg_atomic_uint64;

#endif

/*
 * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
 * code generation differs at the end.  __atomic_compare_exchange_n():
 *  100:	isync
 *  104:	mfcr    r3
 *  108:	rlwinm  r3,r3,3,31,31
 *  10c:	bne     120 <.eb+0x10>
 *  110:	clrldi  r3,r3,63
 *  114:	addi    r1,r1,112
 *  118:	blr
 *  11c:	nop
 *  120:	clrldi  r3,r3,63
 *  124:	stw     r9,0(r4)
 *  128:	addi    r1,r1,112
 *  12c:	blr
 *
 * This:
 *   f0:	isync
 *   f4:	mfcr    r9
 *   f8:	rldicl. r3,r9,35,63
 *   fc:	bne     104 <.eb>
 *  100:	stw     r10,0(r4)
 *  104:	addi    r1,r1,112
 *  108:	blr
 *
 * This implementation may or may not have materially different performance.
 * It's not exploiting the fact that cr0 still holds the relevant comparison
 * bits, set during the __asm__.  One could fix that by moving more code into
 * the __asm__.  (That would remove the freedom to eliminate dead stores when
 * the caller ignores "expected", but few callers do.)
 *
 * Recognizing constant "newval" would be superfluous, because there's no
 * immediate-operand version of stwcx.
 */
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	uint32		found;
	uint32		condition_register;
	bool		ret;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int32) *expected <= PG_INT16_MAX &&
		(int32) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx   %0,0,%5,1	\n"
			"	cmpwi   %0,%3		\n"
			"	bne     $+12		\n" /* branch to lwsync */
			"	stwcx.  %4,0,%5		\n"
			"	bne     $-16		\n" /* branch to lwarx */
			"	lwsync				\n"
			"	mfcr    %1			\n"
:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:			"i"(*expected), "r"(newval), "r"(&ptr->value)
:			"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx   %0,0,%5,1	\n"
			"	cmpw    %0,%3		\n"
			"	bne     $+12		\n" /* branch to lwsync */
			"	stwcx.  %4,0,%5		\n"
			"	bne     $-16		\n" /* branch to lwarx */
			"	lwsync				\n"
			"	mfcr    %1			\n"
:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:			"r"(*expected), "r"(newval), "r"(&ptr->value)
:			"memory", "cc");

	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}
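
/*
 * Example caller pattern (hypothetical; real callers go through the generic
 * pg_atomic_* API rather than this _impl function): on failure the value
 * actually found is written back into *expected, so a retry loop can reuse
 * it without re-reading the atomic:
 *
 *		uint32 old = ptr->value;
 *		while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | flag))
 *			;	// "old" now holds the value seen by the failed attempt
 */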

/*
 * This mirrors gcc __sync_fetch_and_add().
 *
 * Like tas(), use constraint "=&b" to avoid allocating r0.
 */
#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	uint32		_t;
	uint32		res;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx   %1,0,%4,1	\n"
			"	addi    %0,%1,%3	\n"
			"	stwcx.  %0,0,%4		\n"
			"	bne     $-12		\n" /* branch to lwarx */
			"	lwsync				\n"
:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
:			"i"(add_), "r"(&ptr->value)
:			"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx   %1,0,%4,1	\n"
			"	add     %0,%1,%3	\n"
			"	stwcx.  %0,0,%4		\n"
			"	bne     $-12		\n" /* branch to lwarx */
			"	lwsync				\n"
:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
:			"r"(add_), "r"(&ptr->value)
:			"memory", "cc");

	return res;
}

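/*
 * Example (hypothetical caller): as with __sync_fetch_and_add(), the value
 * returned is the one observed before the addition:
 *
 *		uint32 ticket = pg_atomic_fetch_add_u32_impl(&counter, 1);
 *		// counter.value is now ticket + 1, absent concurrent updates
 */
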
#ifdef PG_HAVE_ATOMIC_U64_SUPPORT

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
									uint64 *expected, uint64 newval)
{
	uint64		found;
	uint32		condition_register;
	bool		ret;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int64) *expected <= PG_INT16_MAX &&
		(int64) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx   %0,0,%5,1	\n"
			"	cmpdi   %0,%3		\n"
			"	bne     $+12		\n" /* branch to lwsync */
			"	stdcx.  %4,0,%5		\n"
			"	bne     $-16		\n" /* branch to ldarx */
			"	lwsync				\n"
			"	mfcr    %1			\n"
:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:			"i"(*expected), "r"(newval), "r"(&ptr->value)
:			"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx   %0,0,%5,1	\n"
			"	cmpd    %0,%3		\n"
			"	bne     $+12		\n" /* branch to lwsync */
			"	stdcx.  %4,0,%5		\n"
			"	bne     $-16		\n" /* branch to ldarx */
			"	lwsync				\n"
			"	mfcr    %1			\n"
:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
:			"r"(*expected), "r"(newval), "r"(&ptr->value)
:			"memory", "cc");

	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
	uint64		_t;
	uint64		res;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx   %1,0,%4,1	\n"
			"	addi    %0,%1,%3	\n"
			"	stdcx.  %0,0,%4		\n"
			"	bne     $-12		\n" /* branch to ldarx */
			"	lwsync				\n"
:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
:			"i"(add_), "r"(&ptr->value)
:			"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx   %1,0,%4,1	\n"
			"	add     %0,%1,%3	\n"
			"	stdcx.  %0,0,%4		\n"
			"	bne     $-12		\n" /* branch to ldarx */
			"	lwsync				\n"
:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
:			"r"(add_), "r"(&ptr->value)
:			"memory", "cc");

	return res;
}

#endif							/* PG_HAVE_ATOMIC_U64_SUPPORT */

/* per architecture manual doubleword accesses have single copy atomicity */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY