arch-x86.h
1 /*-------------------------------------------------------------------------
2  *
3  * arch-x86.h
4  * Atomic operations considerations specific to intel x86
5  *
6  * Note that we actually require a 486 or newer, because the 386 doesn't
7  * support xadd and cmpxchg. Given that the 386 isn't supported anywhere
8  * anymore, that's luckily not much of a restriction.
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * NOTES:
14  *
15  * src/include/port/atomics/arch-x86.h
16  *
17  *-------------------------------------------------------------------------
18  */
19 
20 /*
21  * Neither 32 nor 64 bit x86 reorders loads with other loads, or stores with
22  * other stores; the only reordering allowed is that a load may be performed
23  * before an earlier store has become visible (store/load reordering).
24  *
25  * Technically, some x86-ish chips support uncached memory access and/or
26  * special instructions that are weakly ordered. In those cases we'd need
27  * the read and write barriers to be lfence and sfence. But since we don't
28  * do those things, a compiler barrier should be enough.
29  *
30  * "lock; addl" has worked for longer than "mfence". It's also rumored to be
31  * faster in many scenarios.
32  */
33 
34 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
35 #if defined(__i386__) || defined(__i386)
36 #define pg_memory_barrier_impl() \
37  __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
38 #elif defined(__x86_64__)
39 #define pg_memory_barrier_impl() \
40  __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
41 #endif
42 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
43 
44 #define pg_read_barrier_impl() pg_compiler_barrier_impl()
45 #define pg_write_barrier_impl() pg_compiler_barrier_impl()
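/*
 * Illustrative sketch (not part of this header): the classic
 * store-data-then-set-flag handoff needs only these compiler-barrier
 * implementations on x86, because the hardware already keeps stores ordered
 * with stores and loads ordered with loads.  pg_compiler_barrier_impl() is
 * assumed to be supplied by the surrounding atomics headers; the variables
 * and functions below are hypothetical.
 */
static int	example_payload;
static volatile int example_ready;

static void
example_producer(void)
{
	example_payload = 42;		/* write the data ... */
	pg_write_barrier_impl();	/* ... keep the two stores in program order ... */
	example_ready = 1;			/* ... then publish the flag */
}

static void
example_consumer(void)
{
	if (example_ready)			/* observe the flag ... */
	{
		pg_read_barrier_impl();	/* ... keep the two loads in program order */
		int			v = example_payload;	/* guaranteed to be 42 here on x86 */

		(void) v;
	}
}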
46 
47 /*
48  * Provide implementation for atomics using inline assembly on x86 gcc. It's
49  * nice to support older gcc versions, and the compare/exchange implementation
50  * here is actually more efficient than the __sync variant.
51  */
52 #if defined(HAVE_ATOMICS)
53 
54 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
55 
56 #define PG_HAVE_ATOMIC_FLAG_SUPPORT
57 typedef struct pg_atomic_flag
58 {
59  volatile char value;
60 } pg_atomic_flag;
61 
62 #define PG_HAVE_ATOMIC_U32_SUPPORT
63 typedef struct pg_atomic_uint32
64 {
65  volatile uint32 value;
66 } pg_atomic_uint32;
67 
68 /*
69  * It's too complicated to write inline asm for 64bit types on 32bit
70  * platforms, and the 486 can't do it anyway.
71  */
72 #ifdef __x86_64__
73 #define PG_HAVE_ATOMIC_U64_SUPPORT
74 typedef struct pg_atomic_uint64
75 {
76  /* alignment guaranteed due to being on a 64bit platform */
77  volatile uint64 value;
78 } pg_atomic_uint64;
79 #endif /* __x86_64__ */
80 
81 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
82 
83 #endif /* defined(HAVE_ATOMICS) */
84 
85 #if !defined(PG_HAVE_SPIN_DELAY)
86 /*
87  * This sequence is equivalent to the PAUSE instruction ("rep" is
88  * ignored by old IA32 processors if the following instruction is
89  * not a string operation); the IA-32 Architecture Software
90  * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
91  * PAUSE in the inner loop of a spin lock is necessary for good
92  * performance:
93  *
94  * The PAUSE instruction improves the performance of IA-32
95  * processors supporting Hyper-Threading Technology when
96  * executing spin-wait loops and other routines where one
97  * thread is accessing a shared lock or semaphore in a tight
98  * polling loop. When executing a spin-wait loop, the
99  * processor can suffer a severe performance penalty when
100  * exiting the loop because it detects a possible memory order
101  * violation and flushes the core processor's pipeline. The
102  * PAUSE instruction provides a hint to the processor that the
103  * code sequence is a spin-wait loop. The processor uses this
104  * hint to avoid the memory order violation and prevent the
105  * pipeline flush. In addition, the PAUSE instruction
106  * de-pipelines the spin-wait loop to prevent it from
107  * consuming execution resources excessively.
108  */
109 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
110 #define PG_HAVE_SPIN_DELAY
111 static __inline__ void
112 pg_spin_delay_impl(void)
113 {
114  __asm__ __volatile__(" rep; nop \n");
115 }
116 #elif defined(_MSC_VER) && defined(__x86_64__)
117 #define PG_HAVE_SPIN_DELAY
118 static __forceinline void
119 pg_spin_delay_impl(void)
120 {
121  _mm_pause();
122 }
123 #elif defined(_MSC_VER)
124 #define PG_HAVE_SPIN_DELAY
125 static __forceinline void
126 pg_spin_delay_impl(void)
127 {
128  /* See comment for gcc code. Same code, MASM syntax */
129  __asm rep nop;
130 }
131 #endif
132 #endif /* !defined(PG_HAVE_SPIN_DELAY) */
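
/*
 * Illustrative sketch (not part of this header): the kind of tight polling
 * loop the PAUSE hint is meant for.  The flag and function are hypothetical.
 */
static volatile int example_stop_waiting;

static void
example_wait(void)
{
	while (!example_stop_waiting)
		pg_spin_delay_impl();	/* PAUSE: cheap, and friendly to the sibling hyperthread */
}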
133 
134 
135 #if defined(HAVE_ATOMICS)
136 
137 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
138 
139 #define PG_HAVE_ATOMIC_TEST_SET_FLAG
140 static inline bool
141 pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
142 {
143  char _res = 1;
144 
145  __asm__ __volatile__(
146  " lock \n"
147  " xchgb %0,%1 \n"
148 : "+q"(_res), "+m"(ptr->value)
149 :
150 : "memory");
151  return _res == 0;
152 }
153 
154 #define PG_HAVE_ATOMIC_CLEAR_FLAG
155 static inline void
156 pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
157 {
158  /*
159  * On a TSO architecture like x86 it's sufficient to use a compiler
160  * barrier to achieve release semantics.
161  */
162  __asm__ __volatile__("" ::: "memory");
163  ptr->value = 0;
164 }
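
/*
 * Illustrative sketch (not part of this header): the two flag primitives
 * above compose into a simple test-and-set spinlock.  The xchgb in
 * pg_atomic_test_set_flag_impl() acts as a full barrier, giving acquire
 * semantics; the compiler barrier plus plain store in
 * pg_atomic_clear_flag_impl() suffices for release under x86's TSO model.
 * The lock variable and functions are hypothetical; PostgreSQL's real
 * spinlocks live in s_lock.h.
 */
static pg_atomic_flag example_lock = {0};	/* 0 == unlocked */

static void
example_lock_acquire(void)
{
	/* test-and-set returns true if we changed 0 -> 1, i.e. got the lock */
	while (!pg_atomic_test_set_flag_impl(&example_lock))
		pg_spin_delay_impl();	/* someone else holds it; PAUSE and retry */
}

static void
example_lock_release(void)
{
	pg_atomic_clear_flag_impl(&example_lock);	/* compiler barrier + store of 0 */
}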
165 
166 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
167 static inline bool
168 pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
169  uint32 *expected, uint32 newval)
170 {
171  char ret;
172 
173  /*
174  * Perform cmpxchg and use the zero flag, which it sets implicitly on a
175  * successful comparison, to detect success.
176  */
177  __asm__ __volatile__(
178  " lock \n"
179  " cmpxchgl %4,%5 \n"
180  " setz %2 \n"
181 : "=a" (*expected), "=m"(ptr->value), "=q" (ret)
182 : "a" (*expected), "r" (newval), "m"(ptr->value)
183 : "memory", "cc");
184  return (bool) ret;
185 }
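
/*
 * Illustrative sketch (not part of this header): a typical CAS retry loop.
 * Note a useful property of the constraints above: on failure, *expected is
 * overwritten with the value cmpxchg found (the "=a" output), so the loop
 * needs no separate re-read.  The function below is hypothetical.
 */
static void
example_fetch_max_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
{
	uint32		old = ptr->value;	/* initial unsynchronized read is fine */

	while (old < val)
	{
		/* on failure, 'old' has been refreshed with the current value */
		if (pg_atomic_compare_exchange_u32_impl(ptr, &old, val))
			break;
	}
}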
186 
187 #define PG_HAVE_ATOMIC_FETCH_ADD_U32
188 static inline uint32
189 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
190 {
191  uint32 res;
192  __asm__ __volatile__(
193  " lock \n"
194  " xaddl %0,%1 \n"
195 : "=q"(res), "=m"(ptr->value)
196 : "0" (add_), "m"(ptr->value)
197 : "memory", "cc");
198  return res;
199 }
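
/*
 * Illustrative sketch (not part of this header): xadd leaves the
 * pre-increment value in the register tied to add_ (the "0" constraint), so
 * this is fetch-then-add.  A hypothetical use: handing out unique tickets.
 */
static uint32
example_take_ticket(volatile pg_atomic_uint32 *counter)
{
	/* each caller gets the value the counter had *before* its increment */
	return pg_atomic_fetch_add_u32_impl(counter, 1);
}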
200 
201 #ifdef __x86_64__
202 
203 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
204 static inline bool
205 pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
206  uint64 *expected, uint64 newval)
207 {
208  char ret;
209 
210  /*
211  * Perform cmpxchg and use the zero flag, which it sets implicitly on a
212  * successful comparison, to detect success.
213  */
214  __asm__ __volatile__(
215  " lock \n"
216  " cmpxchgq %4,%5 \n"
217  " setz %2 \n"
218 : "=a" (*expected), "=m"(ptr->value), "=q" (ret)
219 : "a" (*expected), "r" (newval), "m"(ptr->value)
220 : "memory", "cc");
221  return (bool) ret;
222 }
223 
224 #define PG_HAVE_ATOMIC_FETCH_ADD_U64
225 static inline uint64
226 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
227 {
228  uint64 res;
229  __asm__ __volatile__(
230  " lock \n"
231  " xaddq %0,%1 \n"
232 : "=q"(res), "=m"(ptr->value)
233 : "0" (add_), "m"(ptr->value)
234 : "memory", "cc");
235  return res;
236 }
237 
238 #endif /* __x86_64__ */
239 
240 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
241 
242 /*
243  * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
244  * since at least the 586, as well as on all x86-64 CPUs.
245  */
246 #if defined(__i586__) || defined(__i686__) || /* gcc i586+ */ \
247  (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
248  defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
249 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
250 #endif /* 8 byte single-copy atomicity */
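
/*
 * Illustrative sketch (not part of this header): with
 * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY defined, a plain aligned 8-byte load
 * can never observe a half-written ("torn") value, so generic code may read
 * such a variable without taking a lock.  The function is hypothetical.
 */
#ifdef PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
static uint64
example_read_u64_unlocked(volatile uint64 *p)
{
	return *p;					/* a single aligned 8-byte load, never torn */
}
#endif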
251 
252 #endif /* HAVE_ATOMICS */