/*
* Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
* Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
* Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
* Copyright (c) 2009-2016 Ivan Maidanski
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
* OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program
* for any purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is granted,
* provided the above notices are retained, and a notice that the code was
* modified is included with the above copyright notice.
*
* Some of the machine specific code was borrowed from our GC distribution.
*/
/* The following really assume we have a 486 or better. */
#include "../all_aligned_atomic_load_store.h"
#include "../test_and_set_t_is_char.h"
#if !defined(AO_USE_PENTIUM4_INSTRS) && !defined(__i386)
/* "mfence" (SSE2) is supported on all x86_64/amd64 chips. */
# define AO_USE_PENTIUM4_INSTRS
#endif
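
/* A full memory barrier.  With SSE2 available (AO_USE_PENTIUM4_INSTRS) */
/* "mfence" is the natural choice; the #else branch below falls back to */
/* the default implementation based on test_and_set_full.               */
#if defined(AO_USE_PENTIUM4_INSTRS)
AO_INLINE void
AO_nop_full(void)
{
  /* Serialize all prior loads and stores. */
  __asm__ __volatile__ ("mfence" : : : "memory");
}
#define AO_HAVE_nop_full
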
#else
/* We could use the cpuid instruction. But that seems to be slower */
/* than the default implementation based on test_and_set_full. Thus */
/* we omit that bit of misinformation here. */
#endif /* !AO_USE_PENTIUM4_INSTRS */
/* As far as we can tell, the lfence and sfence instructions are not */
/* currently needed or useful for cached memory accesses. */
/* Really only works for 486 and later */
#ifndef AO_PREFER_GENERALIZED
AO_INLINE AO_t
AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
{
  AO_t result;
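
  /* "lock; xadd" atomically exchanges the register with *p and stores  */
  /* their sum back to *p, so the old value ends up in the register;    */
  /* the lock prefix also provides the full-barrier semantics.          */
  __asm__ __volatile__ ("lock; xadd %0, %1"
                        : "=r" (result), "+m" (*p)
                        : "0" (incr)
                        : "memory");
  return result;
}
#define AO_HAVE_fetch_and_add_full
#endif /* !AO_PREFER_GENERALIZED */
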
# ifndef AO_NO_CMPXCHG8B
# include "../standard_ao_double_t.h"
/* Reading or writing a quadword aligned on a 64-bit boundary is */
/* always carried out atomically (requires at least a Pentium). */
# define AO_ACCESS_double_CHECK_ALIGNED
# include "../loadstore/double_atomic_load_store.h"
/* Returns nonzero if the comparison succeeded. */
/* Really requires at least a Pentium. */
AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                       AO_t old_val1, AO_t old_val2,
                                       AO_t new_val1, AO_t new_val2)
{
  AO_t dummy;   /* an output for clobbered edx */
  char result;
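
  /* "lock; cmpxchg8b" compares the 8-byte value at *addr against       */
  /* edx:eax (old_val2:old_val1) and, if they match, stores ecx:ebx     */
  /* (new_val2:new_val1); "setz" then captures the success flag.        */
  /* The dummy output absorbs the value cmpxchg8b writes to edx when    */
  /* the comparison fails.  (A complete build would also need to        */
  /* preserve ebx under 32-bit PIC, which this minimal form omits.)     */
  __asm__ __volatile__ ("lock; cmpxchg8b %0; setz %1"
                        : "+m" (*addr), "=a" (result), "=d" (dummy)
                        : "d" (old_val2), "a" (old_val1),
                          "c" (new_val2), "b" (new_val1)
                        : "memory");
  return (int) result;
}
# define AO_HAVE_compare_double_and_swap_double_full
# endif /* !AO_NO_CMPXCHG8B */
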
/* Real X86 implementations, except for some old 32-bit WinChips, */
/* appear to enforce ordering between memory operations, EXCEPT that */
/* a later read can pass earlier writes, presumably due to the visible */
/* presence of store buffers. */
/* We ignore both the WinChips and the fact that the official specs */
/* seem to be much weaker (and arguably too weak to be usable). */
#include "../ordered_except_wr.h"