From 8a2bd747877ac269d3ae06dc14029f479582acfd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 9 Jun 2023 10:58:20 -0700 Subject: [PATCH] host/include/x86_64: Use __m128i for "x" constraints The macOS catalina compiler produces an error for __int128_t as the type for allocation with SSE inline asm constraint. Create a new X86Int128Union type and use the vector type for all SSE register inputs and outputs. Tested-by: Peter Maydell Signed-off-by: Richard Henderson --- host/include/x86_64/host/atomic128-ldst.h | 25 ++++++++++++------- .../x86_64/host/load-extract-al16-al8.h | 8 +++--- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h index adc9332f91..8d6f909d3c 100644 --- a/host/include/x86_64/host/atomic128-ldst.h +++ b/host/include/x86_64/host/atomic128-ldst.h @@ -8,12 +8,19 @@ * atomic primitive is meant to provide. */ -#ifndef AARCH64_ATOMIC128_LDST_H -#define AARCH64_ATOMIC128_LDST_H +#ifndef X86_64_ATOMIC128_LDST_H +#define X86_64_ATOMIC128_LDST_H #ifdef CONFIG_INT128_TYPE #include "host/cpuinfo.h" #include "tcg/debug-assert.h" +#include + +typedef union { + __m128i v; + __int128_t i; + Int128 s; +} X86Int128Union; /* * Through clang 16, with -mcx16, __atomic_load_n is incorrectly @@ -25,10 +32,10 @@ static inline Int128 atomic16_read_ro(const Int128 *ptr) { - Int128Alias r; + X86Int128Union r; tcg_debug_assert(HAVE_ATOMIC128_RO); - asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr)); + asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr)); return r.s; } @@ -36,10 +43,10 @@ static inline Int128 atomic16_read_ro(const Int128 *ptr) static inline Int128 atomic16_read_rw(Int128 *ptr) { __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16); - Int128Alias r; + X86Int128Union r; if (HAVE_ATOMIC128_RO) { - asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align)); + asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr_align)); } else { r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0); } @@ -49,10 +56,10 @@ static inline Int128 atomic16_read_rw(Int128 *ptr) static inline void atomic16_set(Int128 *ptr, Int128 val) { __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16); - Int128Alias new = { .s = val }; + X86Int128Union new = { .s = val }; if (HAVE_ATOMIC128_RO) { - asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i)); + asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.v)); } else { __int128_t old; do { @@ -65,4 +72,4 @@ static inline void atomic16_set(Int128 *ptr, Int128 val) #include "host/include/generic/host/atomic128-ldst.h" #endif -#endif /* AARCH64_ATOMIC128_LDST_H */ +#endif /* X86_64_ATOMIC128_LDST_H */ diff --git a/host/include/x86_64/host/load-extract-al16-al8.h b/host/include/x86_64/host/load-extract-al16-al8.h index 31b6fe8c45..baa506b7b5 100644 --- a/host/include/x86_64/host/load-extract-al16-al8.h +++ b/host/include/x86_64/host/load-extract-al16-al8.h @@ -9,7 +9,7 @@ #define X86_64_LOAD_EXTRACT_AL16_AL8_H #ifdef CONFIG_INT128_TYPE -#include "host/cpuinfo.h" +#include "host/atomic128-ldst.h" /** * load_atom_extract_al16_or_al8: @@ -26,7 +26,7 @@ load_atom_extract_al16_or_al8(void *pv, int s) uintptr_t pi = (uintptr_t)pv; __int128_t *ptr_align = (__int128_t *)(pi & ~7); int shr = (pi & 7) * 8; - Int128Alias r; + X86Int128Union r; /* * ptr_align % 16 is now only 0 or 8. @@ -35,9 +35,9 @@ load_atom_extract_al16_or_al8(void *pv, int s) * when ptr_align % 16 == 0 for 16-byte atomicity. */ if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) { - asm("vmovdqu %1, %0" : "=x" (r.i) : "m" (*ptr_align)); + asm("vmovdqu %1, %0" : "=x" (r.v) : "m" (*ptr_align)); } else { - asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align)); + asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr_align)); } return int128_getlo(int128_urshift(r.s, shr)); }