Support ARM64EC ABI
diff --git a/src/threadpool-atomics.h b/src/threadpool-atomics.h
index 44772e2..eaa0707 100644
--- a/src/threadpool-atomics.h
+++ b/src/threadpool-atomics.h
@@ -5,7 +5,7 @@
#include <stdint.h>
/* SSE-specific headers */
-#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
#include <xmmintrin.h>
#endif
@@ -369,365 +369,6 @@
static inline void pthreadpool_fence_release() {
__sync_synchronize();
}
-#elif defined(_MSC_VER) && defined(_M_X64)
- typedef volatile uint32_t pthreadpool_atomic_uint32_t;
- typedef volatile size_t pthreadpool_atomic_size_t;
- typedef void *volatile pthreadpool_atomic_void_p;
-
- static inline uint32_t pthreadpool_load_relaxed_uint32_t(
- pthreadpool_atomic_uint32_t* address)
- {
- return *address;
- }
-
- static inline size_t pthreadpool_load_relaxed_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return *address;
- }
-
- static inline void* pthreadpool_load_relaxed_void_p(
- pthreadpool_atomic_void_p* address)
- {
- return *address;
- }
-
- static inline uint32_t pthreadpool_load_acquire_uint32_t(
- pthreadpool_atomic_uint32_t* address)
- {
- /* x86-64 loads always have acquire semantics; use only a compiler barrier */
- const uint32_t value = *address;
- _ReadBarrier();
- return value;
- }
-
- static inline size_t pthreadpool_load_acquire_size_t(
- pthreadpool_atomic_size_t* address)
- {
- /* x86-64 loads always have acquire semantics; use only a compiler barrier */
- const size_t value = *address;
- _ReadBarrier();
- return value;
- }
-
- static inline void pthreadpool_store_relaxed_uint32_t(
- pthreadpool_atomic_uint32_t* address,
- uint32_t value)
- {
- *address = value;
- }
-
- static inline void pthreadpool_store_relaxed_size_t(
- pthreadpool_atomic_size_t* address,
- size_t value)
- {
- *address = value;
- }
-
- static inline void pthreadpool_store_relaxed_void_p(
- pthreadpool_atomic_void_p* address,
- void* value)
- {
- *address = value;
- }
-
- static inline void pthreadpool_store_release_uint32_t(
- pthreadpool_atomic_uint32_t* address,
- uint32_t value)
- {
- /* x86-64 stores always have release semantics; use only a compiler barrier */
- _WriteBarrier();
- *address = value;
- }
-
- static inline void pthreadpool_store_release_size_t(
- pthreadpool_atomic_size_t* address,
- size_t value)
- {
- /* x86-64 stores always have release semantics; use only a compiler barrier */
- _WriteBarrier();
- *address = value;
- }
-
- static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement64((volatile __int64*) address);
- }
-
- static inline size_t pthreadpool_decrement_fetch_release_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement64((volatile __int64*) address);
- }
-
- static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement64((volatile __int64*) address);
- }
-
- static inline bool pthreadpool_try_decrement_relaxed_size_t(
- pthreadpool_atomic_size_t* value)
- {
- size_t actual_value = *value;
- while (actual_value != 0) {
- const size_t new_value = actual_value - 1;
- const size_t expected_value = actual_value;
- actual_value = _InterlockedCompareExchange64(
- (volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
- if (actual_value == expected_value) {
- return true;
- }
- }
- return false;
- }
-
- static inline void pthreadpool_fence_acquire() {
- _mm_lfence();
- _ReadBarrier();
- }
-
- static inline void pthreadpool_fence_release() {
- _WriteBarrier();
- _mm_sfence();
- }
-#elif defined(_MSC_VER) && defined(_M_IX86)
- typedef volatile uint32_t pthreadpool_atomic_uint32_t;
- typedef volatile size_t pthreadpool_atomic_size_t;
- typedef void *volatile pthreadpool_atomic_void_p;
-
- static inline uint32_t pthreadpool_load_relaxed_uint32_t(
- pthreadpool_atomic_uint32_t* address)
- {
- return *address;
- }
-
- static inline size_t pthreadpool_load_relaxed_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return *address;
- }
-
- static inline void* pthreadpool_load_relaxed_void_p(
- pthreadpool_atomic_void_p* address)
- {
- return *address;
- }
-
- static inline uint32_t pthreadpool_load_acquire_uint32_t(
- pthreadpool_atomic_uint32_t* address)
- {
- /* x86 loads always have acquire semantics; use only a compiler barrier */
- const uint32_t value = *address;
- _ReadBarrier();
- return value;
- }
-
- static inline size_t pthreadpool_load_acquire_size_t(
- pthreadpool_atomic_size_t* address)
- {
- /* x86 loads always have acquire semantics; use only a compiler barrier */
- const size_t value = *address;
- _ReadBarrier();
- return value;
- }
-
- static inline void pthreadpool_store_relaxed_uint32_t(
- pthreadpool_atomic_uint32_t* address,
- uint32_t value)
- {
- *address = value;
- }
-
- static inline void pthreadpool_store_relaxed_size_t(
- pthreadpool_atomic_size_t* address,
- size_t value)
- {
- *address = value;
- }
-
- static inline void pthreadpool_store_relaxed_void_p(
- pthreadpool_atomic_void_p* address,
- void* value)
- {
- *address = value;
- }
-
- static inline void pthreadpool_store_release_uint32_t(
- pthreadpool_atomic_uint32_t* address,
- uint32_t value)
- {
- /* x86 stores always have release semantics; use only a compiler barrier */
- _WriteBarrier();
- *address = value;
- }
-
- static inline void pthreadpool_store_release_size_t(
- pthreadpool_atomic_size_t* address,
- size_t value)
- {
- /* x86 stores always have release semantics; use only a compiler barrier */
- _WriteBarrier();
- *address = value;
- }
-
- static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement((volatile long*) address);
- }
-
- static inline size_t pthreadpool_decrement_fetch_release_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement((volatile long*) address);
- }
-
- static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement((volatile long*) address);
- }
-
- static inline bool pthreadpool_try_decrement_relaxed_size_t(
- pthreadpool_atomic_size_t* value)
- {
- size_t actual_value = *value;
- while (actual_value != 0) {
- const size_t new_value = actual_value - 1;
- const size_t expected_value = actual_value;
- actual_value = _InterlockedCompareExchange(
- (volatile long*) value, (long) new_value, (long) expected_value);
- if (actual_value == expected_value) {
- return true;
- }
- }
- return false;
- }
-
- static inline void pthreadpool_fence_acquire() {
- _mm_lfence();
- }
-
- static inline void pthreadpool_fence_release() {
- _mm_sfence();
- }
-#elif defined(_MSC_VER) && defined(_M_ARM64)
- typedef volatile uint32_t pthreadpool_atomic_uint32_t;
- typedef volatile size_t pthreadpool_atomic_size_t;
- typedef void *volatile pthreadpool_atomic_void_p;
-
- static inline uint32_t pthreadpool_load_relaxed_uint32_t(
- pthreadpool_atomic_uint32_t* address)
- {
- return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
- }
-
- static inline size_t pthreadpool_load_relaxed_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) __iso_volatile_load64((const volatile __int64*) address);
- }
-
- static inline void* pthreadpool_load_relaxed_void_p(
- pthreadpool_atomic_void_p* address)
- {
- return (void*) __iso_volatile_load64((const volatile __int64*) address);
- }
-
- static inline uint32_t pthreadpool_load_acquire_uint32_t(
- pthreadpool_atomic_uint32_t* address)
- {
- return (uint32_t) __ldar32((volatile unsigned __int32*) address);
- }
-
- static inline size_t pthreadpool_load_acquire_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) __ldar64((volatile unsigned __int64*) address);
- }
-
- static inline void pthreadpool_store_relaxed_uint32_t(
- pthreadpool_atomic_uint32_t* address,
- uint32_t value)
- {
- __iso_volatile_store32((volatile __int32*) address, (__int32) value);
- }
-
- static inline void pthreadpool_store_relaxed_size_t(
- pthreadpool_atomic_size_t* address,
- size_t value)
- {
- __iso_volatile_store64((volatile __int64*) address, (__int64) value);
- }
-
- static inline void pthreadpool_store_relaxed_void_p(
- pthreadpool_atomic_void_p* address,
- void* value)
- {
- __iso_volatile_store64((volatile __int64*) address, (__int64) value);
- }
-
- static inline void pthreadpool_store_release_uint32_t(
- pthreadpool_atomic_uint32_t* address,
- uint32_t value)
- {
- _WriteBarrier();
- __stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value);
- }
-
- static inline void pthreadpool_store_release_size_t(
- pthreadpool_atomic_size_t* address,
- size_t value)
- {
- _WriteBarrier();
- __stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value);
- }
-
- static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address);
- }
-
- static inline size_t pthreadpool_decrement_fetch_release_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address);
- }
-
- static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
- pthreadpool_atomic_size_t* address)
- {
- return (size_t) _InterlockedDecrement64((volatile __int64*) address);
- }
-
- static inline bool pthreadpool_try_decrement_relaxed_size_t(
- pthreadpool_atomic_size_t* value)
- {
- size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value);
- while (actual_value != 0) {
- const size_t new_value = actual_value - 1;
- const size_t expected_value = actual_value;
- actual_value = _InterlockedCompareExchange64_nf(
- (volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
- if (actual_value == expected_value) {
- return true;
- }
- }
- return false;
- }
-
- static inline void pthreadpool_fence_acquire() {
- __dmb(_ARM64_BARRIER_ISHLD);
- _ReadBarrier();
- }
-
- static inline void pthreadpool_fence_release() {
- _WriteBarrier();
- __dmb(_ARM64_BARRIER_ISH);
- }
#elif defined(_MSC_VER) && defined(_M_ARM)
typedef volatile uint32_t pthreadpool_atomic_uint32_t;
typedef volatile size_t pthreadpool_atomic_size_t;
@@ -851,15 +492,370 @@
_WriteBarrier();
__dmb(_ARM_BARRIER_ISH);
}
+#elif defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
+ typedef volatile uint32_t pthreadpool_atomic_uint32_t;
+ typedef volatile size_t pthreadpool_atomic_size_t;
+ typedef void *volatile pthreadpool_atomic_void_p;
+
+ static inline uint32_t pthreadpool_load_relaxed_uint32_t(
+ pthreadpool_atomic_uint32_t* address)
+ {
+ return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
+ }
+
+ static inline size_t pthreadpool_load_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) __iso_volatile_load64((const volatile __int64*) address);
+ }
+
+ static inline void* pthreadpool_load_relaxed_void_p(
+ pthreadpool_atomic_void_p* address)
+ {
+ return (void*) __iso_volatile_load64((const volatile __int64*) address);
+ }
+
+ static inline uint32_t pthreadpool_load_acquire_uint32_t(
+ pthreadpool_atomic_uint32_t* address)
+ {
+ return (uint32_t) __ldar32((volatile unsigned __int32*) address);
+ }
+
+ static inline size_t pthreadpool_load_acquire_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) __ldar64((volatile unsigned __int64*) address);
+ }
+
+ static inline void pthreadpool_store_relaxed_uint32_t(
+ pthreadpool_atomic_uint32_t* address,
+ uint32_t value)
+ {
+ __iso_volatile_store32((volatile __int32*) address, (__int32) value);
+ }
+
+ static inline void pthreadpool_store_relaxed_size_t(
+ pthreadpool_atomic_size_t* address,
+ size_t value)
+ {
+ __iso_volatile_store64((volatile __int64*) address, (__int64) value);
+ }
+
+ static inline void pthreadpool_store_relaxed_void_p(
+ pthreadpool_atomic_void_p* address,
+ void* value)
+ {
+ __iso_volatile_store64((volatile __int64*) address, (__int64) value);
+ }
+
+ static inline void pthreadpool_store_release_uint32_t(
+ pthreadpool_atomic_uint32_t* address,
+ uint32_t value)
+ {
+ _WriteBarrier();
+ __stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value);
+ }
+
+ static inline void pthreadpool_store_release_size_t(
+ pthreadpool_atomic_size_t* address,
+ size_t value)
+ {
+ _WriteBarrier();
+ __stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value);
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address);
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_release_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address);
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement64((volatile __int64*) address);
+ }
+
+ static inline bool pthreadpool_try_decrement_relaxed_size_t(
+ pthreadpool_atomic_size_t* value)
+ {
+ size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value);
+ while (actual_value != 0) {
+ const size_t new_value = actual_value - 1;
+ const size_t expected_value = actual_value;
+ actual_value = _InterlockedCompareExchange64_nf(
+ (volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
+ if (actual_value == expected_value) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ static inline void pthreadpool_fence_acquire() {
+ __dmb(_ARM64_BARRIER_ISHLD);
+ _ReadBarrier();
+ }
+
+ static inline void pthreadpool_fence_release() {
+ _WriteBarrier();
+ __dmb(_ARM64_BARRIER_ISH);
+ }
+#elif defined(_MSC_VER) && defined(_M_IX86)
+ typedef volatile uint32_t pthreadpool_atomic_uint32_t;
+ typedef volatile size_t pthreadpool_atomic_size_t;
+ typedef void *volatile pthreadpool_atomic_void_p;
+
+ static inline uint32_t pthreadpool_load_relaxed_uint32_t(
+ pthreadpool_atomic_uint32_t* address)
+ {
+ return *address;
+ }
+
+ static inline size_t pthreadpool_load_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return *address;
+ }
+
+ static inline void* pthreadpool_load_relaxed_void_p(
+ pthreadpool_atomic_void_p* address)
+ {
+ return *address;
+ }
+
+ static inline uint32_t pthreadpool_load_acquire_uint32_t(
+ pthreadpool_atomic_uint32_t* address)
+ {
+ /* x86 loads always have acquire semantics; use only a compiler barrier */
+ const uint32_t value = *address;
+ _ReadBarrier();
+ return value;
+ }
+
+ static inline size_t pthreadpool_load_acquire_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ /* x86 loads always have acquire semantics; use only a compiler barrier */
+ const size_t value = *address;
+ _ReadBarrier();
+ return value;
+ }
+
+ static inline void pthreadpool_store_relaxed_uint32_t(
+ pthreadpool_atomic_uint32_t* address,
+ uint32_t value)
+ {
+ *address = value;
+ }
+
+ static inline void pthreadpool_store_relaxed_size_t(
+ pthreadpool_atomic_size_t* address,
+ size_t value)
+ {
+ *address = value;
+ }
+
+ static inline void pthreadpool_store_relaxed_void_p(
+ pthreadpool_atomic_void_p* address,
+ void* value)
+ {
+ *address = value;
+ }
+
+ static inline void pthreadpool_store_release_uint32_t(
+ pthreadpool_atomic_uint32_t* address,
+ uint32_t value)
+ {
+ /* x86 stores always have release semantics; use only a compiler barrier */
+ _WriteBarrier();
+ *address = value;
+ }
+
+ static inline void pthreadpool_store_release_size_t(
+ pthreadpool_atomic_size_t* address,
+ size_t value)
+ {
+ /* x86 stores always have release semantics; use only a compiler barrier */
+ _WriteBarrier();
+ *address = value;
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement((volatile long*) address);
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_release_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement((volatile long*) address);
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement((volatile long*) address);
+ }
+
+ static inline bool pthreadpool_try_decrement_relaxed_size_t(
+ pthreadpool_atomic_size_t* value)
+ {
+ size_t actual_value = *value;
+ while (actual_value != 0) {
+ const size_t new_value = actual_value - 1;
+ const size_t expected_value = actual_value;
+ actual_value = _InterlockedCompareExchange(
+ (volatile long*) value, (long) new_value, (long) expected_value);
+ if (actual_value == expected_value) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ static inline void pthreadpool_fence_acquire() {
+ _mm_lfence();
+ }
+
+ static inline void pthreadpool_fence_release() {
+ _mm_sfence();
+ }
+#elif defined(_MSC_VER) && defined(_M_X64)
+ typedef volatile uint32_t pthreadpool_atomic_uint32_t;
+ typedef volatile size_t pthreadpool_atomic_size_t;
+ typedef void *volatile pthreadpool_atomic_void_p;
+
+ static inline uint32_t pthreadpool_load_relaxed_uint32_t(
+ pthreadpool_atomic_uint32_t* address)
+ {
+ return *address;
+ }
+
+ static inline size_t pthreadpool_load_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return *address;
+ }
+
+ static inline void* pthreadpool_load_relaxed_void_p(
+ pthreadpool_atomic_void_p* address)
+ {
+ return *address;
+ }
+
+ static inline uint32_t pthreadpool_load_acquire_uint32_t(
+ pthreadpool_atomic_uint32_t* address)
+ {
+ /* x86-64 loads always have acquire semantics; use only a compiler barrier */
+ const uint32_t value = *address;
+ _ReadBarrier();
+ return value;
+ }
+
+ static inline size_t pthreadpool_load_acquire_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ /* x86-64 loads always have acquire semantics; use only a compiler barrier */
+ const size_t value = *address;
+ _ReadBarrier();
+ return value;
+ }
+
+ static inline void pthreadpool_store_relaxed_uint32_t(
+ pthreadpool_atomic_uint32_t* address,
+ uint32_t value)
+ {
+ *address = value;
+ }
+
+ static inline void pthreadpool_store_relaxed_size_t(
+ pthreadpool_atomic_size_t* address,
+ size_t value)
+ {
+ *address = value;
+ }
+
+ static inline void pthreadpool_store_relaxed_void_p(
+ pthreadpool_atomic_void_p* address,
+ void* value)
+ {
+ *address = value;
+ }
+
+ static inline void pthreadpool_store_release_uint32_t(
+ pthreadpool_atomic_uint32_t* address,
+ uint32_t value)
+ {
+ /* x86-64 stores always have release semantics; use only a compiler barrier */
+ _WriteBarrier();
+ *address = value;
+ }
+
+ static inline void pthreadpool_store_release_size_t(
+ pthreadpool_atomic_size_t* address,
+ size_t value)
+ {
+ /* x86-64 stores always have release semantics; use only a compiler barrier */
+ _WriteBarrier();
+ *address = value;
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement64((volatile __int64*) address);
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_release_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement64((volatile __int64*) address);
+ }
+
+ static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
+ pthreadpool_atomic_size_t* address)
+ {
+ return (size_t) _InterlockedDecrement64((volatile __int64*) address);
+ }
+
+ static inline bool pthreadpool_try_decrement_relaxed_size_t(
+ pthreadpool_atomic_size_t* value)
+ {
+ size_t actual_value = *value;
+ while (actual_value != 0) {
+ const size_t new_value = actual_value - 1;
+ const size_t expected_value = actual_value;
+ actual_value = _InterlockedCompareExchange64(
+ (volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
+ if (actual_value == expected_value) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ static inline void pthreadpool_fence_acquire() {
+ _mm_lfence();
+ _ReadBarrier();
+ }
+
+ static inline void pthreadpool_fence_release() {
+ _WriteBarrier();
+ _mm_sfence();
+ }
#else
#error "Platform-specific implementation of threadpool-atomics.h required"
#endif
-#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
- static inline void pthreadpool_yield() {
- _mm_pause();
- }
-#elif defined(__ARM_ACLE) || defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#if defined(__ARM_ACLE) || defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC))
static inline void pthreadpool_yield() {
__yield();
}
@@ -867,6 +863,10 @@
static inline void pthreadpool_yield() {
__asm__ __volatile__("yield");
}
+#elif defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+ static inline void pthreadpool_yield() {
+ _mm_pause();
+ }
#else
static inline void pthreadpool_yield() {
pthreadpool_fence_acquire();
diff --git a/src/threadpool-utils.h b/src/threadpool-utils.h
index 91e2445..0b81360 100644
--- a/src/threadpool-utils.h
+++ b/src/threadpool-utils.h
@@ -4,7 +4,7 @@
#include <stddef.h>
/* SSE-specific headers */
-#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+#if defined(__SSE__) || defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC)) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
#include <xmmintrin.h>
#endif
@@ -15,12 +15,12 @@
struct fpu_state {
-#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
- uint32_t mxcsr;
-#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) || defined(_MSC_VER) && defined(_M_ARM)
+#if defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) || defined(_MSC_VER) && defined(_M_ARM)
uint32_t fpscr;
-#elif defined(__GNUC__) && defined(__aarch64__) || defined(_MSC_VER) && defined(_M_ARM64)
+#elif defined(__GNUC__) && defined(__aarch64__) || defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
uint64_t fpcr;
+#elif defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+ uint32_t mxcsr;
#else
char unused;
#endif
@@ -28,12 +28,12 @@
static inline struct fpu_state get_fpu_state() {
struct fpu_state state = { 0 };
-#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
- state.mxcsr = (uint32_t) _mm_getcsr();
-#elif defined(_MSC_VER) && defined(_M_ARM)
+#if defined(_MSC_VER) && defined(_M_ARM)
state.fpscr = (uint32_t) _MoveFromCoprocessor(10, 7, 1, 0, 0);
-#elif defined(_MSC_VER) && defined(_M_ARM64)
+#elif defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
state.fpcr = (uint64_t) _ReadStatusReg(0x5A20);
+#elif defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+ state.mxcsr = (uint32_t) _mm_getcsr();
#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
__asm__ __volatile__("VMRS %[fpscr], fpscr" : [fpscr] "=r" (state.fpscr));
#elif defined(__GNUC__) && defined(__aarch64__)
@@ -43,27 +43,25 @@
}
static inline void set_fpu_state(const struct fpu_state state) {
-#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
- _mm_setcsr((unsigned int) state.mxcsr);
-#elif defined(_MSC_VER) && defined(_M_ARM)
+#if defined(_MSC_VER) && defined(_M_ARM)
_MoveToCoprocessor((int) state.fpscr, 10, 7, 1, 0, 0);
-#elif defined(_MSC_VER) && defined(_M_ARM64)
+#elif defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
_WriteStatusReg(0x5A20, (__int64) state.fpcr);
#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
__asm__ __volatile__("VMSR fpscr, %[fpscr]" : : [fpscr] "r" (state.fpscr));
#elif defined(__GNUC__) && defined(__aarch64__)
__asm__ __volatile__("MSR fpcr, %[fpcr]" : : [fpcr] "r" (state.fpcr));
+#elif defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+ _mm_setcsr((unsigned int) state.mxcsr);
#endif
}
static inline void disable_fpu_denormals() {
-#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
- _mm_setcsr(_mm_getcsr() | 0x8040);
-#elif defined(_MSC_VER) && defined(_M_ARM)
+#if defined(_MSC_VER) && defined(_M_ARM)
int fpscr = _MoveFromCoprocessor(10, 7, 1, 0, 0);
fpscr |= 0x1000000;
_MoveToCoprocessor(fpscr, 10, 7, 1, 0, 0);
-#elif defined(_MSC_VER) && defined(_M_ARM64)
+#elif defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
__int64 fpcr = _ReadStatusReg(0x5A20);
fpcr |= 0x1080000;
_WriteStatusReg(0x5A20, fpcr);
@@ -92,6 +90,8 @@
"ORR %w[fpcr], %w[fpcr], 0x80000\n"
"MSR fpcr, %[fpcr]\n"
: [fpcr] "=r" (fpcr));
+#elif defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+ _mm_setcsr(_mm_getcsr() | 0x8040);
#endif
}