[clang] [X86] Do not apply fast-math to the logic intrinsics (PR #118603)
Phoebe Wang via cfe-commits
cfe-commits at lists.llvm.org
Wed Dec 4 00:28:47 PST 2024
https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/118603
Fixes: #118152
From 6ec0b5b1e18e262c8eae131ce6f29866392494f2 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Wed, 4 Dec 2024 16:26:12 +0800
Subject: [PATCH] [X86] Do not apply fast-math to the logic intrinsics
Fixes: #118152
---
clang/lib/Headers/avx512dqintrin.h | 3 +++
clang/lib/Headers/avxintrin.h | 3 +++
clang/lib/Headers/xmmintrin.h | 3 +++
3 files changed, 9 insertions(+)
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 88b48e3a32070b..d3ae91e29f9afb 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -167,6 +167,8 @@ _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
(__v8di)_mm512_setzero_si512());
}
+#pragma float_control(push)
+#pragma float_control(precise, on)
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_xor_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A ^ (__v8du)__B);
@@ -318,6 +320,7 @@ _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_andnot_ps(__A, __B),
(__v16sf)_mm512_setzero_ps());
}
+#pragma float_control(pop)
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi64 (__m512d __A) {
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 8e497a98234994..9fd3dbb5519e51 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -542,6 +542,8 @@ _mm256_rcp_ps(__m256 __a)
/// \returns A 256-bit vector of [8 x float] containing the rounded down values.
#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
+#pragma float_control(push)
+#pragma float_control(precise, on)
/* Logical */
/// Performs a bitwise AND of two 256-bit vectors of [4 x double].
///
@@ -692,6 +694,7 @@ _mm256_xor_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8su)__a ^ (__v8su)__b);
}
+#pragma float_control(pop)
/* Horizontal arithmetic */
/// Horizontally adds the adjacent pairs of values contained in two
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 20e66d190113a3..dbc3f0d8df68fe 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -425,6 +425,8 @@ _mm_max_ps(__m128 __a, __m128 __b)
return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);
}
+#pragma float_control(push)
+#pragma float_control(precise, on)
/// Performs a bitwise AND of two 128-bit vectors of [4 x float].
///
/// \headerfile <x86intrin.h>
@@ -497,6 +499,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_xor_ps(__m128 __a, __m128 __b) {
return (__m128)((__v4su)__a ^ (__v4su)__b);
}
+#pragma float_control(pop)
/// Compares two 32-bit float values in the low-order bits of both
/// operands for equality.
More information about the cfe-commits
mailing list