[clang] [Headers][X86] Group related AVX512VL FMA intrinsics together (NFC) (PR #156794)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 3 20:48:09 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Chaitanya Koparkar (ckoparkar)
<details>
<summary>Changes</summary>
Follow-up of #<!-- -->156385.
---
Patch is 74.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156794.diff
2 Files Affected:
- (modified) clang/lib/Headers/avx512vlintrin.h (+96-96)
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+265-265)
``````````diff
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index aa186c136bb53..ee4b7110e2ce7 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -925,6 +925,12 @@ _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
(__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__A);
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+ return (__m128d)__builtin_ia32_selectpd_128(
+ (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__C);
+}
+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_selectpd_128(
@@ -932,6 +938,12 @@ _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
(__v2df)_mm_setzero_pd());
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+ return (__m128d)__builtin_ia32_selectpd_128(
+ (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__A);
+}
+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
return (__m128d)__builtin_ia32_selectpd_128(
@@ -945,6 +957,18 @@ _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
(__v2df)_mm_setzero_pd());
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+ return (__m128d)__builtin_ia32_selectpd_128(
+ (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__A);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+ return (__m128d)__builtin_ia32_selectpd_128(
+ (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__C);
+}
+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_selectpd_128(
@@ -977,6 +1001,12 @@ _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
(__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__A);
}
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+ return (__m256d)__builtin_ia32_selectpd_256(
+ (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__C);
+}
+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_selectpd_256(
@@ -984,6 +1014,12 @@ _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
(__v4df)_mm256_setzero_pd());
}
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+ return (__m256d)__builtin_ia32_selectpd_256(
+ (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__A);
+}
+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
return (__m256d)__builtin_ia32_selectpd_256(
@@ -997,6 +1033,18 @@ _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
(__v4df)_mm256_setzero_pd());
}
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+ return (__m256d)__builtin_ia32_selectpd_256(
+ (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__A);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+ return (__m256d)__builtin_ia32_selectpd_256(
+ (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__C);
+}
+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1029,6 +1077,12 @@ _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
(__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__A);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+ return (__m128)__builtin_ia32_selectps_128(
+ (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__C);
+}
+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_selectps_128(
@@ -1036,6 +1090,12 @@ _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
(__v4sf)_mm_setzero_ps());
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+ return (__m128)__builtin_ia32_selectps_128(
+ (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__A);
+}
+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
return (__m128)__builtin_ia32_selectps_128(
@@ -1049,6 +1109,18 @@ _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
(__v4sf)_mm_setzero_ps());
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+ return (__m128)__builtin_ia32_selectps_128(
+ (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__A);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+ return (__m128)__builtin_ia32_selectps_128(
+ (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__C);
+}
+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_selectps_128(
@@ -1081,6 +1153,12 @@ _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
(__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__A);
}
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+ return (__m256)__builtin_ia32_selectps_256(
+ (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__C);
+}
+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_selectps_256(
@@ -1088,6 +1166,12 @@ _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
(__v8sf)_mm256_setzero_ps());
}
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+ return (__m256)__builtin_ia32_selectps_256(
+ (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__A);
+}
+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
return (__m256)__builtin_ia32_selectps_256(
@@ -1101,6 +1185,18 @@ _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
(__v8sf)_mm256_setzero_ps());
}
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+ return (__m256)__builtin_ia32_selectps_256(
+ (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__A);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+ return (__m256)__builtin_ia32_selectps_256(
+ (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__C);
+}
+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_selectps_256(
@@ -1308,30 +1404,6 @@ _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
- return (__m128d)__builtin_ia32_selectpd_128(
- (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__C);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
- return (__m256d)__builtin_ia32_selectpd_256(
- (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__C);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
- return (__m128)__builtin_ia32_selectps_128(
- (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__C);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
- return (__m256)__builtin_ia32_selectps_256(
- (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__C);
-}
-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
@@ -1372,78 +1444,6 @@ _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
(__v8sf) __C);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
- return (__m128d)__builtin_ia32_selectpd_128(
- (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__A);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
- return (__m256d)__builtin_ia32_selectpd_256(
- (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__A);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
- return (__m128)__builtin_ia32_selectps_128(
- (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__A);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
- return (__m256)__builtin_ia32_selectps_256(
- (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__A);
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
- return (__m128d)__builtin_ia32_selectpd_128(
- (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__A);
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
- return (__m128d)__builtin_ia32_selectpd_128(
- (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__C);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
- return (__m256d)__builtin_ia32_selectpd_256(
- (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__A);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
- return (__m256d)__builtin_ia32_selectpd_256(
- (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__C);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
- return (__m128)__builtin_ia32_selectps_128(
- (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__A);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
- return (__m128)__builtin_ia32_selectps_128(
- (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__C);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
- return (__m256)__builtin_ia32_selectps_256(
- (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__A);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
- return (__m256)__builtin_ia32_selectps_256(
- (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__C);
-}
-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 533896938a205..39250ccefd09d 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -2853,6 +2853,26 @@ __m128d test_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __
TEST_CONSTEXPR(match_m128d(_mm_mask_fmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, 14.0));
TEST_CONSTEXPR(match_m128d(_mm_mask_fmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 8.0, 2.0));
+__m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+ // CHECK-LABEL: test_mm_mask3_fmadd_pd
+ // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
+ return _mm_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, 14.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), 8.0, 6.0));
+
+__m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+ // CHECK-LABEL: test_mm_maskz_fmadd_pd
+ // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
+ return _mm_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fmadd_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, 14.0));
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fmadd_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 8.0, 0.0));
+
__m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
// CHECK-LABEL: test_mm_mask_fmsub_pd
// CHECK: fneg <2 x double> %{{.*}}
@@ -2864,47 +2884,49 @@ __m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __
TEST_CONSTEXPR(match_m128d(_mm_mask_fmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, 2.0));
TEST_CONSTEXPR(match_m128d(_mm_mask_fmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -2.0, 2.0));
-__m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
- // CHECK-LABEL: test_mm_mask3_fmadd_pd
+__m128d test_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+ // CHECK-LABEL: test_mm_mask3_fmsub_pd
+ // CHECK: fneg <2 x double> %{{.*}}
// CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
- return _mm_mask3_fmadd_pd(__A, __B, __C, __U);
+ return _mm_mask3_fmsub_pd(__A, __B, __C, __U);
}
-TEST_CONSTEXPR(match_m128d(_mm_mask3_fmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, 14.0));
-TEST_CONSTEXPR(match_m128d(_mm_mask3_fmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), 8.0, 6.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, 2.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), -2.0, 6.0));
-__m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
- // CHECK-LABEL: test_mm_mask3_fnmadd_pd
+__m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+ // CHECK-LABEL: test_mm_maskz_fmsub_pd
// CHECK: fneg <2 x double> %{{.*}}
// CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
- return _mm_mask3_fnmadd_pd(__A, __B, __C, __U);
+ return _mm_maskz_fmsub_pd(__U, __A, __B, __C);
}
-TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, -2.0));
-TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), 2.0, 6.0));
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fmsub_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, 2.0));
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fmsub_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -2.0, 0.0));
-__m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
- // CHECK-LABEL: test_mm_maskz_fmadd_pd
+__m128d test_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+ // CHECK-LABEL: test_mm_mask_fnmadd_pd
+ // CHECK: fneg <2 x double> %{{.*}}
// CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
- return _mm_maskz_fmadd_pd(__U, __A, __B, __C);
+ return _mm_mask_fnmadd_pd(__A, __U, __B, __C);
}
-TEST_CONSTEXPR(match_m128d(_mm_maskz_fmadd_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, 14.0));
-TEST_CONSTEXPR(match_m128d(_mm_maskz_fmadd_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 8.0, 0.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask_fnmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, -2.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask_fnmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 2.0, 2.0));
-__m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
- // CHECK-LABEL: test_mm_maskz_fmsub_pd
+__m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+ // CHECK-LABEL: test_mm_mask3_fnmadd_pd
// CHECK: fneg <2 x double> %{{.*}}
// CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
- return _mm_maskz_fmsub_pd(__U, __A, __B, __C);
+ return _mm_mask3_fnmadd_pd(__A, __B, __C, __U);
}
-TEST_CONSTEXPR(match_m128d(_mm_maskz_fmsub_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, 2.0));
-TEST_CONSTEXPR(match_m128d(_mm_maskz_fmsub_pd((__mmask8)0b01, (__m...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/156794
More information about the cfe-commits
mailing list