[clang] [Headers][X86] Allow FMA intrinsics to be used in constexpr (PR #156385)
Chaitanya Koparkar via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 1 19:25:40 PDT 2025
https://github.com/ckoparkar created https://github.com/llvm/llvm-project/pull/156385
Fixes #155265
Add constexpr support for the following:
_mm512_fmadd_pd _mm512_mask_fmadd_pd _mm512_mask3_fmadd_pd _mm512_maskz_fmadd_pd _mm512_fmadd_ps _mm512_mask_fmadd_ps _mm512_mask3_fmadd_ps _mm512_maskz_fmadd_ps _mm_mask_fmadd_pd _mm_mask3_fmadd_pd _mm_maskz_fmadd_pd _mm_mask_fmadd_ps _mm_mask3_fmadd_ps _mm_maskz_fmadd_ps _mm256_mask_fmadd_pd _mm256_mask3_fmadd_pd _mm256_maskz_fmadd_pd _mm256_mask_fmadd_ps _mm256_mask3_fmadd_ps _mm256_maskz_fmadd_ps
_mm512_fmsub_pd _mm512_mask_fmsub_pd _mm512_mask3_fmsub_pd _mm512_maskz_fmsub_pd _mm512_fmsub_ps _mm512_mask_fmsub_ps _mm512_mask3_fmsub_ps _mm512_maskz_fmsub_ps _mm_mask_fmsub_pd _mm_mask3_fmsub_pd _mm_maskz_fmsub_pd _mm_mask_fmsub_ps _mm_mask3_fmsub_ps _mm_maskz_fmsub_ps _mm256_mask_fmsub_pd _mm256_mask3_fmsub_pd _mm256_maskz_fmsub_pd _mm256_mask_fmsub_ps _mm256_mask3_fmsub_ps _mm256_maskz_fmsub_ps
_mm512_fnmadd_pd _mm512_mask_fnmadd_pd _mm512_mask3_fnmadd_pd _mm512_maskz_fnmadd_pd _mm512_fnmsub_pd _mm512_mask_fnmsub_pd _mm512_mask3_fnmsub_pd _mm512_maskz_fnmsub_pd _mm_mask_fnmadd_pd _mm_mask3_fnmadd_pd _mm_maskz_fnmadd_pd _mm_mask_fnmadd_ps _mm_mask3_fnmadd_ps _mm_maskz_fnmadd_ps _mm256_mask_fnmadd_pd _mm256_mask3_fnmadd_pd _mm256_maskz_fnmadd_pd _mm256_mask_fnmadd_ps _mm256_mask3_fnmadd_ps _mm256_maskz_fnmadd_ps
_mm512_fnmadd_ps _mm512_mask_fnmadd_ps _mm512_mask3_fnmadd_ps _mm512_maskz_fnmadd_ps _mm512_fnmsub_ps _mm512_mask_fnmsub_ps _mm512_mask3_fnmsub_ps _mm512_maskz_fnmsub_ps _mm_mask_fnmsub_pd _mm_mask3_fnmsub_pd _mm_maskz_fnmsub_pd _mm_mask_fnmsub_ps _mm_mask3_fnmsub_ps _mm_maskz_fnmsub_ps _mm256_mask_fnmsub_pd _mm256_mask3_fnmsub_pd _mm256_maskz_fnmsub_pd _mm256_mask_fnmsub_ps _mm256_mask3_fnmsub_ps _mm256_maskz_fnmsub_ps
>From 2eb5209645af0d3cf0a9de892661465a8c8c4e8b Mon Sep 17 00:00:00 2001
From: Chaitanya Koparkar <ckoparkar at gmail.com>
Date: Mon, 1 Sep 2025 12:34:18 -0400
Subject: [PATCH] [Headers][X86] Allow FMA intrinsics to be used in constexpr
Fixes #155265
Add constexpr support for the following:
_mm512_fmadd_pd _mm512_mask_fmadd_pd _mm512_mask3_fmadd_pd _mm512_maskz_fmadd_pd
_mm512_fmadd_ps _mm512_mask_fmadd_ps _mm512_mask3_fmadd_ps _mm512_maskz_fmadd_ps
_mm_mask_fmadd_pd _mm_mask3_fmadd_pd _mm_maskz_fmadd_pd
_mm_mask_fmadd_ps _mm_mask3_fmadd_ps _mm_maskz_fmadd_ps
_mm256_mask_fmadd_pd _mm256_mask3_fmadd_pd _mm256_maskz_fmadd_pd
_mm256_mask_fmadd_ps _mm256_mask3_fmadd_ps _mm256_maskz_fmadd_ps
_mm512_fmsub_pd _mm512_mask_fmsub_pd _mm512_mask3_fmsub_pd _mm512_maskz_fmsub_pd
_mm512_fmsub_ps _mm512_mask_fmsub_ps _mm512_mask3_fmsub_ps _mm512_maskz_fmsub_ps
_mm_mask_fmsub_pd _mm_mask3_fmsub_pd _mm_maskz_fmsub_pd
_mm_mask_fmsub_ps _mm_mask3_fmsub_ps _mm_maskz_fmsub_ps
_mm256_mask_fmsub_pd _mm256_mask3_fmsub_pd _mm256_maskz_fmsub_pd
_mm256_mask_fmsub_ps _mm256_mask3_fmsub_ps _mm256_maskz_fmsub_ps
_mm512_fnmadd_pd _mm512_mask_fnmadd_pd _mm512_mask3_fnmadd_pd _mm512_maskz_fnmadd_pd
_mm512_fnmsub_pd _mm512_mask_fnmsub_pd _mm512_mask3_fnmsub_pd _mm512_maskz_fnmsub_pd
_mm_mask_fnmadd_pd _mm_mask3_fnmadd_pd _mm_maskz_fnmadd_pd
_mm_mask_fnmadd_ps _mm_mask3_fnmadd_ps _mm_maskz_fnmadd_ps
_mm256_mask_fnmadd_pd _mm256_mask3_fnmadd_pd _mm256_maskz_fnmadd_pd
_mm256_mask_fnmadd_ps _mm256_mask3_fnmadd_ps _mm256_maskz_fnmadd_ps
_mm512_fnmadd_ps _mm512_mask_fnmadd_ps _mm512_mask3_fnmadd_ps _mm512_maskz_fnmadd_ps
_mm512_fnmsub_ps _mm512_mask_fnmsub_ps _mm512_mask3_fnmsub_ps _mm512_maskz_fnmsub_ps
_mm_mask_fnmsub_pd _mm_mask3_fnmsub_pd _mm_maskz_fnmsub_pd
_mm_mask_fnmsub_ps _mm_mask3_fnmsub_ps _mm_maskz_fnmsub_ps
_mm256_mask_fnmsub_pd _mm256_mask3_fnmsub_pd _mm256_maskz_fnmsub_pd
_mm256_mask_fnmsub_ps _mm256_mask3_fnmsub_ps _mm256_maskz_fnmsub_ps
---
clang/lib/Headers/avx512fintrin.h | 392 +++++++++------------
clang/lib/Headers/avx512vlintrin.h | 98 +++---
clang/test/CodeGen/X86/avx512f-builtins.c | 208 +++++++----
clang/test/CodeGen/X86/avx512vl-builtins.c | 96 +++++
4 files changed, 445 insertions(+), 349 deletions(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e23b1c0381ab1..741ce26aaa043 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -2502,124 +2502,136 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) __A);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) _mm512_setzero_pd());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) __A);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) __C);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma((__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) _mm512_setzero_pd());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- -(__v8df) __B,
- (__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) __A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, (__v8df) __C),
+ (__v8df) _mm512_setzero_pd());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- -(__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) __A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) __C);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(
+ (__mmask8) __U,
+ __builtin_elementwise_fma(-(__v8df) __A, (__v8df) __B, -(__v8df) __C),
+ (__v8df) _mm512_setzero_pd());
}
#define _mm512_fmadd_round_ps(A, B, C, R) \
@@ -2706,124 +2718,136 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
(__mmask16)(U), (int)(R)))
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512 ) __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)__A);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)__A);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)__C);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- -(__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)__A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
+ (__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- -(__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)__A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)__C);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_selectps_512(
+ (__mmask16)__U,
+ __builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
+ (__v16sf)_mm512_setzero_ps());
}
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
@@ -3071,15 +3095,6 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
- return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
@@ -3087,16 +3102,6 @@ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
(__v16sf)(__m512)(C), \
(__mmask16)(U), (int)(R)))
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
- return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(B), \
@@ -3138,16 +3143,6 @@ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- -(__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
-(__v16sf)(__m512)(B), \
@@ -3155,16 +3150,6 @@ _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
(__mmask16)(U), (int)(R)))
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- -(__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
-(__v8df)(__m512d)(B), \
@@ -3179,26 +3164,6 @@ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
- -(__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
- return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
- (__v8df) __B,
- (__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
-(__v16sf)(__m512)(B), \
@@ -3213,27 +3178,6 @@ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
(__mmask16)(U), (int)(R)))
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
- -(__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
- return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-
/* Vector permutations */
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index fa66d7cba632a..f9765a99a08e0 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -902,7 +902,7 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
(__v2df)(__m128d)(b), (int)(p), \
(__mmask8)(m)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -911,7 +911,7 @@ _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
(__v2df)__A);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -920,7 +920,7 @@ _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
(__v2df)__C);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -929,7 +929,7 @@ _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -938,7 +938,7 @@ _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
(__v2df)__A);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -947,7 +947,7 @@ _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -956,7 +956,7 @@ _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
(__v2df)__C);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -965,7 +965,7 @@ _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -974,7 +974,7 @@ _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -983,7 +983,7 @@ _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
(__v4df)__A);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -992,7 +992,7 @@ _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
(__v4df)__C);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1001,7 +1001,7 @@ _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1010,7 +1010,7 @@ _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
(__v4df)__A);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1019,7 +1019,7 @@ _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1028,7 +1028,7 @@ _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
(__v4df)__C);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1037,7 +1037,7 @@ _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1046,7 +1046,7 @@ _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1055,7 +1055,7 @@ _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
(__v4sf)__A);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1064,7 +1064,7 @@ _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
(__v4sf)__C);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1073,7 +1073,7 @@ _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1082,7 +1082,7 @@ _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
(__v4sf)__A);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1091,7 +1091,7 @@ _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1100,7 +1100,7 @@ _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
(__v4sf)__C);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1109,7 +1109,7 @@ _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1118,7 +1118,7 @@ _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1127,7 +1127,7 @@ _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
(__v8sf)__A);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1136,7 +1136,7 @@ _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
(__v8sf)__C);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1145,7 +1145,7 @@ _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1154,7 +1154,7 @@ _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
(__v8sf)__A);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1163,7 +1163,7 @@ _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1172,7 +1172,7 @@ _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
(__v8sf)__C);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1181,7 +1181,7 @@ _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1190,7 +1190,7 @@ _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1391,7 +1391,7 @@ _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -1400,7 +1400,7 @@ _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
(__v2df)__C);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1409,7 +1409,7 @@ _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
(__v4df)__C);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1418,7 +1418,7 @@ _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
(__v4sf)__C);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1467,7 +1467,7 @@ _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
(__v8sf) __C);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -1476,7 +1476,7 @@ _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
(__v2df)__A);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1485,7 +1485,7 @@ _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
(__v4df)__A);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1494,7 +1494,7 @@ _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
(__v4sf)__A);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1503,7 +1503,7 @@ _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
(__v8sf)__A);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -1512,7 +1512,7 @@ _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
(__v2df)__A);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
{
return (__m128d)__builtin_ia32_selectpd_128(
@@ -1521,7 +1521,7 @@ _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
(__v2df)__C);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1530,7 +1530,7 @@ _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
(__v4df)__A);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
{
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1539,7 +1539,7 @@ _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
(__v4df)__C);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1548,7 +1548,7 @@ _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
(__v4sf)__A);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
{
return (__m128)__builtin_ia32_selectps_128(
@@ -1557,7 +1557,7 @@ _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
(__v4sf)__C);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_selectps_256(
@@ -1566,7 +1566,7 @@ _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
(__v8sf)__A);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
{
return (__m256)__builtin_ia32_selectps_256(
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index c32ff697cba98..67ea49a48b7e8 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -608,6 +608,7 @@ __m512d test_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
return _mm512_fmadd_pd(__A, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 86.0, 106.0, 128.0, 152.0));
__m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_mask_fmadd_pd
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
@@ -615,6 +616,8 @@ __m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_fmadd_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, 86.0, 106.0, 128.0, 152.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 5.0, 6.0, 7.0, 8.0));
__m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm512_mask3_fmadd_pd
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
@@ -622,19 +625,24 @@ __m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask3_fmadd_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask3_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, 86.0, 106.0, 128.0, 152.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask3_fmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), 26.0, 38.0, 52.0, 68.0, 21.0, 22.0, 23.0, 24.0));
__m512d test_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_maskz_fmadd_pd
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double>
return _mm512_maskz_fmadd_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_fmadd_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, 86.0, 106.0, 128.0, 152.0));
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_fmadd_pd((__mmask8)0b00001111, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 0.0, 0.0, 0.0, 0.0));
__m512d test_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_fmsub_pd
// CHECK: fneg <8 x double> %{{.*}}
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
return _mm512_fmsub_pd(__A, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 44.0, 62.0, 82.0, 104.0));
__m512d test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_mask_fmsub_pd
// CHECK: fneg <8 x double> %{{.*}}
@@ -643,20 +651,45 @@ __m512d test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_fmsub_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, 44.0, 62.0, 82.0, 104.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 5.0, 6.0, 7.0, 8.0));
__m512d test_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_maskz_fmsub_pd
// CHECK: fneg <8 x double> %{{.*}}
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double>
return _mm512_maskz_fmsub_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_fmsub_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, 44.0, 62.0, 82.0, 104.0));
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_fmsub_pd((__mmask8)0b00001111, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 0.0, 0.0, 0.0, 0.0));
+__m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+ // CHECK-LABEL: test_mm512_mask3_fmsub_pd
+ // CHECK: fneg <8 x double> %{{.*}}
+ // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
+ // CHECK: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+TEST_CONSTEXPR(match_m512d(_mm512_mask3_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, 44.0, 62.0, 82.0, 104.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask3_fmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), -8.0, 2.0, 14.0, 28.0, 21.0, 22.0, 23.0, 24.0));
__m512d test_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_fnmadd_pd
// CHECK: fneg <8 x double> %{{.*}}
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
return _mm512_fnmadd_pd(__A, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, -44.0, -62.0, -82.0, -104.0));
+__m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+ // CHECK-LABEL: test_mm512_mask_fnmadd_pd
+ // CHECK: fneg <8 x double> %{{.*}}
+ // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
+ // CHECK: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, -44.0, -62.0, -82.0, -104.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, 5.0, 6.0, 7.0, 8.0));
__m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm512_mask3_fnmadd_pd
// CHECK: fneg <8 x double> %{{.*}}
@@ -665,14 +698,18 @@ __m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmas
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask3_fnmadd_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mask3_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, -44.0, -62.0, -82.0, -104.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask3_fnmadd_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), 8.0, -2.0, -14.0, -28.0, 21.0, 22.0, 23.0, 24.0));
__m512d test_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_maskz_fnmadd_pd
// CHECK: fneg <8 x double> %{{.*}}
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double>
return _mm512_maskz_fnmadd_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_fnmadd_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 00.0, -44.0, -62.0, -82.0, -104.0));
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_fnmadd_pd((__mmask8)0b00001111, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, 0.0, 0.0, 0.0, 0.0));
__m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_fnmsub_pd
// CHECK: fneg <8 x double> %{{.*}}
@@ -680,15 +717,40 @@ __m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
return _mm512_fnmsub_pd(__A, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, -86.0, -106.0, -128.0, -152.0));
+__m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+ // CHECK-LABEL: test_mm512_mask_fnmsub_pd
+ // CHECK: fneg <8 x double> %{{.*}}
+ // CHECK: fneg <8 x double> %{{.*}}
+ // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
+ // CHECK: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, -86.0, -106.0, -128.0, -152.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, 5.0, 6.0, 7.0, 8.0));
+__m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+ // CHECK-LABEL: test_mm512_mask3_fnmsub_pd
+ // CHECK: fneg <8 x double> %{{.*}}
+ // CHECK: fneg <8 x double> %{{.*}}
+ // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
+ // CHECK: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+TEST_CONSTEXPR(match_m512d(_mm512_mask3_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, -86.0, -106.0, -128.0, -152.0));
+TEST_CONSTEXPR(match_m512d(_mm512_mask3_fnmsub_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), -26.0, -38.0, -52.0, -68.0, 21.0, 22.0, 23.0, 24.0));
__m512d test_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_maskz_fnmsub_pd
// CHECK: fneg <8 x double> %{{.*}}
// CHECK: fneg <8 x double> %{{.*}}
// CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double>
return _mm512_maskz_fnmsub_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_fnmsub_pd((__mmask8)0b11110000, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, -86.0, -106.0, -128.0, -152.0));
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_fnmsub_pd((__mmask8)0b00001111, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512d){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, 0.0, 0.0, 0.0, 0.0));
__m512 test_mm512_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_fmadd_round_ps
// CHECK: @llvm.x86.avx512.vfmadd.ps.512
@@ -780,6 +842,7 @@ __m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
return _mm512_fmadd_ps(__A, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 50.0, 70.0, 92.0, 116.0, 142.0, 170.0, 200.0, 232.0, 266.0, 302.0, 340.0, 380.0, 422.0, 466.0, 512.0, 560.0));
__m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_mask_fmadd_ps
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
@@ -787,6 +850,8 @@ __m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_fmadd_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b1111111100000000, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 266.0, 302.0, 340.0, 380.0, 422.0, 466.0, 512.0, 560.0));
+TEST_CONSTEXPR(match_m512(_mm512_mask_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b0000000011111111, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 50.0, 70.0, 92.0, 116.0, 142.0, 170.0, 200.0, 232.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0));
__m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
// CHECK-LABEL: test_mm512_mask3_fmadd_ps
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
@@ -794,19 +859,24 @@ __m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 _
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask3_fmadd_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b1111111100000000), 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 266.0, 302.0, 340.0, 380.0, 422.0, 466.0, 512.0, 560.0));
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b0000000011111111), 50.0, 70.0, 92.0, 116.0, 142.0, 170.0, 200.0, 232.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0));
__m512 test_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_maskz_fmadd_ps
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float>
return _mm512_maskz_fmadd_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_fmadd_ps((__mmask16)0b1111111100000000, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 266.0, 302.0, 340.0, 380.0, 422.0, 466.0, 512.0, 560.0));
+TEST_CONSTEXPR(match_m512(_mm512_maskz_fmadd_ps((__mmask16)0b0000000011111111, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 50.0, 70.0, 92.0, 116.0, 142.0, 170.0, 200.0, 232.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0));
__m512 test_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_fmsub_ps
// CHECK: fneg <16 x float> %{{.*}}
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
return _mm512_fmsub_ps(__A, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -16.0, 2.0, 22.0, 44.0, 68.0, 94.0, 122.0, 152.0, 184.0, 218.0, 254.0, 292.0, 332.0, 374.0, 418.0, 464.0));
__m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_mask_fmsub_ps
// CHECK: fneg <16 x float> %{{.*}}
@@ -815,20 +885,45 @@ __m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_fmsub_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b1111111100000000, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 184.0, 218.0, 254.0, 292.0, 332.0, 374.0, 418.0, 464.0));
+TEST_CONSTEXPR(match_m512(_mm512_mask_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b0000000011111111, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -16.0, 2.0, 22.0, 44.0, 68.0, 94.0, 122.0, 152.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0));
+__m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+ // CHECK-LABEL: test_mm512_mask3_fmsub_ps
+ // CHECK: fneg <16 x float> %{{.*}}
+ // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
+ // CHECK: bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b1111111100000000), 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 184.0, 218.0, 254.0, 292.0, 332.0, 374.0, 418.0, 464.0));
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b0000000011111111), -16.0, 2.0, 22.0, 44.0, 68.0, 94.0, 122.0, 152.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0));
__m512 test_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_maskz_fmsub_ps
// CHECK: fneg <16 x float> %{{.*}}
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float>
return _mm512_maskz_fmsub_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_fmsub_ps((__mmask16)0b1111111100000000, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 184.0, 218.0, 254.0, 292.0, 332.0, 374.0, 418.0, 464.0));
+TEST_CONSTEXPR(match_m512(_mm512_maskz_fmsub_ps((__mmask16)0b0000000011111111, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -16.0, 2.0, 22.0, 44.0, 68.0, 94.0, 122.0, 152.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0));
__m512 test_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_fnmadd_ps
// CHECK: fneg <16 x float> %{{.*}}
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
return _mm512_fnmadd_ps(__A, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 16.0, -2.0, -22.0, -44.0, -68.0, -94.0, -122.0, -152.0, -184.0, -218.0, -254.0, -292.0, -332.0, -374.0, -418.0, -464.0));
+__m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+ // CHECK-LABEL: test_mm512_mask_fnmadd_ps
+ // CHECK: fneg <16 x float> %{{.*}}
+ // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
+ // CHECK: bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+TEST_CONSTEXPR(match_m512(_mm512_mask_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b1111111100000000, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, -184.0, -218.0, -254.0, -292.0, -332.0, -374.0, -418.0, -464.0));
+TEST_CONSTEXPR(match_m512(_mm512_mask_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b0000000011111111, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 16.0, -2.0, -22.0, -44.0, -68.0, -94.0, -122.0, -152.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0));
__m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
// CHECK-LABEL: test_mm512_mask3_fnmadd_ps
// CHECK: fneg <16 x float> %{{.*}}
@@ -837,14 +932,18 @@ __m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask3_fnmadd_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b1111111100000000), 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, -184.0, -218.0, -254.0, -292.0, -332.0, -374.0, -418.0, -464.0));
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fnmadd_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b0000000011111111), 16.0, -2.0, -22.0, -44.0, -68.0, -94.0, -122.0, -152.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0));
__m512 test_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_maskz_fnmadd_ps
// CHECK: fneg <16 x float> %{{.*}}
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float>
return _mm512_maskz_fnmadd_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_fnmadd_ps((__mmask16)0b1111111100000000, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -184.0, -218.0, -254.0, -292.0, -332.0, -374.0, -418.0, -464.0));
+TEST_CONSTEXPR(match_m512(_mm512_maskz_fnmadd_ps((__mmask16)0b0000000011111111, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 16.0, -2.0, -22.0, -44.0, -68.0, -94.0, -122.0, -152.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0));
__m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_fnmsub_ps
// CHECK: fneg <16 x float> %{{.*}}
@@ -852,15 +951,40 @@ __m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
return _mm512_fnmsub_ps(__A, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -50.0, -70.0, -92.0, -116.0, -142.0, -170.0, -200.0, -232.0, -266.0, -302.0, -340.0, -380.0, -422.0, -466.0, -512.0, -560.0));
+__m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+ // CHECK-LABEL: test_mm512_mask_fnmsub_ps
+ // CHECK: fneg <16 x float> %{{.*}}
+ // CHECK: fneg <16 x float> %{{.*}}
+ // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
+ // CHECK: bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+TEST_CONSTEXPR(match_m512(_mm512_mask_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b1111111100000000, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, -266.0, -302.0, -340.0, -380.0, -422.0, -466.0, -512.0, -560.0));
+TEST_CONSTEXPR(match_m512(_mm512_mask_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__mmask16)0b0000000011111111, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -50.0, -70.0, -92.0, -116.0, -142.0, -170.0, -200.0, -232.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0));
+__m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+ // CHECK-LABEL: test_mm512_mask3_fnmsub_ps
+ // CHECK: fneg <16 x float> %{{.*}}
+ // CHECK: fneg <16 x float> %{{.*}}
+ // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
+ // CHECK: bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U);
+}
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b1111111100000000), 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, -266.0, -302.0, -340.0, -380.0, -422.0, -466.0, -512.0, -560.0));
+TEST_CONSTEXPR(match_m512(_mm512_mask3_fnmsub_ps((__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}, (__mmask16)0b0000000011111111), -50.0, -70.0, -92.0, -116.0, -142.0, -170.0, -200.0, -232.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0));
__m512 test_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_maskz_fnmsub_ps
// CHECK: fneg <16 x float> %{{.*}}
// CHECK: fneg <16 x float> %{{.*}}
// CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float>
return _mm512_maskz_fnmsub_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_fnmsub_ps((__mmask16)0b1111111100000000, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -266.0, -302.0, -340.0, -380.0, -422.0, -466.0, -512.0, -560.0));
+TEST_CONSTEXPR(match_m512(_mm512_maskz_fnmsub_ps((__mmask16)0b0000000011111111, (__m512){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m512){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0}, (__m512){33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0}), -50.0, -70.0, -92.0, -116.0, -142.0, -170.0, -200.0, -232.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0));
__m512d test_mm512_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_fmaddsub_round_pd
// CHECK: @llvm.x86.avx512.vfmaddsub.pd.512
@@ -1069,14 +1193,6 @@ __m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, _
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
-__m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
- // CHECK-LABEL: test_mm512_mask3_fmsub_pd
- // CHECK: fneg <8 x double> %{{.*}}
- // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
- return _mm512_mask3_fmsub_pd(__A, __B, __C, __U);
-}
__m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
// CHECK-LABEL: test_mm512_mask3_fmsub_round_ps
// CHECK: fneg <16 x float> %{{.*}}
@@ -1085,14 +1201,6 @@ __m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mma
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
-__m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
- // CHECK-LABEL: test_mm512_mask3_fmsub_ps
- // CHECK: fneg <16 x float> %{{.*}}
- // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
- return _mm512_mask3_fmsub_ps(__A, __B, __C, __U);
-}
__m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm512_mask3_fmsubadd_round_pd
// CHECK: fneg <8 x double> %{{.*}}
@@ -1133,14 +1241,6 @@ __m512d test_mm512_mask_fnmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B,
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
-__m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
- // CHECK-LABEL: test_mm512_mask_fnmadd_pd
- // CHECK: fneg <8 x double> %{{.*}}
- // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
- return _mm512_mask_fnmadd_pd(__A, __U, __B, __C);
-}
__m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_mask_fnmadd_round_ps
// CHECK: fneg <16 x float> %{{.*}}
@@ -1149,14 +1249,6 @@ __m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
-__m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
- // CHECK-LABEL: test_mm512_mask_fnmadd_ps
- // CHECK: fneg <16 x float> %{{.*}}
- // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
- return _mm512_mask_fnmadd_ps(__A, __U, __B, __C);
-}
__m512d test_mm512_mask_fnmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_mask_fnmsub_round_pd
// CHECK: fneg <8 x double>
@@ -1175,24 +1267,6 @@ __m512d test_mm512_mask3_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C,
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
-__m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
- // CHECK-LABEL: test_mm512_mask_fnmsub_pd
- // CHECK: fneg <8 x double> %{{.*}}
- // CHECK: fneg <8 x double> %{{.*}}
- // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
- return _mm512_mask_fnmsub_pd(__A, __U, __B, __C);
-}
-__m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
- // CHECK-LABEL: test_mm512_mask3_fnmsub_pd
- // CHECK: fneg <8 x double> %{{.*}}
- // CHECK: fneg <8 x double> %{{.*}}
- // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: bitcast i8 %{{.*}} to <8 x i1>
- // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
- return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U);
-}
__m512 test_mm512_mask_fnmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
// CHECK-LABEL: test_mm512_mask_fnmsub_round_ps
// CHECK: fneg <16 x float> %{{.*}}
@@ -1211,24 +1285,6 @@ __m512 test_mm512_mask3_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mm
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
-__m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
- // CHECK-LABEL: test_mm512_mask_fnmsub_ps
- // CHECK: fneg <16 x float> %{{.*}}
- // CHECK: fneg <16 x float> %{{.*}}
- // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
- return _mm512_mask_fnmsub_ps(__A, __U, __B, __C);
-}
-__m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
- // CHECK-LABEL: test_mm512_mask3_fnmsub_ps
- // CHECK: fneg <16 x float> %{{.*}}
- // CHECK: fneg <16 x float> %{{.*}}
- // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: bitcast i16 %{{.*}} to <16 x i1>
- // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
- return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U);
-}
__mmask16 test_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_cmpeq_epi32_mask
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 7043927185a3a..1ffe34f9fde0e 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -2848,6 +2848,8 @@ __m128d test_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_fmadd_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_fmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, 14.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask_fmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 8.0, 2.0));
__m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
// CHECK-LABEL: test_mm_mask_fmsub_pd
@@ -2857,6 +2859,8 @@ __m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_fmsub_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_fmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, 2.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask_fmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -2.0, 2.0));
__m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm_mask3_fmadd_pd
@@ -2865,6 +2869,8 @@ __m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 _
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask3_fmadd_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, 14.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), 8.0, 6.0));
__m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm_mask3_fnmadd_pd
@@ -2874,6 +2880,8 @@ __m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask3_fnmadd_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, -2.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmadd_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), 2.0, 6.0));
__m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
// CHECK-LABEL: test_mm_maskz_fmadd_pd
@@ -2882,6 +2890,8 @@ __m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d _
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_fmadd_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fmadd_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, 14.0));
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fmadd_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 8.0, 0.0));
__m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
// CHECK-LABEL: test_mm_maskz_fmsub_pd
@@ -2891,6 +2901,8 @@ __m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d _
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_fmsub_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fmsub_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, 2.0));
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fmsub_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -2.0, 0.0));
__m128d test_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
// CHECK-LABEL: test_mm_maskz_fnmadd_pd
@@ -2900,6 +2912,8 @@ __m128d test_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_fnmadd_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fnmadd_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, -2.0));
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fnmadd_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 2.0, 0.0));
__m128d test_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
// CHECK-LABEL: test_mm_maskz_fnmsub_pd
@@ -2910,6 +2924,8 @@ __m128d test_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_fnmsub_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fnmsub_pd((__mmask8)0b10, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 0.0, -14.0));
+TEST_CONSTEXPR(match_m128d(_mm_maskz_fnmsub_pd((__mmask8)0b01, (__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -8.0, 0.0));
__m256d test_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
// CHECK-LABEL: test_mm256_mask_fmadd_pd
@@ -2918,6 +2934,8 @@ __m256d test_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_fmadd_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b1100, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 1.0, 2.0, 32.0, 44.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b0011, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 14.0, 22.0, 3.0, 4.0));
__m256d test_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
// CHECK-LABEL: test_mm256_mask_fmsub_pd
@@ -2927,6 +2945,8 @@ __m256d test_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_fmsub_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b1100, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 1.0, 2.0, 10.0, 20.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b0011, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), -4.0, 2.0, 3.0, 4.0));
__m256d test_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm256_mask3_fmadd_pd
@@ -2935,6 +2955,8 @@ __m256d test_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask3_fmadd_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask3_fmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b1100), 9.0, 10.0, 32.0, 44.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask3_fmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b0011), 14.0, 22.0, 11.0, 12.0));
__m256d test_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm256_mask3_fnmadd_pd
@@ -2944,6 +2966,8 @@ __m256d test_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmas
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask3_fnmadd_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask3_fnmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b1100), 9.0, 10.0, -10.0, -20.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask3_fnmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b0011), 4.0, -2.0, 11.0, 12.0));
__m256d test_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
// CHECK-LABEL: test_mm256_maskz_fmadd_pd
@@ -2952,6 +2976,8 @@ __m256d test_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_fmadd_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fmadd_pd((__mmask8)0b1100, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 0.0, 0.0, 32.0, 44.0));
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fmadd_pd((__mmask8)0b0011, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 14.0, 22.0, 0.0, 0.0));
__m256d test_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
// CHECK-LABEL: test_mm256_maskz_fmsub_pd
@@ -2961,6 +2987,8 @@ __m256d test_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_fmsub_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fmsub_pd((__mmask8)0b1100, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 0.0, 0.0, 10.0, 20.0));
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fmsub_pd((__mmask8)0b0011, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), -4.0, 2.0, 0.0, 0.0));
__m256d test_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
// CHECK-LABEL: test_mm256_maskz_fnmadd_pd
@@ -2970,6 +2998,8 @@ __m256d test_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m25
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_fnmadd_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fnmadd_pd((__mmask8)0b1100, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 0.0, 0.0, -10.0, -20.0));
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fnmadd_pd((__mmask8)0b0011, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 4.0, -2.0, 0.0, 0.0));
__m256d test_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
// CHECK-LABEL: test_mm256_maskz_fnmsub_pd
@@ -2980,6 +3010,8 @@ __m256d test_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m25
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_fnmsub_pd(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fnmsub_pd((__mmask8)0b1100, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 0.0, 0.0, -32.0, -44.0));
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_fnmsub_pd((__mmask8)0b0011, (__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), -14.0, -22.0, 0.0, 0.0));
__m128 test_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
// CHECK-LABEL: test_mm_mask_fmadd_ps
@@ -2988,6 +3020,8 @@ __m128 test_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_fmadd_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_fmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b1100, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 1.0, 2.0, 32.0, 44.0));
+TEST_CONSTEXPR(match_m128(_mm_mask_fmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b0011, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 14.0, 22.0, 3.0, 4.0));
__m128 test_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
// CHECK-LABEL: test_mm_mask_fmsub_ps
@@ -2997,6 +3031,8 @@ __m128 test_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_fmsub_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_fmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b1100, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 1.0, 2.0, 10.0, 20.0));
+TEST_CONSTEXPR(match_m128(_mm_mask_fmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b0011, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), -4.0, 2.0, 3.0, 4.0));
__m128 test_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
// CHECK-LABEL: test_mm_mask3_fmadd_ps
@@ -3005,6 +3041,8 @@ __m128 test_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask3_fmadd_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m128(_mm_mask3_fmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b1100), 9.0, 10.0, 32.0, 44.0));
+TEST_CONSTEXPR(match_m128(_mm_mask3_fmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b0011), 14.0, 22.0, 11.0, 12.0));
__m128 test_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
// CHECK-LABEL: test_mm_mask3_fnmadd_ps
@@ -3014,6 +3052,8 @@ __m128 test_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask3_fnmadd_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m128(_mm_mask3_fnmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b1100), 9.0, 10.0, -10.0, -20.0));
+TEST_CONSTEXPR(match_m128(_mm_mask3_fnmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b0011), 4.0, -2.0, 11.0, 12.0));
__m128 test_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
// CHECK-LABEL: test_mm_maskz_fmadd_ps
@@ -3022,6 +3062,8 @@ __m128 test_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_fmadd_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_fmadd_ps((__mmask8)0b1100, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 0.0, 0.0, 32.0, 44.0));
+TEST_CONSTEXPR(match_m128(_mm_maskz_fmadd_ps((__mmask8)0b0011, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 14.0, 22.0, 0.0, 0.0));
__m128 test_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
// CHECK-LABEL: test_mm_maskz_fmsub_ps
@@ -3031,6 +3073,8 @@ __m128 test_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_fmsub_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_fmsub_ps((__mmask8)0b1100, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 0.0, 0.0, 10.0, 20.0));
+TEST_CONSTEXPR(match_m128(_mm_maskz_fmsub_ps((__mmask8)0b0011, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), -4.0, 2.0, 0.0, 0.0));
__m128 test_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
// CHECK-LABEL: test_mm_maskz_fnmadd_ps
@@ -3040,6 +3084,8 @@ __m128 test_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_fnmadd_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_fnmadd_ps((__mmask8)0b1100, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 0.0, 0.0, -10.0, -20.0));
+TEST_CONSTEXPR(match_m128(_mm_maskz_fnmadd_ps((__mmask8)0b0011, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 4.0, -2.0, 0.0, 0.0));
__m128 test_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
// CHECK-LABEL: test_mm_maskz_fnmsub_ps
@@ -3050,6 +3096,8 @@ __m128 test_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_fnmsub_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_fnmsub_ps((__mmask8)0b1100, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 0.0, 0.0, -32.0, -44.0));
+TEST_CONSTEXPR(match_m128(_mm_maskz_fnmsub_ps((__mmask8)0b0011, (__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), -14.0, -22.0, 0.0, 0.0));
__m256 test_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
// CHECK-LABEL: test_mm256_mask_fmadd_ps
@@ -3057,6 +3105,8 @@ __m256 test_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_fmadd_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_fmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, 86.0, 106.0, 128.0, 152.0));
+TEST_CONSTEXPR(match_m256(_mm256_mask_fmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 5.0, 6.0, 7.0, 8.0));
__m256 test_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
// CHECK-LABEL: test_mm256_mask_fmsub_ps
@@ -3065,6 +3115,8 @@ __m256 test_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_fmsub_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_fmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, 44.0, 62.0, 82.0, 104.0));
+TEST_CONSTEXPR(match_m256(_mm256_mask_fmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 5.0, 6.0, 7.0, 8.0));
__m256 test_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
// CHECK-LABEL: test_mm256_mask3_fmadd_ps
@@ -3072,6 +3124,8 @@ __m256 test_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask3_fmadd_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask3_fmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, 86.0, 106.0, 128.0, 152.0));
+TEST_CONSTEXPR(match_m256(_mm256_mask3_fmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), 26.0, 38.0, 52.0, 68.0, 21.0, 22.0, 23.0, 24.0));
__m256 test_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
// CHECK-LABEL: test_mm256_mask3_fnmadd_ps
@@ -3080,6 +3134,8 @@ __m256 test_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 _
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask3_fnmadd_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m256( _mm256_mask3_fnmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, -44.0, -62.0, -82.0, -104.0));
+TEST_CONSTEXPR(match_m256( _mm256_mask3_fnmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), 8.0, -2.0, -14.0, -28.0, 21.0, 22.0, 23.0, 24.0));
__m256 test_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
// CHECK-LABEL: test_mm256_maskz_fmadd_ps
@@ -3087,6 +3143,8 @@ __m256 test_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_fmadd_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_fmadd_ps((__mmask8)0b11110000, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, 86.0, 106.0, 128.0, 152.0));
+TEST_CONSTEXPR(match_m256(_mm256_maskz_fmadd_ps((__mmask8)0b00001111, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 26.0, 38.0, 52.0, 68.0, 0.0, 0.0, 0.0, 0.0));
__m256 test_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
// CHECK-LABEL: test_mm256_maskz_fmsub_ps
@@ -3095,6 +3153,8 @@ __m256 test_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_fmsub_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_fmsub_ps((__mmask8)0b11110000, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, 44.0, 62.0, 82.0, 104.0));
+TEST_CONSTEXPR(match_m256(_mm256_maskz_fmsub_ps((__mmask8)0b00001111, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -8.0, 2.0, 14.0, 28.0, 0.0, 0.0, 0.0, 0.0));
__m256 test_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
// CHECK-LABEL: test_mm256_maskz_fnmadd_ps
@@ -3103,6 +3163,8 @@ __m256 test_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 _
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_fnmadd_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_fnmadd_ps((__mmask8)0b11110000, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, -44.0, -62.0, -82.0, -104.0));
+TEST_CONSTEXPR(match_m256(_mm256_maskz_fnmadd_ps((__mmask8)0b00001111, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, 0.0, 0.0, 0.0, 0.0));
__m256 test_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
// CHECK-LABEL: test_mm256_maskz_fnmsub_ps
@@ -3112,6 +3174,8 @@ __m256 test_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 _
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_fnmsub_ps(__U, __A, __B, __C);
}
+TEST_CONSTEXPR(match_m256(_mm256_maskz_fnmsub_ps((__mmask8)0b11110000, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 0.0, 0.0, 0.0, 0.0, -86.0, -106.0, -128.0, -152.0));
+TEST_CONSTEXPR(match_m256(_mm256_maskz_fnmsub_ps((__mmask8)0b00001111, (__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, 0.0, 0.0, 0.0, 0.0));
__m128d test_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
// CHECK-LABEL: test_mm_mask_fmaddsub_pd
@@ -3296,6 +3360,8 @@ __m128d test_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 _
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask3_fmsub_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, 2.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), -2.0, 6.0));
__m256d test_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm256_mask3_fmsub_pd
@@ -3305,6 +3371,8 @@ __m256d test_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask3_fmsub_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask3_fmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b1100), 9.0, 10.0, 10.0, 20.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask3_fmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b0011), -4.0, 2.0, 11.0, 12.0));
__m128 test_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
// CHECK-LABEL: test_mm_mask3_fmsub_ps
@@ -3314,6 +3382,8 @@ __m128 test_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask3_fmsub_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m128(_mm_mask3_fmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b1100), 9.0, 10.0, 10.0, 20.0));
+TEST_CONSTEXPR(match_m128(_mm_mask3_fmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b0011), -4.0, 2.0, 11.0, 12.0));
__m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
// CHECK-LABEL: test_mm256_mask3_fmsub_ps
@@ -3322,6 +3392,8 @@ __m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask3_fmsub_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask3_fmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, 44.0, 62.0, 82.0, 104.0));
+TEST_CONSTEXPR(match_m256(_mm256_mask3_fmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), -8.0, 2.0, 14.0, 28.0, 21.0, 22.0, 23.0, 24.0));
__m128d test_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm_mask3_fmsubadd_pd
@@ -3366,6 +3438,8 @@ __m128d test_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d _
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_fnmadd_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_fnmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, -2.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask_fnmadd_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 2.0, 2.0));
__m256d test_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
// CHECK-LABEL: test_mm256_mask_fnmadd_pd
@@ -3375,6 +3449,8 @@ __m256d test_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_fnmadd_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fnmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b1100, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 1.0, 2.0, -10.0, -20.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fnmadd_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b0011, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 4.0, -2.0, 3.0, 4.0));
__m128 test_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
// CHECK-LABEL: test_mm_mask_fnmadd_ps
@@ -3384,6 +3460,8 @@ __m128 test_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_fnmadd_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_fnmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b1100, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 1.0, 2.0, -10.0, -20.0));
+TEST_CONSTEXPR(match_m128(_mm_mask_fnmadd_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b0011, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 4.0, -2.0, 3.0, 4.0));
__m256 test_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
// CHECK-LABEL: test_mm256_mask_fnmadd_ps
@@ -3392,6 +3470,8 @@ __m256 test_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_fnmadd_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_fnmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, -44.0, -62.0, -82.0, -104.0));
+TEST_CONSTEXPR(match_m256(_mm256_mask_fnmadd_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 8.0, -2.0, -14.0, -28.0, 5.0, 6.0, 7.0, 8.0));
__m128d test_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
// CHECK-LABEL: test_mm_mask_fnmsub_pd
@@ -3402,6 +3482,8 @@ __m128d test_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d _
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_fnmsub_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_fnmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b10, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), 1.0, -14.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask_fnmsub_pd((__m128d){1.0, 2.0}, (__mmask8)0b01, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}), -8.0, 2.0));
__m128d test_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm_mask3_fnmsub_pd
@@ -3412,6 +3494,8 @@ __m128d test_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask3_fnmsub_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b10), 5.0, -14.0));
+TEST_CONSTEXPR(match_m128d(_mm_mask3_fnmsub_pd((__m128d){1.0, 2.0}, (__m128d){3.0, 4.0}, (__m128d){5.0, 6.0}, (__mmask8)0b01), -8.0, 6.0));
__m256d test_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
// CHECK-LABEL: test_mm256_mask_fnmsub_pd
@@ -3422,6 +3506,8 @@ __m256d test_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_fnmsub_pd(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fnmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b1100, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), 1.0, 2.0, -32.0, -44.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask_fnmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__mmask8)0b0011, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }), -14.0, -22.0, 3.0, 4.0));
__m256d test_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
// CHECK-LABEL: test_mm256_mask3_fnmsub_pd
@@ -3432,6 +3518,8 @@ __m256d test_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmas
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask3_fnmsub_pd(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m256d(_mm256_mask3_fnmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b1100), 9.0, 10.0, -32.0, -44.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask3_fnmsub_pd((__m256d){ 1.0, 2.0, 3.0, 4.0 }, (__m256d){ 5.0, 6.0, 7.0, 8.0 }, (__m256d){ 9.0, 10.0, 11.0, 12.0 }, (__mmask8)0b0011), -14.0, -22.0, 11.0, 12.0));
__m128 test_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
// CHECK-LABEL: test_mm_mask_fnmsub_ps
@@ -3442,6 +3530,8 @@ __m128 test_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_fnmsub_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_fnmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b1100, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), 1.0, 2.0, -32.0, -44.0));
+TEST_CONSTEXPR(match_m128(_mm_mask_fnmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__mmask8)0b0011, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}), -14.0, -22.0, 3.0, 4.0));
__m128 test_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
// CHECK-LABEL: test_mm_mask3_fnmsub_ps
@@ -3452,6 +3542,8 @@ __m128 test_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask3_fnmsub_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m128(_mm_mask3_fnmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b1100), 9.0, 10.0, -32.0, -44.0));
+TEST_CONSTEXPR(match_m128(_mm_mask3_fnmsub_ps((__m128){1.0, 2.0, 3.0, 4.0}, (__m128){5.0, 6.0, 7.0, 8.0}, (__m128){9.0, 10.0, 11.0, 12.0}, (__mmask8)0b0011), -14.0, -22.0, 11.0, 12.0));
__m256 test_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
// CHECK-LABEL: test_mm256_mask_fnmsub_ps
@@ -3461,6 +3553,8 @@ __m256 test_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_fnmsub_ps(__A, __U, __B, __C);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask_fnmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b11110000, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), 1.0, 2.0, 3.0, 4.0, -86.0, -106.0, -128.0, -152.0));
+TEST_CONSTEXPR(match_m256(_mm256_mask_fnmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__mmask8)0b00001111, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}), -26.0, -38.0, -52.0, -68.0, 5.0, 6.0, 7.0, 8.0));
__m256 test_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
// CHECK-LABEL: test_mm256_mask3_fnmsub_ps
@@ -3470,6 +3564,8 @@ __m256 test_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 _
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask3_fnmsub_ps(__A, __B, __C, __U);
}
+TEST_CONSTEXPR(match_m256(_mm256_mask3_fnmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b11110000), 17.0, 18.0, 19.0, 20.0, -86.0, -106.0, -128.0, -152.0));
+TEST_CONSTEXPR(match_m256(_mm256_mask3_fnmsub_ps((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m256){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}, (__m256){17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}, (__mmask8)0b00001111), -26.0, -38.0, -52.0, -68.0, 21.0, 22.0, 23.0, 24.0));
__m128d test_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_mask_add_pd
More information about the cfe-commits
mailing list