[clang] f9256ca - [Headers][X86] Allow AVX512 masked arithmetic ss/sd intrinsics to be used in constexpr (#162816)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 18 02:23:44 PST 2025
Author: woruyu
Date: 2025-11-18T10:23:39Z
New Revision: f9256ca6cc8636347a73c028a1e30596aa27ac89
URL: https://github.com/llvm/llvm-project/commit/f9256ca6cc8636347a73c028a1e30596aa27ac89
DIFF: https://github.com/llvm/llvm-project/commit/f9256ca6cc8636347a73c028a1e30596aa27ac89.diff
LOG: [Headers][X86] Allow AVX512 masked arithmetic ss/sd intrinsics to be used in constexpr (#162816)
This PR just resolves ss/sd part of AVX512 masked arithmetic intrinsics of #160559.
Added:
Modified:
clang/include/clang/Basic/BuiltinsX86.td
clang/lib/AST/ByteCode/InterpBuiltin.cpp
clang/lib/AST/ExprConstant.cpp
clang/lib/Headers/avx10_2bf16intrin.h
clang/lib/Headers/avx512fintrin.h
clang/lib/Headers/avx512fp16intrin.h
clang/test/CodeGen/X86/avx10_2bf16-builtins.c
clang/test/CodeGen/X86/avx512f-builtins.c
clang/test/CodeGen/X86/avx512fp16-builtins.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index a656fe341c8e0..7a14c6ec21a1a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4117,15 +4117,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
def selectpd_512 : X86Builtin<"_Vector<8, double>(unsigned char, _Vector<8, double>, _Vector<8, double>)">;
}
-let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectsh_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
}
-let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bf16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 30426565407ba..5a96320e12b6f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2838,6 +2838,30 @@ static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
return true;
}
+/// Scalar variant of AVX512 predicated select:
+/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change.
+/// All other elements are taken from RHS.
+static bool interp__builtin_select_scalar(InterpState &S,
+ const CallExpr *Call) {
+ unsigned N =
+ Call->getArg(1)->getType()->getAs<VectorType>()->getNumElements();
+
+ const Pointer &W = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ APSInt U = popToAPSInt(S, Call->getArg(0));
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ bool TakeA0 = U.getZExtValue() & 1ULL;
+
+ for (unsigned I = TakeA0; I != N; ++I)
+ Dst.elem<Floating>(I) = W.elem<Floating>(I);
+ if (TakeA0)
+ Dst.elem<Floating>(0) = A.elem<Floating>(0);
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
APSInt Mask = popToAPSInt(S, Call->getArg(2));
@@ -4151,6 +4175,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return APInt::getAllOnes(DstBits);
});
+ case clang::X86::BI__builtin_ia32_selectss_128:
+ case clang::X86::BI__builtin_ia32_selectsd_128:
+ case clang::X86::BI__builtin_ia32_selectsh_128:
+ case clang::X86::BI__builtin_ia32_selectsbf_128:
+ return interp__builtin_select_scalar(S, Call);
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ed1f1b7508ffc..74f6e3acb6b39 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12202,6 +12202,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), SourceLen), E);
};
+ auto EvalSelectScalar = [&](unsigned Len) -> bool {
+ APSInt Mask;
+ APValue AVal, WVal;
+ if (!EvaluateInteger(E->getArg(0), Mask, Info) ||
+ !EvaluateAsRValue(Info, E->getArg(1), AVal) ||
+ !EvaluateAsRValue(Info, E->getArg(2), WVal))
+ return false;
+
+ bool TakeA0 = (Mask.getZExtValue() & 1u) != 0;
+ SmallVector<APValue, 4> Res;
+ Res.reserve(Len);
+ Res.push_back(TakeA0 ? AVal.getVectorElt(0) : WVal.getVectorElt(0));
+ for (unsigned I = 1; I < Len; ++I)
+ Res.push_back(WVal.getVectorElt(I));
+ APValue V(Res.data(), Res.size());
+ return Success(V, E);
+ };
+
switch (E->getBuiltinCallee()) {
default:
return false;
@@ -12505,6 +12523,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return APInt((Src).trunc(DstBits));
return APInt::getAllOnes(DstBits);
});
+ case clang::X86::BI__builtin_ia32_selectss_128:
+ return EvalSelectScalar(4);
+ case clang::X86::BI__builtin_ia32_selectsd_128:
+ return EvalSelectScalar(2);
+ case clang::X86::BI__builtin_ia32_selectsh_128:
+ case clang::X86::BI__builtin_ia32_selectsbf_128:
+ return EvalSelectScalar(8);
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 9f5b726d7b789..3df6930f94be3 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -221,12 +221,12 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a,
return __a;
}
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), __W);
}
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B),
_mm_setzero_pbh());
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 53b18df764370..531c23a210586 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1820,14 +1820,14 @@ _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A) {
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
@@ -1850,14 +1850,14 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
@@ -1935,14 +1935,14 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
@@ -1964,14 +1964,14 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
@@ -2050,14 +2050,14 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
@@ -2079,14 +2079,14 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
@@ -2165,14 +2165,14 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
@@ -2195,14 +2195,14 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 2776450387480..4c6bf3a3fa968 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -588,23 +588,20 @@ _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) {
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_add_sh(__m128h __A, __m128h __B) {
__A[0] += __B[0];
return __A;
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W,
- __mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
__A = _mm_add_sh(__A, __B);
return __builtin_ia32_selectsh_128(__U, __A, __W);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B) {
__A = _mm_add_sh(__A, __B);
return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
}
@@ -624,23 +621,20 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U,
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A,
- __m128h __B) {
+static __inline__ __m128h
+ __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sub_sh(__m128h __A, __m128h __B) {
__A[0] -= __B[0];
return __A;
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W,
- __mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
__A = _mm_sub_sh(__A, __B);
return __builtin_ia32_selectsh_128(__U, __A, __W);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B) {
__A = _mm_sub_sh(__A, __B);
return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
}
@@ -660,23 +654,20 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U,
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A,
- __m128h __B) {
+static __inline__ __m128h
+ __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mul_sh(__m128h __A, __m128h __B) {
__A[0] *= __B[0];
return __A;
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W,
- __mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
__A = _mm_mul_sh(__A, __B);
return __builtin_ia32_selectsh_128(__U, __A, __W);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B) {
__A = _mm_mul_sh(__A, __B);
return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
}
@@ -696,23 +687,20 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U,
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A,
- __m128h __B) {
+static __inline__ __m128h
+ __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_div_sh(__m128h __A, __m128h __B) {
__A[0] /= __B[0];
return __A;
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W,
- __mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_div_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
__A = _mm_div_sh(__A, __B);
return __builtin_ia32_selectsh_128(__U, __A, __W);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) {
__A = _mm_div_sh(__A, __B);
return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
}
@@ -960,22 +948,19 @@ static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P,
}
// moves with vmovsh:
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a,
- __m128h __b) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_move_sh(__m128h __a, __m128h __b) {
__a[0] = __b[0];
return __a;
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W,
- __mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_move_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B) {
return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B),
_mm_setzero_ph());
}
diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
index f8a4c51d9ceb3..fac7ef2e2bf29 100644
--- a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
@@ -1,7 +1,11 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m256bh test_mm256_setzero_pbh() {
// CHECK-LABEL: @test_mm256_setzero_pbh
@@ -353,6 +357,7 @@ __m128bh test_mm_move_sbh(__m128bh A, __m128bh B) {
// CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 0
return _mm_move_sbh(A, B);
}
+TEST_CONSTEXPR(match_m128bh(_mm_move_sbh((__m128bh)(__v8bf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}, (__m128bh)(__v8bf){9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f,16.0f}), 9.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
__m128bh test_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
// CHECK-LABEL: @test_mm_mask_move_sbh
@@ -366,6 +371,7 @@ __m128bh test_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128b
// CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0
return _mm_mask_move_sbh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128bh(_mm_mask_move_sbh((__m128bh)(__v8bf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}, (__mmask8)0x01, (__m128bh)(__v8bf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}, (__m128bh)(__v8bf){9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f,16.0f}), 9.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
__m128bh test_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
// CHECK-LABEL: @test_mm_maskz_move_sbh
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index ec813e5acd7cf..7e62a7d92890f 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3302,6 +3302,8 @@ __m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_add_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_add_ss((__m128)(__v4sf){10.0f, 100.0f, 200.0f, 300.0f}, 0x1,(__m128)(__v4sf){1.25f, 3.0f, 4.0f, 5.0f},(__m128)(__v4sf){2.75f, 6.0f, 7.0f, 8.0f}),4.0f, 100.0f, 200.0f, 300.0f));
+
__m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_add_ss
// CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round
@@ -3317,6 +3319,8 @@ __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_add_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_add_ss(0x1, (__m128)(__v4sf){1.25f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.75f, 6.0f, 7.0f, 8.0f}), 4.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_add_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_add_round_sd
// CHECK: @llvm.x86.avx512.mask.add.sd.round
@@ -3347,6 +3351,8 @@ __m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_add_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_add_sd((__m128d)(__v2df){10.0, 999.0}, 0x1, (__m128d)(__v2df){5.5, 77.0}, (__m128d)(__v2df){0.25, 88.0}), 5.75, 999.0));
+
__m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_add_sd
// CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round
@@ -3362,6 +3368,8 @@ __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_add_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_add_sd(0x1, (__m128d)(__v2df){5.5, 77.0}, (__m128d)(__v2df){0.25, 88.0}), 5.75, 0.0));
+
__m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_sub_round_pd
// CHECK: @llvm.x86.avx512.sub.pd.512
@@ -3450,6 +3458,8 @@ __m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_sub_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_sub_ss((__m128)(__v4sf){-1.0f, 10.0f, 20.0f, 30.0f}, 0x1, (__m128)(__v4sf){7.0f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.5f, 6.0f, 7.0f, 8.0f}), 4.5f, 10.0f, 20.0f, 30.0f));
+
__m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_sub_ss
// CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round
@@ -3465,6 +3475,8 @@ __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_sub_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_sub_ss(0x1, (__m128)(__v4sf){7.0f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.5f, 6.0f, 7.0f, 8.0f}), 4.5f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_sub_round_sd
// CHECK: @llvm.x86.avx512.mask.sub.sd.round
@@ -3495,6 +3507,8 @@ __m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_sub_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_sub_sd((__m128d)(__v2df){-1.0, 111.0}, 0x1, (__m128d)(__v2df){9.0, 70.0}, (__m128d)(__v2df){3.5, 80.0}), 5.5, 111.0));
+
__m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_sub_sd
// CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round
@@ -3510,6 +3524,8 @@ __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_sub_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_sub_sd(0x1, (__m128d)(__v2df){9.0, 70.0}, (__m128d)(__v2df){3.5, 80.0}), 5.5, 0.0));
+
__m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_mul_round_pd
// CHECK: @llvm.x86.avx512.mul.pd.512
@@ -3598,6 +3614,8 @@ __m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_mul_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_mul_ss((__m128)(__v4sf){42.0f, -1.0f, -2.0f, -3.0f}, 0x1, (__m128)(__v4sf){6.0f, 9.0f, 9.0f, 9.0f}, (__m128)(__v4sf){7.0f, 8.0f, 8.0f, 8.0f}), 42.0f, -1.0f, -2.0f, -3.0f));
+
__m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_mul_ss
// CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round
@@ -3613,6 +3631,8 @@ __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_mul_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_mul_ss(0x1, (__m128)(__v4sf){6.0f, 9.0f, 9.0f, 9.0f}, (__m128)(__v4sf){7.0f, 8.0f, 8.0f, 8.0f}), 42.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_mul_round_sd
// CHECK: @llvm.x86.avx512.mask.mul.sd.round
@@ -3643,6 +3663,8 @@ __m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_mul_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_mul_sd((__m128d)(__v2df){123.0, -9.0}, 0x1, (__m128d)(__v2df){2.5, 1.0}, (__m128d)(__v2df){4.0, 2.0}), 10.0, -9.0));
+
__m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_mul_sd
// CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round
@@ -3658,6 +3680,8 @@ __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_mul_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_mul_sd(0x1, (__m128d)(__v2df){2.5, 1.0}, (__m128d)(__v2df){4.0, 2.0}), 10.0, 0.0));
+
__m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_div_round_pd
// CHECK: @llvm.x86.avx512.div.pd.512
@@ -3757,6 +3781,8 @@ __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_div_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_div_ss((__m128)(__v4sf){-7.0f, 5.0f, 6.0f, 7.0f}, 0x1, (__m128)(__v4sf){9.0f, 1.0f, 1.0f, 1.0f}, (__m128)(__v4sf){3.0f, 2.0f, 2.0f, 2.0f}), 3.0f, 5.0f, 6.0f, 7.0f));
+
__m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_div_ss
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
@@ -3771,6 +3797,8 @@ __m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_div_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_div_ss(0x1, (__m128)(__v4sf){9.0f, 1.0f, 1.0f, 1.0f}, (__m128)(__v4sf){3.0f, 2.0f, 2.0f, 2.0f}), 3.0f, 0.0f, 0.0f, 0.0f));
+
__m128d test_mm_div_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_div_round_sd
// CHECK: @llvm.x86.avx512.mask.div.sd.round
@@ -3800,6 +3828,8 @@ __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_div_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_div_sd((__m128d)(__v2df){-8.0, 44.0}, 0x1, (__m128d)(__v2df){8.0, 10.0}, (__m128d)(__v2df){2.0, 20.0}), 4.0, 44.0));
+
__m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_div_sd
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -3814,6 +3844,8 @@ __m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_div_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_div_sd(0x1, (__m128d)(__v2df){8.0, 10.0}, (__m128d)(__v2df){2.0, 20.0}), 4.0, 0.0));
+
__m128 test_mm_max_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_max_round_ss
// CHECK: @llvm.x86.avx512.mask.max.ss.round
@@ -11673,6 +11705,7 @@ __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
// CHECK-NEXT: insertelement <4 x float> [[VEC]], float [[SEL]], i64 0
return _mm_mask_move_ss ( __W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_move_ss((__m128)(__v4sf){1.0f,2.0f,3.0f,4.0f}, (__mmask8)0x01, (__m128)(__v4sf){100.0f,200.0f,300.0f,400.0f}, (__m128)(__v4sf){9.0f,10.0f,11.0f,12.0f}), 9.0f,2.0f,3.0f,4.0f));
__m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
@@ -11687,6 +11720,7 @@ __m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
// CHECK-NEXT: insertelement <4 x float> [[VEC]], float [[SEL]], i64 0
return _mm_maskz_move_ss (__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_move_ss((__mmask8)0x01, (__m128)(__v4sf){0.0f,0.0f,0.0f,0.0f}, (__m128)(__v4sf){9.0f,10.0f,11.0f,12.0f}), 9.0f,0.0f,0.0f,0.0f));
__m128d test_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
@@ -11701,6 +11735,7 @@ __m128d test_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __
// CHECK-NEXT: insertelement <2 x double> [[VEC]], double [[SEL]], i64 0
return _mm_mask_move_sd ( __W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_move_sd((__m128d)(__v2df){1.0,2.0}, (__mmask8)0x01, (__m128d)(__v2df){100.0,200.0}, (__m128d)(__v2df){9.0,10.0}), 9.0,2.0));
__m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
@@ -11715,6 +11750,7 @@ __m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
// CHECK-NEXT: insertelement <2 x double> [[VEC]], double [[SEL]], i64 0
return _mm_maskz_move_sd (__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_move_sd((__mmask8)0x01, (__m128d)(__v2df){0.0,0.0}, (__m128d)(__v2df){9.0,10.0}), 9.0,0.0));
void test_mm_mask_store_ss(float * __P, __mmask8 __U, __m128 __A)
{
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index f0a0a3b28542f..1c8ab8ca52099 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -796,6 +796,8 @@ __m128h test_mm_mask_add_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_mask_add_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_add_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),110.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
+
__m128h test_mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_add_sh
// CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i32 0
@@ -810,6 +812,7 @@ __m128h test_mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_maskz_add_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_add_sh((__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),110.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
__m128h test_mm_add_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_add_sh
@@ -849,6 +852,8 @@ __m128h test_mm_mask_sub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_mask_sub_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_sub_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){20.0f,21.0f,22.0f,23.0f,24.0f,25.0f,26.0f,27.0f},(__m128h)(__v8hf){5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f}),15.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
+
__m128h test_mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_sub_sh
// CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i32 0
@@ -863,6 +868,7 @@ __m128h test_mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_maskz_sub_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_sub_sh((__mmask8)0x01,(__m128h)(__v8hf){20.0f,21.0f,22.0f,23.0f,24.0f,25.0f,26.0f,27.0f},(__m128h)(__v8hf){5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f}),15.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
__m128h test_mm_sub_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_sub_sh
@@ -902,6 +908,8 @@ __m128h test_mm_mask_mul_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_mask_mul_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_mul_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f},(__m128h)(__v8hf){4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f}),12.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
+
__m128h test_mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_mul_sh
// CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i32 0
@@ -916,6 +924,7 @@ __m128h test_mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_maskz_mul_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_mul_sh((__mmask8)0x01,(__m128h)(__v8hf){3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f},(__m128h)(__v8hf){4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f}),12.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
__m128h test_mm_mul_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_mul_sh
@@ -955,6 +964,8 @@ __m128h test_mm_mask_div_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_mask_div_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_div_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f},(__m128h)(__v8hf){4.0f,3.0f,2.0f,1.0f,2.0f,3.0f,4.0f,5.0f}),2.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
+
__m128h test_mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_div_sh
// CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i32 0
@@ -969,6 +980,7 @@ __m128h test_mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
return _mm_maskz_div_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_div_sh((__mmask8)0x01,(__m128h)(__v8hf){8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f},(__m128h)(__v8hf){4.0f,3.0f,2.0f,1.0f,2.0f,3.0f,4.0f,5.0f}),2.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
__m128h test_mm_div_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_div_sh
@@ -1622,6 +1634,7 @@ __m128h test_mm_mask_move_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B
// CHECK-NEXT: insertelement <8 x half> [[VEC]], half [[SEL]], i64 0
return _mm_mask_move_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_move_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}, (__mmask8)0x01, (__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f}, (__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}), 100.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f));
__m128h test_mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_move_sh
@@ -1635,6 +1648,7 @@ __m128h test_mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-NEXT: insertelement <8 x half> [[VEC]], half [[SEL]], i64 0
return _mm_maskz_move_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_move_sh((__mmask8)0x01, (__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f}, (__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}), 100.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f));
short test_mm_cvtsi128_si16(__m128i A) {
// CHECK-LABEL: test_mm_cvtsi128_si16
More information about the cfe-commits
mailing list