[clang] 8a682b7 - [clang][bytecode][X86] Allow AVX512 funnel shift by scalar immediate intrinsics to be used in constexpr (#157681)

via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 9 09:48:36 PDT 2025


Author: Simon Pilgrim
Date: 2025-09-09T16:48:32Z
New Revision: 8a682b7f063a431799f79becb53319af1e840701

URL: https://github.com/llvm/llvm-project/commit/8a682b7f063a431799f79becb53319af1e840701
DIFF: https://github.com/llvm/llvm-project/commit/8a682b7f063a431799f79becb53319af1e840701.diff

LOG: [clang][bytecode][X86] Allow AVX512 funnel shift by scalar immediate intrinsics to be used in constexpr (#157681)

Extends interp__builtin_elementwise_triop to handle (vector, vector, scalar) trinary op intrinsics

Fixes #153152

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/test/CodeGen/X86/avx512vbmi2-builtins.c
    clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 9680c2b5108a2..72001e097550e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1772,75 +1772,30 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
   def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
 }
 
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vpshldd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
   def vpshldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+  def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
   def vpshldw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
+  def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
 }
 
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
   def vpshrdd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
   def vpshrdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+  def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
   def vpshrdw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
 }
 
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+  def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
+  def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
+  def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+  def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+  def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
   def vpshrdw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
 }
 

diff  --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6f0987b0adb99..910b459930765 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2851,11 +2851,32 @@ static bool interp__builtin_elementwise_triop(
     return true;
   }
 
-  // Vector type.
   const auto *VecT = Arg0Type->castAs<VectorType>();
   const PrimType &ElemT = *S.getContext().classify(VecT->getElementType());
   unsigned NumElems = VecT->getNumElements();
+  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
+  // Vector + Vector + Scalar case.
+  if (!Arg2Type->isVectorType()) {
+    APSInt Op2 = popToAPSInt(
+        S.Stk, *S.getContext().classify(Call->getArg(2)->getType()));
+
+    const Pointer &Op1 = S.Stk.pop<Pointer>();
+    const Pointer &Op0 = S.Stk.pop<Pointer>();
+    const Pointer &Dst = S.Stk.peek<Pointer>();
+    for (unsigned I = 0; I != NumElems; ++I) {
+      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+        Dst.elem<T>(I) = static_cast<T>(APSInt(
+            Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2),
+            DestUnsigned));
+      });
+    }
+    Dst.initializeAllElements();
+
+    return true;
+  }
 
+  // Vector type.
   const Pointer &Op2 = S.Stk.pop<Pointer>();
   const Pointer &Op1 = S.Stk.pop<Pointer>();
   const Pointer &Op0 = S.Stk.pop<Pointer>();
@@ -3435,6 +3456,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return F;
         });
 
+  case X86::BI__builtin_ia32_vpshldd128:
+  case X86::BI__builtin_ia32_vpshldd256:
+  case X86::BI__builtin_ia32_vpshldd512:
+  case X86::BI__builtin_ia32_vpshldq128:
+  case X86::BI__builtin_ia32_vpshldq256:
+  case X86::BI__builtin_ia32_vpshldq512:
+  case X86::BI__builtin_ia32_vpshldw128:
+  case X86::BI__builtin_ia32_vpshldw256:
+  case X86::BI__builtin_ia32_vpshldw512:
+    return interp__builtin_elementwise_triop(
+        S, OpPC, Call,
+        [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) {
+          return llvm::APIntOps::fshl(Hi, Lo, Amt);
+        });
+
+  case X86::BI__builtin_ia32_vpshrdd128:
+  case X86::BI__builtin_ia32_vpshrdd256:
+  case X86::BI__builtin_ia32_vpshrdd512:
+  case X86::BI__builtin_ia32_vpshrdq128:
+  case X86::BI__builtin_ia32_vpshrdq256:
+  case X86::BI__builtin_ia32_vpshrdq512:
+  case X86::BI__builtin_ia32_vpshrdw128:
+  case X86::BI__builtin_ia32_vpshrdw256:
+  case X86::BI__builtin_ia32_vpshrdw512:
+    // NOTE: Reversed Hi/Lo operands.
+    return interp__builtin_elementwise_triop(
+        S, OpPC, Call,
+        [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
+          return llvm::APIntOps::fshr(Hi, Lo, Amt);
+        });
+
   case clang::X86::BI__builtin_ia32_blendvpd:
   case clang::X86::BI__builtin_ia32_blendvpd256:
   case clang::X86::BI__builtin_ia32_blendvps:

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 16f03c84384ae..13e025e56c547 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11874,6 +11874,69 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+  case X86::BI__builtin_ia32_vpshldd128:
+  case X86::BI__builtin_ia32_vpshldd256:
+  case X86::BI__builtin_ia32_vpshldd512:
+  case X86::BI__builtin_ia32_vpshldq128:
+  case X86::BI__builtin_ia32_vpshldq256:
+  case X86::BI__builtin_ia32_vpshldq512:
+  case X86::BI__builtin_ia32_vpshldw128:
+  case X86::BI__builtin_ia32_vpshldw256:
+  case X86::BI__builtin_ia32_vpshldw512: {
+    APValue SourceHi, SourceLo, SourceAmt;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceHi) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceLo) ||
+        !EvaluateAsRValue(Info, E->getArg(2), SourceAmt))
+      return false;
+
+    QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    unsigned SourceLen = SourceHi.getVectorLength();
+    SmallVector<APValue, 32> ResultElements;
+    ResultElements.reserve(SourceLen);
+
+    APInt Amt = SourceAmt.getInt();
+    for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+      APInt Hi = SourceHi.getVectorElt(EltNum).getInt();
+      APInt Lo = SourceLo.getVectorElt(EltNum).getInt();
+      APInt R = llvm::APIntOps::fshl(Hi, Lo, Amt);
+      ResultElements.push_back(
+          APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType())));
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+  case X86::BI__builtin_ia32_vpshrdd128:
+  case X86::BI__builtin_ia32_vpshrdd256:
+  case X86::BI__builtin_ia32_vpshrdd512:
+  case X86::BI__builtin_ia32_vpshrdq128:
+  case X86::BI__builtin_ia32_vpshrdq256:
+  case X86::BI__builtin_ia32_vpshrdq512:
+  case X86::BI__builtin_ia32_vpshrdw128:
+  case X86::BI__builtin_ia32_vpshrdw256:
+  case X86::BI__builtin_ia32_vpshrdw512: {
+    // NOTE: Reversed Hi/Lo operands.
+    APValue SourceHi, SourceLo, SourceAmt;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLo) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceHi) ||
+        !EvaluateAsRValue(Info, E->getArg(2), SourceAmt))
+      return false;
+
+    QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    unsigned SourceLen = SourceHi.getVectorLength();
+    SmallVector<APValue, 32> ResultElements;
+    ResultElements.reserve(SourceLen);
+
+    APInt Amt = SourceAmt.getInt();
+    for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+      APInt Hi = SourceHi.getVectorElt(EltNum).getInt();
+      APInt Lo = SourceLo.getVectorElt(EltNum).getInt();
+      APInt R = llvm::APIntOps::fshr(Hi, Lo, Amt);
+      ResultElements.push_back(
+          APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType())));
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
   case X86::BI__builtin_ia32_blendvpd:
   case X86::BI__builtin_ia32_blendvpd256:
   case X86::BI__builtin_ia32_blendvps:

diff  --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c
index aecd965551ce1..560035598a6e4 100644
--- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c
@@ -96,6 +96,7 @@ __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mask_shldi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51),  999, 38280596832649216LL, -40532396646334464LL, 999, 999, 999, -47287796087390209LL, 999));
 
 __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_shldi_epi64
@@ -103,12 +104,14 @@ __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_shldi_epi64(__U, __A, __B, 63);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_shldi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}),  51),  0, 38280596832649216LL, -40532396646334464LL, 0, 0, 0, -47287796087390209LL, 0));
 
 __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_shldi_epi64
   // CHECK: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
   return _mm512_shldi_epi64(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_shldi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}),  51),  38280596832649215LL, 38280596832649216LL, -40532396646334464LL, 45035996273704959LL, -42784196460019713LL, 47287796087390208LL, -47287796087390209LL, 54043195528445951LL));
 
 __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_shldi_epi32
@@ -116,6 +119,7 @@ __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_mask_shldi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11),  65536, 999, 999, 73727, 999, -73729, -75777, 81919, 999, 86015, 999, 999, 999, 94207, -92161, 999));
 
 __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_shldi_epi32
@@ -123,12 +127,14 @@ __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_shldi_epi32(__U, __A, __B, 15);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_shldi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}),  11),  65536, 0, 0, 73727, 0, -73729, -75777, 81919, 0, 86015, 0, 0, 0, 94207, -92161, 0));
 
 __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_shldi_epi32
   // CHECK: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
   return _mm512_shldi_epi32(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_shldi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}),  11),  65536, 67584, -69632, 73727, 73728, -73729, -75777, 81919, -79873, 86015, 88063, -86017, -88065, 94207, -92161, -96256));
 
 __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_shldi_epi16
@@ -136,6 +142,7 @@ __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_shldi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7),  8319, 999, -8321, 999, 8704, -8832, 999, -8961, 9216, 9344, 999, 999, -9728, 9983, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 11136, 999, 999, 999, 11775, 11903, 999, -11905, 999));
 
 __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_shldi_epi16
@@ -143,12 +150,14 @@ __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_shldi_epi16(__U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shldi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}),  7),  8319, 0, -8321, 0, 8704, -8832, 0, -8961, 9216, 9344, 0, 0, -9728, 9983, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 11136, 0, 0, 0, 11775, 11903, 0, -11905, 0));
 
 __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_shldi_epi16
   // CHECK: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15))
   return _mm512_shldi_epi16(__A, __B, 15);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_shldi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}),  7),  8319, 8320, -8321, 8576, 8704, -8832, 8960, -8961, 9216, 9344, -9472, 9727, -9728, 9983, 10111, -10112, -10240, -10241, 10623, -10497, 10752, 11007, 11008, 11136, -11137, -11392, 11647, 11775, 11903, -11777, -11905, 12160));
 
 __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_shrdi_epi64
@@ -156,6 +165,7 @@ __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mask_shrdi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51),  999, 16384, 32767, 999, 999, 999, -49153, 999));
 
 __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_shrdi_epi64
@@ -163,12 +173,14 @@ __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_shrdi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}),  51),  0, 16384, 32767, 0, 0, 0, -49153, 0));
 
 __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_shrdi_epi64
   // CHECK: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
   return _mm512_shrdi_epi64(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_shrdi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}),  51),  -8192, 16384, 32767, -32768, -32769, 49152, -49153, -65536));
 
 __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_shrdi_epi32
@@ -176,6 +188,7 @@ __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_mask_shrdi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11),  2097152, 999, 999, -8388608, 999, -10485761, -12582913, -16777216, 999, -20971520, 999, 999, 999, -29360128, -29360129, 999));
 
 __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_shrdi_epi32
@@ -183,12 +196,14 @@ __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_shrdi_epi32(__U, __A, __B, 15);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_shrdi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}),  11),  2097152, 0, 0, -8388608, 0, -10485761, -12582913, -16777216, 0, -20971520, 0, 0, 0, -29360128, -29360129, 0));
 
 __m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_shrdi_epi32
   // CHECK: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
   return _mm512_shrdi_epi32(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_shrdi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}),  11),  2097152, 4194304, 8388607, -8388608, 10485760, -10485761, -12582913, -16777216, -16777217, -20971520, -23068672, -23068673, -25165825, -29360128, -29360129, 35651583));
 
 __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_shrdi_epi16
@@ -196,6 +211,7 @@ __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 3);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_shrdi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7),  -512, 999, -1025, 999, 2560, 3583, 999, -3585, 4608, 5120, 999, 999, 7167, -7168, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 12288, 999, 999, 999, -14336, -14848, 999, -15361, 999));
 
 __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_shrdi_epi16
@@ -203,12 +219,14 @@ __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_shrdi_epi16(__U, __A, __B, 15);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shrdi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}),  7),  -512, 0, -1025, 0, 2560, 3583, 0, -3585, 4608, 5120, 0, 0, 7167, -7168, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 12288, 0, 0, 0, -14336, -14848, 0, -15361, 0));
 
 __m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_shrdi_epi16
   // CHECK: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31))
   return _mm512_shrdi_epi16(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_shrdi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}),  7),  -512, 1024, -1025, 2048, 2560, 3583, 3584, -3585, 4608, 5120, 6143, -6144, 7167, -7168, -7680, 8703, 9215, -8705, -9728, -9729, 10752, -11264, 11776, 12288, -12289, 13823, -13824, -14336, -14848, -14849, -15361, 16384));
 
 __m512i test_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_shldv_epi64

diff  --git a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c
index 913264fe877cd..e1e8578ea414a 100644
--- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c
@@ -180,6 +180,7 @@ __m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 47);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_mask_shldi_epi64(((__m256i)(__v4di){ 999, 999, 999, 999}), 0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50),  -7881299347898369LL, -10133099161583616LL, 999, 12384898975268864LL));
 
 __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_shldi_epi64
@@ -187,12 +188,14 @@ __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_maskz_shldi_epi64(__U, __A, __B, 63);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_shldi_epi64(0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}),  50),  -7881299347898369LL, -10133099161583616LL, 0, 12384898975268864LL));
 
 __m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_shldi_epi64
   // CHECK: call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31))
   return _mm256_shldi_epi64(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_shldi_epi64(((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}),  50),  -7881299347898369LL, -10133099161583616LL, 11258999068426240LL, 12384898975268864LL));
 
 __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_shldi_epi64
@@ -200,6 +203,7 @@ __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_mask_shldi_epi64(__S, __U, __A, __B, 47);
 }
+TEST_CONSTEXPR(match_v2di(_mm_mask_shldi_epi64(((__m128i)(__v2di){ 999, 999}), 0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5),  999, -160));
 
 __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_shldi_epi64
@@ -207,12 +211,14 @@ __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_maskz_shldi_epi64(__U, __A, __B, 63);
 }
+TEST_CONSTEXPR(match_v2di(_mm_maskz_shldi_epi64(0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}),  5),  0, -160));
 
 __m128i test_mm_shldi_epi64(__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_shldi_epi64
   // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31))
   return _mm_shldi_epi64(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v2di(_mm_shldi_epi64(((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}),  5),  -97, -160));
 
 __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_shldi_epi32
@@ -220,6 +226,7 @@ __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_mask_shldi_epi32(((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}), 0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9),  999, 999, 9216, -9217, 10240, 999, -11264, -11776));
 
 __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_shldi_epi32
@@ -227,12 +234,14 @@ __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_shldi_epi32(__U, __A, __B, 15);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_shldi_epi32(0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}),  9),  0, 0, 9216, -9217, 10240, 0, -11264, -11776));
 
 __m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_shldi_epi32
   // CHECK: call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31))
   return _mm256_shldi_epi32(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_shldi_epi32(((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}),  9),  -8192, 9215, 9216, -9217, 10240, 10752, -11264, -11776));
 
 __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_shldi_epi32
@@ -240,6 +249,7 @@ __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_shldi_epi32(__S, __U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v4si(_mm_mask_shldi_epi32(((__m128i)(__v4si){ 999, 999, 999, 999}), 0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10),  8192, 999, 11263, -11264));
 
 __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_shldi_epi32
@@ -247,12 +257,14 @@ __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_shldi_epi32(__U, __A, __B, 15);
 }
+TEST_CONSTEXPR(match_v4si(_mm_maskz_shldi_epi32(0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}),  10),  8192, 0, 11263, -11264));
 
 __m128i test_mm_shldi_epi32(__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_shldi_epi32
   // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31))
   return _mm_shldi_epi32(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v4si(_mm_shldi_epi32(((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}),  10),  8192, 9216, 11263, -11264));
 
 __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_shldi_epi16
@@ -260,6 +272,7 @@ __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 3);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shldi_epi16(((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10),  -32768, 999, 999, 999, 999, -27648, 999, -24577, 25599, 999, 22528, 999, 21503, 999, 999, 999));
 
 __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_shldi_epi16
@@ -267,12 +280,14 @@ __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_shldi_epi16(__U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shldi_epi16(0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}),  10),  -32768, 0, 0, 0, 0, -27648, 0, -24577, 25599, 0, 22528, 0, 21503, 0, 0, 0));
 
 __m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_shldi_epi16
   // CHECK: call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31))
   return _mm256_shldi_epi16(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_shldi_epi16(((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}),  10),  -32768, 32767, 30720, -28673, 29695, -27648, 27647, -24577, 25599, 23552, 22528, 21504, 21503, 20479, 19455, 18431));
 
 __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_shldi_epi16
@@ -280,6 +295,7 @@ __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_shldi_epi16(__S, __U, __A, __B, 3);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shldi_epi16(((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8),  999, 999, -4608, -4864, 5375, 999, 999, 6143));
 
 __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_shldi_epi16
@@ -287,12 +303,14 @@ __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_shldi_epi16(__U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shldi_epi16(0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}),  8),  0, 0, -4608, -4864, 5375, 0, 0, 6143));
 
 __m128i test_mm_shldi_epi16(__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_shldi_epi16
   // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31))
   return _mm_shldi_epi16(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_shldi_epi16(((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}),  8),  4351, 4607, -4608, -4864, 5375, -5376, -5632, 6143));
 
 __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_shrdi_epi64
@@ -300,6 +318,7 @@ __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 47);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_mask_shrdi_epi64(((__m256i)(__v4di){ 999, 999, 999, 999}), 0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50),  -1, 49151, 999, 65536));
 
 __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_shrdi_epi64
@@ -307,12 +326,14 @@ __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_shrdi_epi64(0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}),  50),  -1, 49151, 0, 65536));
 
 __m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_shrdi_epi64
   // CHECK: call <4 x i64>  @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31)
   return _mm256_shrdi_epi64(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_shrdi_epi64(((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}),  50),  -1, 49151, 49152, 65536));
 
 __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_shrdi_epi64
@@ -320,6 +341,7 @@ __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_mask_shrdi_epi64(__S, __U, __A, __B, 47);
 }
+TEST_CONSTEXPR(match_v2di(_mm_mask_shrdi_epi64(((__m128i)(__v2di){ 999, 999}), 0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5),  999, 1729382256910270463LL));
 
 __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_shrdi_epi64
@@ -327,12 +349,14 @@ __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_maskz_shrdi_epi64(__U, __A, __B, 63);
 }
+TEST_CONSTEXPR(match_v2di(_mm_maskz_shrdi_epi64(0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}),  5),  0, 1729382256910270463LL));
 
 __m128i test_mm_shrdi_epi64(__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_shrdi_epi64
   // CHECK: call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31))
   return _mm_shrdi_epi64(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v2di(_mm_shrdi_epi64(((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}),  5),  -1, 1729382256910270463LL));
 
 __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_shrdi_epi32
@@ -340,6 +364,7 @@ __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_mask_shrdi_epi32(((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}), 0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9),  999, 999, 25165824, -25165825, 41943040, 999, 67108863, 75497471));
 
 __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_shrdi_epi32
@@ -347,12 +372,14 @@ __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_shrdi_epi32(__U, __A, __B, 15);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_shrdi_epi32(0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}),  9),  0, 0, 25165824, -25165825, 41943040, 0, 67108863, 75497471));
 
 __m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_shrdi_epi32
   // CHECK: call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31)
   return _mm256_shrdi_epi32(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_shrdi_epi32(((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}),  9),  16777215, -16777216, 25165824, -25165825, 41943040, 50331648, 67108863, 75497471));
 
 __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_shrdi_epi32
@@ -360,6 +387,7 @@ __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_shrdi_epi32(__S, __U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v4si(_mm_mask_shrdi_epi32(((__m128i)(__v4si){ 999, 999, 999, 999}), 0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10),  4194304, 999, -12582912, 20971519));
 
 __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_shrdi_epi32
@@ -367,12 +395,14 @@ __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_shrdi_epi32(__U, __A, __B, 15);
 }
+TEST_CONSTEXPR(match_v4si(_mm_maskz_shrdi_epi32(0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}),  10),  4194304, 0, -12582912, 20971519));
 
 __m128i test_mm_shrdi_epi32(__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_shrdi_epi32
   // CHECK: call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31))
   return _mm_shrdi_epi32(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v4si(_mm_shrdi_epi32(((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}),  10),  4194304, 8388608, -12582912, 20971519));
 
 __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_shrdi_epi16
@@ -380,6 +410,7 @@ __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 3);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shrdi_epi16(((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10),  64, 999, 999, 999, 999, 384, 999, -512, -513, 999, 767, 999, -769, 999, 999, 999));
 
 __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_shrdi_epi16
@@ -387,12 +418,14 @@ __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_shrdi_epi16(__U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shrdi_epi16(0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}),  10),  64, 0, 0, 0, 0, 384, 0, -512, -513, 0, 767, 0, -769, 0, 0, 0));
 
 __m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_shrdi_epi16
   // CHECK: call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31))
   return _mm256_shrdi_epi16(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_shrdi_epi16(((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}),  10),  64, -65, 255, -256, -257, 384, -385, -512, -513, 703, 767, 831, -769, -833, -897, -961));
 
 __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_shrdi_epi16
@@ -400,6 +433,7 @@ __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_shrdi_epi16(__S, __U, __A, __B, 3);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shrdi_epi16(((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8),  999, 999, 1023, 1279, -1280, 999, 999, -2048));
 
 __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_shrdi_epi16
@@ -407,12 +441,14 @@ __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_shrdi_epi16(__U, __A, __B, 7);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shrdi_epi16(0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}),  8),  0, 0, 1023, 1279, -1280, 0, 0, -2048));
 
 __m128i test_mm_shrdi_epi16(__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_shrdi_epi16
   // CHECK: call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31))
   return _mm_shrdi_epi16(__A, __B, 31);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_shrdi_epi16(((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}),  8),  -256, -512, 1023, 1279, -1280, 1791, 2047, -2048));
 
 __m256i test_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_shldv_epi64


        


More information about the cfe-commits mailing list