r330323 - Lowering x86 adds/addus/subs/subus intrinsics (clang)
Chandler Carruth via cfe-commits
cfe-commits at lists.llvm.org
Thu Apr 26 14:45:22 PDT 2018
FYI, reverting this because the LLVM commit was reverted. See
http://llvm.org/PR37260 for details.
On Thu, Apr 19, 2018 at 6:32 AM Alexander Ivchenko via cfe-commits <
cfe-commits at lists.llvm.org> wrote:
> Author: aivchenk
> Date: Thu Apr 19 05:15:11 2018
> New Revision: 330323
>
> URL: http://llvm.org/viewvc/llvm-project?rev=330323&view=rev
> Log:
> Lowering x86 adds/addus/subs/subus intrinsics (clang)
>
> This is the patch that lowers x86 intrinsics to native IR
> in order to enable optimizations.
>
> Patch by tkrupa
>
> Differential Revision: https://reviews.llvm.org/D44786
>
> Modified:
> cfe/trunk/lib/CodeGen/CGBuiltin.cpp
> cfe/trunk/test/CodeGen/avx2-builtins.c
> cfe/trunk/test/CodeGen/avx512bw-builtins.c
> cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
> cfe/trunk/test/CodeGen/sse2-builtins.c
>
> Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=330323&r1=330322&r2=330323&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Apr 19 05:15:11 2018
> @@ -8449,6 +8449,76 @@ static Value *EmitX86SExtMask(CodeGenFun
> return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
> }
>
> +// Emit addition or subtraction with saturation.
> +// Handles both signed and unsigned intrinsics.
> +static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, const CallExpr
> *E,
> + SmallVectorImpl<Value *> &Ops,
> + bool IsAddition, bool Signed) {
> +
> + // Collect vector elements and type data.
> + llvm::Type *ResultType = CGF.ConvertType(E->getType());
> + int NumElements = ResultType->getVectorNumElements();
> + Value *Res;
> + if (!IsAddition && !Signed) {
> + Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0],
> Ops[1]);
> + Value *Select = CGF.Builder.CreateSelect(ICmp, Ops[0], Ops[1]);
> + Res = CGF.Builder.CreateSub(Select, Ops[1]);
> + } else {
> + unsigned EltSizeInBits = ResultType->getScalarSizeInBits();
> + llvm::Type *ExtElementType = EltSizeInBits == 8 ?
> + CGF.Builder.getInt16Ty() :
> + CGF.Builder.getInt32Ty();
> +
> + // Extending vectors to next possible width to make space for possible
> + // overflow.
> + llvm::Type *ExtType = llvm::VectorType::get(ExtElementType,
> NumElements);
> + Value *VecA = Signed ? CGF.Builder.CreateSExt(Ops[0], ExtType)
> + : CGF.Builder.CreateZExt(Ops[0], ExtType);
> + Value *VecB = Signed ? CGF.Builder.CreateSExt(Ops[1], ExtType)
> + : CGF.Builder.CreateZExt(Ops[1], ExtType);
> +
> + llvm::Value *ExtProduct = IsAddition ? CGF.Builder.CreateAdd(VecA,
> VecB)
> + : CGF.Builder.CreateSub(VecA,
> VecB);
> +
> + // Create vector of the same type as expected result with max possible
> + // values and extend it to the same type as the product of the
> addition.
> + APInt SignedMaxValue =
> + llvm::APInt::getSignedMaxValue(EltSizeInBits);
> + Value *Max = Signed ? llvm::ConstantInt::get(ResultType,
> SignedMaxValue)
> + : llvm::Constant::getAllOnesValue(ResultType);
> + Value *ExtMaxVec = Signed ? CGF.Builder.CreateSExt(Max, ExtType)
> + : CGF.Builder.CreateZExt(Max, ExtType);
> + // In Product, replace all overflowed values with max values of
> non-extended
> + // type.
> + ICmpInst::Predicate Pred = Signed ? ICmpInst::ICMP_SLE :
> ICmpInst::ICMP_ULE;
> + Value *Cmp = CGF.Builder.CreateICmp(Pred, ExtProduct,
> + ExtMaxVec); // 1 if no overflow.
> + Value *SaturatedProduct = CGF.Builder.CreateSelect(
> + Cmp, ExtProduct, ExtMaxVec); // If overflowed, copy from max
> values.
> +
> + if (Signed) {
> + APInt SignedMinValue =
> + llvm::APInt::getSignedMinValue(EltSizeInBits);
> + Value *Min = llvm::ConstantInt::get(ResultType, SignedMinValue);
> + Value *ExtMinVec = CGF.Builder.CreateSExt(Min, ExtType);
> + Value *IsNegative =
> + CGF.Builder.CreateICmp(ICmpInst::ICMP_SLT, SaturatedProduct,
> ExtMinVec);
> + SaturatedProduct =
> + CGF.Builder.CreateSelect(IsNegative, ExtMinVec, SaturatedProduct);
> + }
> +
> + Res = CGF.Builder.CreateTrunc(SaturatedProduct,
> + ResultType); // Trunc to ResultType.
> + }
> + if (E->getNumArgs() == 4) { // For masked intrinsics.
> + Value *VecSRC = Ops[2];
> + Value *Mask = Ops[3];
> + return EmitX86Select(CGF, Mask, Res, VecSRC);
> + }
> +
> + return Res;
> +}
> +
> Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
> const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
> StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
> @@ -9516,10 +9586,37 @@ Value *CodeGenFunction::EmitX86BuiltinEx
> Load->setVolatile(true);
> return Load;
> }
> + case X86::BI__builtin_ia32_paddusb512_mask:
> + case X86::BI__builtin_ia32_paddusw512_mask:
> + case X86::BI__builtin_ia32_paddusb256:
> + case X86::BI__builtin_ia32_paddusw256:
> + case X86::BI__builtin_ia32_paddusb128:
> + case X86::BI__builtin_ia32_paddusw128:
> + return EmitX86AddSubSatExpr(*this, E, Ops, true, false); // Add,
> unsigned.
> + case X86::BI__builtin_ia32_paddsb512_mask:
> + case X86::BI__builtin_ia32_paddsw512_mask:
> + case X86::BI__builtin_ia32_paddsb256:
> + case X86::BI__builtin_ia32_paddsw256:
> + case X86::BI__builtin_ia32_paddsb128:
> + case X86::BI__builtin_ia32_paddsw128:
> + return EmitX86AddSubSatExpr(*this, E, Ops, true, true); // Add,
> signed.
> + case X86::BI__builtin_ia32_psubusb512_mask:
> + case X86::BI__builtin_ia32_psubusw512_mask:
> + case X86::BI__builtin_ia32_psubusb256:
> + case X86::BI__builtin_ia32_psubusw256:
> + case X86::BI__builtin_ia32_psubusb128:
> + case X86::BI__builtin_ia32_psubusw128:
> + return EmitX86AddSubSatExpr(*this, E, Ops, false, false); // Sub,
> unsigned.
> + case X86::BI__builtin_ia32_psubsb512_mask:
> + case X86::BI__builtin_ia32_psubsw512_mask:
> + case X86::BI__builtin_ia32_psubsb256:
> + case X86::BI__builtin_ia32_psubsw256:
> + case X86::BI__builtin_ia32_psubsb128:
> + case X86::BI__builtin_ia32_psubsw128:
> + return EmitX86AddSubSatExpr(*this, E, Ops, false, true); // Sub,
> signed.
> }
> }
>
> -
> Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
> const CallExpr *E) {
> SmallVector<Value*, 4> Ops;
>
> Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=330323&r1=330322&r2=330323&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx2-builtins.c Thu Apr 19 05:15:11 2018
> @@ -56,25 +56,53 @@ __m256i test_mm256_add_epi64(__m256i a,
>
> __m256i test_mm256_adds_epi8(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_adds_epi8
> - // CHECK: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}}, <32
> x i8> %{{.*}})
> + // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> + // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32
> x i16> %{{.*}}
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> return _mm256_adds_epi8(a, b);
> }
>
> __m256i test_mm256_adds_epi16(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_adds_epi16
> - // CHECK: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> + // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>
> %{{.*}}, <16 x i16> %{{.*}})
> + // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>, <16 x i32> %{{.*}}
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> return _mm256_adds_epi16(a, b);
> }
>
> __m256i test_mm256_adds_epu8(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_adds_epu8
> - // CHECK: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}}, <32
> x i8> %{{.*}})
> + // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> return _mm256_adds_epu8(a, b);
> }
>
> __m256i test_mm256_adds_epu16(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_adds_epu16
> - // CHECK: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> + // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>
> %{{.*}}, <16 x i16> %{{.*}})
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535>
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> return _mm256_adds_epu16(a, b);
> }
>
> @@ -1171,25 +1199,47 @@ __m256i test_mm256_sub_epi64(__m256i a,
>
> __m256i test_mm256_subs_epi8(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_subs_epi8
> - // CHECK: call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %{{.*}}, <32
> x i8> %{{.*}})
> + // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> + // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: sub <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32
> x i16> %{{.*}}
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> return _mm256_subs_epi8(a, b);
> }
>
> __m256i test_mm256_subs_epi16(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_subs_epi16
> - // CHECK: call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> + // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>
> %{{.*}}, <16 x i16> %{{.*}})
> + // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: sub <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>, <16 x i32> %{{.*}}
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> return _mm256_subs_epi16(a, b);
> }
>
> __m256i test_mm256_subs_epu8(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_subs_epu8
> - // CHECK: call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %{{.*}}, <32
> x i8> %{{.*}})
> + // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> + // CHECK: icmp ugt <32 x i8> {{.*}}, {{.*}}
> + // CHECK: select <32 x i1> {{.*}}, <32 x i8> {{.*}}, <32 x i8> {{.*}}
> + // CHECK: sub <32 x i8> {{.*}}, {{.*}}
> return _mm256_subs_epu8(a, b);
> }
>
> __m256i test_mm256_subs_epu16(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_subs_epu16
> - // CHECK: call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> + // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>
> %{{.*}}, <16 x i16> %{{.*}})
> + // CHECK: icmp ugt <16 x i16> {{.*}}, {{.*}}
> + // CHECK: select <16 x i1> {{.*}}, <16 x i16> {{.*}}, <16 x i16> {{.*}}
> + // CHECK: sub <16 x i16> {{.*}}, {{.*}}
> return _mm256_subs_epu16(a, b);
> }
>
>
> Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=330323&r1=330322&r2=330323&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Thu Apr 19 05:15:11 2018
> @@ -594,62 +594,154 @@ __m512i test_mm512_maskz_packus_epi16(__
> }
> __m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_adds_epi8
> - // CHECK: @llvm.x86.avx512.mask.padds.b.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> return _mm512_adds_epi8(__A,__B);
> }
> __m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_adds_epi8
> - // CHECK: @llvm.x86.avx512.mask.padds.b.512
> - return _mm512_mask_adds_epi8(__W,__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> + return _mm512_mask_adds_epi8(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_adds_epi8
> - // CHECK: @llvm.x86.avx512.mask.padds.b.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> return _mm512_maskz_adds_epi8(__U,__A,__B);
> }
> __m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_adds_epi16
> - // CHECK: @llvm.x86.avx512.mask.padds.w.512
> - return _mm512_adds_epi16(__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + return _mm512_adds_epi16(__A,__B);
> }
> __m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_adds_epi16
> - // CHECK: @llvm.x86.avx512.mask.padds.w.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> return _mm512_mask_adds_epi16(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_adds_epi16(__mmask32 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_adds_epi16
> - // CHECK: @llvm.x86.avx512.mask.padds.w.512
> - return _mm512_maskz_adds_epi16(__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> +return _mm512_maskz_adds_epi16(__U,__A,__B);
> }
> __m512i test_mm512_adds_epu8(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_adds_epu8
> - // CHECK: @llvm.x86.avx512.mask.paddus.b.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> return _mm512_adds_epu8(__A,__B);
> }
> __m512i test_mm512_mask_adds_epu8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_adds_epu8
> - // CHECK: @llvm.x86.avx512.mask.paddus.b.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> return _mm512_mask_adds_epu8(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_adds_epu8
> - // CHECK: @llvm.x86.avx512.mask.paddus.b.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> return _mm512_maskz_adds_epu8(__U,__A,__B);
> }
> __m512i test_mm512_adds_epu16(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_adds_epu16
> - // CHECK: @llvm.x86.avx512.mask.paddus.w.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> return _mm512_adds_epu16(__A,__B);
> }
> __m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_adds_epu16
> - // CHECK: @llvm.x86.avx512.mask.paddus.w.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> return _mm512_mask_adds_epu16(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_adds_epu16
> - // CHECK: @llvm.x86.avx512.mask.paddus.w.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> return _mm512_maskz_adds_epu16(__U,__A,__B);
> }
> __m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
> @@ -903,63 +995,137 @@ __m512i test_mm512_maskz_shuffle_epi8(__
> }
> __m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_subs_epi8
> - // CHECK: @llvm.x86.avx512.mask.psubs.b.512
> - return _mm512_subs_epi8(__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> +return _mm512_subs_epi8(__A,__B);
> }
> __m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_subs_epi8
> - // CHECK: @llvm.x86.avx512.mask.psubs.b.512
> - return _mm512_mask_subs_epi8(__W,__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> +return _mm512_mask_subs_epi8(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_subs_epi8(__mmask64 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_subs_epi8
> - // CHECK: @llvm.x86.avx512.mask.psubs.b.512
> - return _mm512_maskz_subs_epi8(__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> +return _mm512_maskz_subs_epi8(__U,__A,__B);
> }
> __m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_subs_epi16
> - // CHECK: @llvm.x86.avx512.mask.psubs.w.512
> - return _mm512_subs_epi16(__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> +return _mm512_subs_epi16(__A,__B);
> }
> __m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_subs_epi16
> - // CHECK: @llvm.x86.avx512.mask.psubs.w.512
> - return _mm512_mask_subs_epi16(__W,__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> +return _mm512_mask_subs_epi16(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_subs_epi16(__mmask32 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_subs_epi16
> - // CHECK: @llvm.x86.avx512.mask.psubs.w.512
> - return _mm512_maskz_subs_epi16(__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> +return _mm512_maskz_subs_epi16(__U,__A,__B);
> }
> __m512i test_mm512_subs_epu8(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_subs_epu8
> - // CHECK: @llvm.x86.avx512.mask.psubus.b.512
> - return _mm512_subs_epu8(__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
> + // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
> + // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
> + // CHECK: sub <64 x i8> {{.*}}, {{.*}}
> +return _mm512_subs_epu8(__A,__B);
> }
> __m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_subs_epu8
> - // CHECK: @llvm.x86.avx512.mask.psubus.b.512
> - return _mm512_mask_subs_epu8(__W,__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
> + // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
> + // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
> + // CHECK: sub <64 x i8> {{.*}}, {{.*}}
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> +return _mm512_mask_subs_epu8(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_subs_epu8
> - // CHECK: @llvm.x86.avx512.mask.psubus.b.512
> - return _mm512_maskz_subs_epu8(__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
> + // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
> + // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
> + // CHECK: sub <64 x i8> {{.*}}, {{.*}}
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> +return _mm512_maskz_subs_epu8(__U,__A,__B);
> }
> __m512i test_mm512_subs_epu16(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_subs_epu16
> - // CHECK: @llvm.x86.avx512.mask.psubus.w.512
> - return _mm512_subs_epu16(__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
> + // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
> + // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
> + // CHECK: sub <32 x i16> {{.*}}, {{.*}}
> +return _mm512_subs_epu16(__A,__B);
> }
> __m512i test_mm512_mask_subs_epu16(__m512i __W, __mmask32 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_subs_epu16
> - // CHECK: @llvm.x86.avx512.mask.psubus.w.512
> - return _mm512_mask_subs_epu16(__W,__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
> + // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
> + // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
> + // CHECK: sub <32 x i16> {{.*}}, {{.*}}
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> +return _mm512_mask_subs_epu16(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_subs_epu16
> - // CHECK: @llvm.x86.avx512.mask.psubus.w.512
> - return _mm512_maskz_subs_epu16(__U,__A,__B);
> + // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
> + // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
> + // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
> + // CHECK: sub <32 x i16> {{.*}}, {{.*}}
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
> +return _mm512_maskz_subs_epu16(__U,__A,__B);
> }
> __m512i test_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I,
> __mmask32 __U, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi16
>
> Modified: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vlbw-builtins.c?rev=330323&r1=330322&r2=330323&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Thu Apr 19 05:15:11 2018
> @@ -1075,97 +1075,211 @@ __m256i test_mm256_mask_packus_epi16(__m
>
> __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A,
> __m128i __B) {
> // CHECK-LABEL: @test_mm_mask_adds_epi8
> - // CHECK: @llvm.x86.sse2.padds.b
> + // CHECK-NOT: @llvm.x86.sse2.padds.b
> + // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> +
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16>
> %{{.*}}
> + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
> return _mm_mask_adds_epi8(__W,__U,__A,__B);
> }
> __m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
> // CHECK-LABEL: @test_mm_maskz_adds_epi8
> - // CHECK: @llvm.x86.sse2.padds.b
> + // CHECK-NOT: @llvm.x86.sse2.padds.b
> + // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> +
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16>
> %{{.*}}
> + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
> return _mm_maskz_adds_epi8(__U,__A,__B);
> }
> __m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i
> __A, __m256i __B) {
> // CHECK-LABEL: @test_mm256_mask_adds_epi8
> - // CHECK: @llvm.x86.avx2.padds.b
> + // CHECK-NOT: @llvm.x86.avx2.padds.b
> + // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32
> x i16> %{{.*}}
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
> return _mm256_mask_adds_epi8(__W,__U,__A,__B);
> }
> __m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i
> __B) {
> // CHECK-LABEL: @test_mm256_maskz_adds_epi8
> - // CHECK: @llvm.x86.avx2.padds.b
> + // CHECK-NOT: @llvm.x86.avx2.padds.b
> + // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32
> x i16> %{{.*}}
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
> return _mm256_maskz_adds_epi8(__U,__A,__B);
> }
> __m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A,
> __m128i __B) {
> // CHECK-LABEL: @test_mm_mask_adds_epi16
> - // CHECK: @llvm.x86.sse2.padds.w
> + // CHECK-NOT: @llvm.x86.sse2.padds.w
> + // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767>
> + // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8
> x i32> %{{.*}}
> + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
> return _mm_mask_adds_epi16(__W,__U,__A,__B);
> }
> __m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
> // CHECK-LABEL: @test_mm_maskz_adds_epi16
> - // CHECK: @llvm.x86.sse2.padds.w
> + // CHECK-NOT: @llvm.x86.sse2.padds.w
> + // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767>
> + // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8
> x i32> %{{.*}}
> + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
> return _mm_maskz_adds_epi16(__U,__A,__B);
> }
> __m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i
> __A, __m256i __B) {
> // CHECK-LABEL: @test_mm256_mask_adds_epi16
> - // CHECK: @llvm.x86.avx2.padds.w
> + // CHECK-NOT: @llvm.x86.avx2.padds.w
> + // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>, <16 x i32> %{{.*}}
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
> return _mm256_mask_adds_epi16(__W,__U,__A,__B);
> }
> __m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i
> __B) {
> // CHECK-LABEL: @test_mm256_maskz_adds_epi16
> - // CHECK: @llvm.x86.avx2.padds.w
> + // CHECK-NOT: @llvm.x86.avx2.padds.w
> + // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767>
> + // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>, <16 x i32> %{{.*}}
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
> return _mm256_maskz_adds_epi16(__U,__A,__B);
> }
> -__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A,
> __m128i __B) {
> +__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A,
> __m128i __B) {
> // CHECK-LABEL: @test_mm_mask_adds_epu8
> - // CHECK: @llvm.x86.sse2.paddus.b
> + // CHECK-NOT: @llvm.x86.sse2.paddus.b
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <16 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
> return _mm_mask_adds_epu8(__W,__U,__A,__B);
> }
> __m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
> // CHECK-LABEL: @test_mm_maskz_adds_epu8
> - // CHECK: @llvm.x86.sse2.paddus.b
> + // CHECK-NOT: @llvm.x86.sse2.paddus.b
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <16 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
> return _mm_maskz_adds_epu8(__U,__A,__B);
> }
> __m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i
> __A, __m256i __B) {
> // CHECK-LABEL: @test_mm256_mask_adds_epu8
> - // CHECK: @llvm.x86.avx2.paddus.b
> + // CHECK-NOT: @llvm.x86.avx2.paddus.b
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
> return _mm256_mask_adds_epu8(__W,__U,__A,__B);
> }
> __m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i
> __B) {
> // CHECK-LABEL: @test_mm256_maskz_adds_epu8
> - // CHECK: @llvm.x86.avx2.paddus.b
> + // CHECK-NOT: @llvm.x86.avx2.paddus.b
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
> return _mm256_maskz_adds_epu8(__U,__A,__B);
> }
> __m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A,
> __m128i __B) {
> // CHECK-LABEL: @test_mm_mask_adds_epu16
> - // CHECK: @llvm.x86.sse2.paddus.w
> + // CHECK-NOT: @llvm.x86.sse2.paddus.w
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <8 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535>
> + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
> return _mm_mask_adds_epu16(__W,__U,__A,__B);
> }
> __m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
> // CHECK-LABEL: @test_mm_maskz_adds_epu16
> - // CHECK: @llvm.x86.sse2.paddus.w
> + // CHECK-NOT: @llvm.x86.sse2.paddus.w
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <8 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535>
> + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
> return _mm_maskz_adds_epu16(__U,__A,__B);
> }
> __m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i
> __A, __m256i __B) {
> // CHECK-LABEL: @test_mm256_mask_adds_epu16
> - // CHECK: @llvm.x86.avx2.paddus.w
> + // CHECK-NOT: @llvm.x86.avx2.paddus.w
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535>
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
> return _mm256_mask_adds_epu16(__W,__U,__A,__B);
> }
> __m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i
> __B) {
> // CHECK-LABEL: @test_mm256_maskz_adds_epu16
> - // CHECK: @llvm.x86.avx2.paddus.w
> + // CHECK-NOT: @llvm.x86.avx2.paddus.w
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535>
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
> return _mm256_maskz_adds_epu16(__U,__A,__B);
> }
> @@ -1519,102 +1633,191 @@ __m256i test_mm256_maskz_shuffle_epi8(__
> }
> __m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A,
> __m128i __B) {
> // CHECK-LABEL: @test_mm_mask_subs_epi8
> - // CHECK: @llvm.x86.sse2.psubs.b
> + // CHECK-NOT: @llvm.x86.sse2.psubs.b
> + // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: sub <16 x i16> %{{.*}}, %{{.*}}
> + // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> + // CHECK: select <16 x i1> %{{.*}}, <16 x
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180426/b6df0ea4/attachment-0001.html>
More information about the cfe-commits mailing list