r330323 - Lowering x86 adds/addus/subs/subus intrinsics (clang)

Chandler Carruth via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 26 14:45:22 PDT 2018


FYI, I'm reverting this because the corresponding LLVM commit was reverted.
See http://llvm.org/PR37260 for details.

On Thu, Apr 19, 2018 at 6:32 AM Alexander Ivchenko via cfe-commits <
cfe-commits at lists.llvm.org> wrote:

> Author: aivchenk
> Date: Thu Apr 19 05:15:11 2018
> New Revision: 330323
>
> URL: http://llvm.org/viewvc/llvm-project?rev=330323&view=rev
> Log:
> Lowering x86 adds/addus/subs/subus intrinsics (clang)
>
> This is the patch that lowers x86 intrinsics to native IR
> in order to enable optimizations.
>
> Patch by tkrupa
>
> Differential Revision: https://reviews.llvm.org/D44786
>
> Modified:
>     cfe/trunk/lib/CodeGen/CGBuiltin.cpp
>     cfe/trunk/test/CodeGen/avx2-builtins.c
>     cfe/trunk/test/CodeGen/avx512bw-builtins.c
>     cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
>     cfe/trunk/test/CodeGen/sse2-builtins.c
>
> Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=330323&r1=330322&r2=330323&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Apr 19 05:15:11 2018
> @@ -8449,6 +8449,76 @@ static Value *EmitX86SExtMask(CodeGenFun
>    return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
>  }
>
> +// Emit addition or subtraction with saturation.
> +// Handles both signed and unsigned intrinsics.
> +static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, const CallExpr
> *E,
> +                                   SmallVectorImpl<Value *> &Ops,
> +                                   bool IsAddition, bool Signed) {
> +
> +  // Collect vector elements and type data.
> +  llvm::Type *ResultType = CGF.ConvertType(E->getType());
> +  int NumElements = ResultType->getVectorNumElements();
> +  Value *Res;
> +  if (!IsAddition && !Signed) {
> +    Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0],
> Ops[1]);
> +    Value *Select = CGF.Builder.CreateSelect(ICmp, Ops[0], Ops[1]);
> +    Res = CGF.Builder.CreateSub(Select, Ops[1]);
> +  } else {
> +    unsigned EltSizeInBits = ResultType->getScalarSizeInBits();
> +    llvm::Type *ExtElementType = EltSizeInBits == 8 ?
> +                                 CGF.Builder.getInt16Ty() :
> +                                 CGF.Builder.getInt32Ty();
> +
> +    // Extending vectors to next possible width to make space for possible
> +    // overflow.
> +    llvm::Type *ExtType = llvm::VectorType::get(ExtElementType,
> NumElements);
> +    Value *VecA = Signed ? CGF.Builder.CreateSExt(Ops[0], ExtType)
> +                         : CGF.Builder.CreateZExt(Ops[0], ExtType);
> +    Value *VecB = Signed ? CGF.Builder.CreateSExt(Ops[1], ExtType)
> +                         : CGF.Builder.CreateZExt(Ops[1], ExtType);
> +
> +    llvm::Value *ExtProduct = IsAddition ? CGF.Builder.CreateAdd(VecA,
> VecB)
> +                                         : CGF.Builder.CreateSub(VecA,
> VecB);
> +
> +    // Create vector of the same type as expected result with max possible
> +    // values and extend it to the same type as the product of the
> addition.
> +    APInt SignedMaxValue =
> +        llvm::APInt::getSignedMaxValue(EltSizeInBits);
> +    Value *Max = Signed ? llvm::ConstantInt::get(ResultType,
> SignedMaxValue)
> +                        : llvm::Constant::getAllOnesValue(ResultType);
> +    Value *ExtMaxVec = Signed ? CGF.Builder.CreateSExt(Max, ExtType)
> +                              : CGF.Builder.CreateZExt(Max, ExtType);
> +    // In Product, replace all overflowed values with max values of
> non-extended
> +    // type.
> +    ICmpInst::Predicate Pred = Signed ? ICmpInst::ICMP_SLE :
> ICmpInst::ICMP_ULE;
> +    Value *Cmp = CGF.Builder.CreateICmp(Pred, ExtProduct,
> +                                        ExtMaxVec); // 1 if no overflow.
> +    Value *SaturatedProduct = CGF.Builder.CreateSelect(
> +        Cmp, ExtProduct, ExtMaxVec); // If overflowed, copy from max
> values.
> +
> +    if (Signed) {
> +      APInt SignedMinValue =
> +          llvm::APInt::getSignedMinValue(EltSizeInBits);
> +      Value *Min = llvm::ConstantInt::get(ResultType, SignedMinValue);
> +      Value *ExtMinVec = CGF.Builder.CreateSExt(Min, ExtType);
> +      Value *IsNegative =
> +        CGF.Builder.CreateICmp(ICmpInst::ICMP_SLT, SaturatedProduct,
> ExtMinVec);
> +      SaturatedProduct =
> +        CGF.Builder.CreateSelect(IsNegative, ExtMinVec, SaturatedProduct);
> +    }
> +
> +    Res = CGF.Builder.CreateTrunc(SaturatedProduct,
> +                                  ResultType); // Trunc to ResultType.
> +  }
> +  if (E->getNumArgs() == 4) { // For masked intrinsics.
> +    Value *VecSRC = Ops[2];
> +    Value *Mask = Ops[3];
> +    return EmitX86Select(CGF, Mask, Res, VecSRC);
> +  }
> +
> +  return Res;
> +}
> +
>  Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
>    const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
>    StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
> @@ -9516,10 +9586,37 @@ Value *CodeGenFunction::EmitX86BuiltinEx
>      Load->setVolatile(true);
>      return Load;
>    }
> +  case X86::BI__builtin_ia32_paddusb512_mask:
> +  case X86::BI__builtin_ia32_paddusw512_mask:
> +  case X86::BI__builtin_ia32_paddusb256:
> +  case X86::BI__builtin_ia32_paddusw256:
> +  case X86::BI__builtin_ia32_paddusb128:
> +  case X86::BI__builtin_ia32_paddusw128:
> +    return EmitX86AddSubSatExpr(*this, E, Ops, true, false); // Add,
> unsigned.
> +  case X86::BI__builtin_ia32_paddsb512_mask:
> +  case X86::BI__builtin_ia32_paddsw512_mask:
> +  case X86::BI__builtin_ia32_paddsb256:
> +  case X86::BI__builtin_ia32_paddsw256:
> +  case X86::BI__builtin_ia32_paddsb128:
> +  case X86::BI__builtin_ia32_paddsw128:
> +    return EmitX86AddSubSatExpr(*this, E, Ops, true, true); // Add,
> signed.
> +  case X86::BI__builtin_ia32_psubusb512_mask:
> +  case X86::BI__builtin_ia32_psubusw512_mask:
> +  case X86::BI__builtin_ia32_psubusb256:
> +  case X86::BI__builtin_ia32_psubusw256:
> +  case X86::BI__builtin_ia32_psubusb128:
> +  case X86::BI__builtin_ia32_psubusw128:
> +    return EmitX86AddSubSatExpr(*this, E, Ops, false, false); // Sub,
> unsigned.
> +  case X86::BI__builtin_ia32_psubsb512_mask:
> +  case X86::BI__builtin_ia32_psubsw512_mask:
> +  case X86::BI__builtin_ia32_psubsb256:
> +  case X86::BI__builtin_ia32_psubsw256:
> +  case X86::BI__builtin_ia32_psubsb128:
> +  case X86::BI__builtin_ia32_psubsw128:
> +    return EmitX86AddSubSatExpr(*this, E, Ops, false, true); // Sub,
> signed.
>    }
>  }
>
> -
>  Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
>                                             const CallExpr *E) {
>    SmallVector<Value*, 4> Ops;
>
> Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=330323&r1=330322&r2=330323&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx2-builtins.c Thu Apr 19 05:15:11 2018
> @@ -56,25 +56,53 @@ __m256i test_mm256_add_epi64(__m256i a,
>
>  __m256i test_mm256_adds_epi8(__m256i a, __m256i b) {
>    // CHECK-LABEL: test_mm256_adds_epi8
> -  // CHECK: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}}, <32
> x i8> %{{.*}})
> +  // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> +  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32
> x i16> %{{.*}}
> +  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
>    return _mm256_adds_epi8(a, b);
>  }
>
>  __m256i test_mm256_adds_epi16(__m256i a, __m256i b) {
>    // CHECK-LABEL: test_mm256_adds_epi16
> -  // CHECK: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> +  // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>
> %{{.*}}, <16 x i16> %{{.*}})
> +  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>, <16 x i32> %{{.*}}
> +  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
>    return _mm256_adds_epi16(a, b);
>  }
>
>  __m256i test_mm256_adds_epu8(__m256i a, __m256i b) {
>    // CHECK-LABEL: test_mm256_adds_epu8
> -  // CHECK: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}}, <32
> x i8> %{{.*}})
> +  // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> +  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
>    return _mm256_adds_epu8(a, b);
>  }
>
>  __m256i test_mm256_adds_epu16(__m256i a, __m256i b) {
>    // CHECK-LABEL: test_mm256_adds_epu16
> -  // CHECK: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> +  // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>
> %{{.*}}, <16 x i16> %{{.*}})
> +  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535>
> +  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
>    return _mm256_adds_epu16(a, b);
>  }
>
> @@ -1171,25 +1199,47 @@ __m256i test_mm256_sub_epi64(__m256i a,
>
>  __m256i test_mm256_subs_epi8(__m256i a, __m256i b) {
>    // CHECK-LABEL: test_mm256_subs_epi8
> -  // CHECK: call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %{{.*}}, <32
> x i8> %{{.*}})
> +  // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> +  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: sub <32 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32
> x i16> %{{.*}}
> +  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
>    return _mm256_subs_epi8(a, b);
>  }
>
>  __m256i test_mm256_subs_epi16(__m256i a, __m256i b) {
>    // CHECK-LABEL: test_mm256_subs_epi16
> -  // CHECK: call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> +  // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>
> %{{.*}}, <16 x i16> %{{.*}})
> +  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: sub <16 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>, <16 x i32> %{{.*}}
> +  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
>    return _mm256_subs_epi16(a, b);
>  }
>
>  __m256i test_mm256_subs_epu8(__m256i a, __m256i b) {
>    // CHECK-LABEL: test_mm256_subs_epu8
> -  // CHECK: call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %{{.*}}, <32
> x i8> %{{.*}})
> +  // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> +  // CHECK: icmp ugt <32 x i8> {{.*}}, {{.*}}
> +  // CHECK: select <32 x i1> {{.*}}, <32 x i8> {{.*}}, <32 x i8> {{.*}}
> +  // CHECK: sub <32 x i8> {{.*}}, {{.*}}
>    return _mm256_subs_epu8(a, b);
>  }
>
>  __m256i test_mm256_subs_epu16(__m256i a, __m256i b) {
>    // CHECK-LABEL: test_mm256_subs_epu16
> -  // CHECK: call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> +  // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>
> %{{.*}}, <16 x i16> %{{.*}})
> +  // CHECK: icmp ugt <16 x i16> {{.*}}, {{.*}}
> +  // CHECK: select <16 x i1> {{.*}}, <16 x i16> {{.*}}, <16 x i16> {{.*}}
> +  // CHECK: sub <16 x i16> {{.*}}, {{.*}}
>    return _mm256_subs_epu16(a, b);
>  }
>
>
> Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=330323&r1=330322&r2=330323&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Thu Apr 19 05:15:11 2018
> @@ -594,62 +594,154 @@ __m512i test_mm512_maskz_packus_epi16(__
>  }
>  __m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_adds_epi8
> -  // CHECK: @llvm.x86.avx512.mask.padds.b.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
>    return _mm512_adds_epi8(__A,__B);
>  }
>  __m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask_adds_epi8
> -  // CHECK: @llvm.x86.avx512.mask.padds.b.512
> -  return _mm512_mask_adds_epi8(__W,__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> + return _mm512_mask_adds_epi8(__W,__U,__A,__B);
>  }
>  __m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i
> __B) {
>    // CHECK-LABEL: @test_mm512_maskz_adds_epi8
> -  // CHECK: @llvm.x86.avx512.mask.padds.b.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
>    return _mm512_maskz_adds_epi8(__U,__A,__B);
>  }
>  __m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_adds_epi16
> -  // CHECK: @llvm.x86.avx512.mask.padds.w.512
> -  return _mm512_adds_epi16(__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + return _mm512_adds_epi16(__A,__B);
>  }
>  __m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i
> __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask_adds_epi16
> -  // CHECK: @llvm.x86.avx512.mask.padds.w.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
>    return _mm512_mask_adds_epi16(__W,__U,__A,__B);
>  }
>  __m512i test_mm512_maskz_adds_epi16(__mmask32 __U, __m512i __A, __m512i
> __B) {
>    // CHECK-LABEL: @test_mm512_maskz_adds_epi16
> -  // CHECK: @llvm.x86.avx512.mask.padds.w.512
> -  return _mm512_maskz_adds_epi16(__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> +return _mm512_maskz_adds_epi16(__U,__A,__B);
>  }
>  __m512i test_mm512_adds_epu8(__m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_adds_epu8
> -  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
> +  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
>    return _mm512_adds_epu8(__A,__B);
>  }
>  __m512i test_mm512_mask_adds_epu8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask_adds_epu8
> -  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
> +  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
>    return _mm512_mask_adds_epu8(__W,__U,__A,__B);
>  }
>  __m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i
> __B) {
>    // CHECK-LABEL: @test_mm512_maskz_adds_epu8
> -  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
> +  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
>    return _mm512_maskz_adds_epu8(__U,__A,__B);
>  }
>  __m512i test_mm512_adds_epu16(__m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_adds_epu16
> -  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
> +  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
>    return _mm512_adds_epu16(__A,__B);
>  }
>  __m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i
> __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask_adds_epu16
> -  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
> +  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
>    return _mm512_mask_adds_epu16(__W,__U,__A,__B);
>  }
>  __m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i
> __B) {
>    // CHECK-LABEL: @test_mm512_maskz_adds_epu16
> -  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
> +  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
> +  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
>    return _mm512_maskz_adds_epu16(__U,__A,__B);
>  }
>  __m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
> @@ -903,63 +995,137 @@ __m512i test_mm512_maskz_shuffle_epi8(__
>  }
>  __m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_subs_epi8
> -  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
> -  return _mm512_subs_epi8(__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> +return _mm512_subs_epi8(__A,__B);
>  }
>  __m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask_subs_epi8
> -  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
> -  return _mm512_mask_subs_epi8(__W,__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> +return _mm512_mask_subs_epi8(__W,__U,__A,__B);
>  }
>  __m512i test_mm512_maskz_subs_epi8(__mmask64 __U, __m512i __A, __m512i
> __B) {
>    // CHECK-LABEL: @test_mm512_maskz_subs_epi8
> -  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
> -  return _mm512_maskz_subs_epi8(__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
> +  // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128>, <64 x i16> %{{.*}}
> +  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> +return _mm512_maskz_subs_epi8(__U,__A,__B);
>  }
>  __m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_subs_epi16
> -  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
> -  return _mm512_subs_epi16(__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> +return _mm512_subs_epi16(__A,__B);
>  }
>  __m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i
> __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask_subs_epi16
> -  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
> -  return _mm512_mask_subs_epi16(__W,__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> +return _mm512_mask_subs_epi16(__W,__U,__A,__B);
>  }
>  __m512i test_mm512_maskz_subs_epi16(__mmask32 __U, __m512i __A, __m512i
> __B) {
>    // CHECK-LABEL: @test_mm512_maskz_subs_epi16
> -  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
> -  return _mm512_maskz_subs_epi16(__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
> +  // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x
> i32> %{{.*}}
> +  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> +return _mm512_maskz_subs_epi16(__U,__A,__B);
>  }
>  __m512i test_mm512_subs_epu8(__m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_subs_epu8
> -  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
> -  return _mm512_subs_epu8(__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
> +  // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
> +  // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
> +  // CHECK: sub <64 x i8> {{.*}}, {{.*}}
> +return _mm512_subs_epu8(__A,__B);
>  }
>  __m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask_subs_epu8
> -  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
> -  return _mm512_mask_subs_epu8(__W,__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
> +  // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
> +  // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
> +  // CHECK: sub <64 x i8> {{.*}}, {{.*}}
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> +return _mm512_mask_subs_epu8(__W,__U,__A,__B);
>  }
>  __m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i
> __B) {
>    // CHECK-LABEL: @test_mm512_maskz_subs_epu8
> -  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
> -  return _mm512_maskz_subs_epu8(__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
> +  // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
> +  // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
> +  // CHECK: sub <64 x i8> {{.*}}, {{.*}}
> +  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> +return _mm512_maskz_subs_epu8(__U,__A,__B);
>  }
>  __m512i test_mm512_subs_epu16(__m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_subs_epu16
> -  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
> -  return _mm512_subs_epu16(__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
> +  // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
> +  // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
> +  // CHECK: sub <32 x i16> {{.*}}, {{.*}}
> +return _mm512_subs_epu16(__A,__B);
>  }
>  __m512i test_mm512_mask_subs_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask_subs_epu16
> -  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
> -  return _mm512_mask_subs_epu16(__W,__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
> +  // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
> +  // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
> +  // CHECK: sub <32 x i16> {{.*}}, {{.*}}
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
> +return _mm512_mask_subs_epu16(__W,__U,__A,__B);
>  }
>  __m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_maskz_subs_epu16
> -  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
> -  return _mm512_maskz_subs_epu16(__U,__A,__B);
> +  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
> +  // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
> +  // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
> +  // CHECK: sub <32 x i16> {{.*}}, {{.*}}
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
> +return _mm512_maskz_subs_epu16(__U,__A,__B);
>  }
>  __m512i test_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, __m512i __B) {
>    // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi16
>
> Modified: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vlbw-builtins.c?rev=330323&r1=330322&r2=330323&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Thu Apr 19 05:15:11 2018
> @@ -1075,97 +1075,211 @@ __m256i test_mm256_mask_packus_epi16(__m
>
>  __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A,
>       __m128i __B) {
>    // CHECK-LABEL: @test_mm_mask_adds_epi8
> -  // CHECK: @llvm.x86.sse2.padds.b
> +  // CHECK-NOT: @llvm.x86.sse2.padds.b
> +  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> +
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16>
> %{{.*}}
> +  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
>    // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
>    return _mm_mask_adds_epi8(__W,__U,__A,__B);
>  }
>  __m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
>    // CHECK-LABEL: @test_mm_maskz_adds_epi8
> -  // CHECK: @llvm.x86.sse2.padds.b
> +  // CHECK-NOT: @llvm.x86.sse2.padds.b
> +  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> +
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16>
> %{{.*}}
> +  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
>    // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
>    return _mm_maskz_adds_epi8(__U,__A,__B);
>  }
>  __m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
>    // CHECK-LABEL: @test_mm256_mask_adds_epi8
> -  // CHECK: @llvm.x86.avx2.padds.b
> +  // CHECK-NOT: @llvm.x86.avx2.padds.b
> +  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32
> x i16> %{{.*}}
> +  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
>    // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
>    return _mm256_mask_adds_epi8(__W,__U,__A,__B);
>  }
>  __m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
>    // CHECK-LABEL: @test_mm256_maskz_adds_epi8
> -  // CHECK: @llvm.x86.avx2.padds.b
> +  // CHECK-NOT: @llvm.x86.avx2.padds.b
> +  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16
> -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128,
> i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32
> x i16> %{{.*}}
> +  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
>    // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
>    return _mm256_maskz_adds_epi8(__U,__A,__B);
>  }
>  __m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A,
>        __m128i __B) {
>    // CHECK-LABEL: @test_mm_mask_adds_epi16
> -  // CHECK: @llvm.x86.sse2.padds.w
> +  // CHECK-NOT: @llvm.x86.sse2.padds.w
> +  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
> +  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
> +  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767>
> +  // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> +  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8
> x i32> %{{.*}}
> +  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
>    // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
>    return _mm_mask_adds_epi16(__W,__U,__A,__B);
>  }
>  __m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
>    // CHECK-LABEL: @test_mm_maskz_adds_epi16
> -  // CHECK: @llvm.x86.sse2.padds.w
> +  // CHECK-NOT: @llvm.x86.sse2.padds.w
> +  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
> +  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
> +  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767>
> +  // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
> +  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8
> x i32> %{{.*}}
> +  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
>    // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
>    return _mm_maskz_adds_epi16(__U,__A,__B);
>  }
>  __m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
>    // CHECK-LABEL: @test_mm256_mask_adds_epi16
> -  // CHECK: @llvm.x86.avx2.padds.w
> +  // CHECK-NOT: @llvm.x86.avx2.padds.w
> +  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>, <16 x i32> %{{.*}}
> +  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
>    // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
>    return _mm256_mask_adds_epi16(__W,__U,__A,__B);
>  }
>  __m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
>    // CHECK-LABEL: @test_mm256_maskz_adds_epi16
> -  // CHECK: @llvm.x86.avx2.padds.w
> +  // CHECK-NOT: @llvm.x86.avx2.padds.w
> +  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767,
> i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32
> 32767, i32 32767, i32 32767>
> +  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768,
> i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32
> -32768, i32 -32768>, <16 x i32> %{{.*}}
> +  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
>    // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
>    return _mm256_maskz_adds_epi16(__U,__A,__B);
>  }
> -__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A,
>       __m128i __B) {
> +__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
>    // CHECK-LABEL: @test_mm_mask_adds_epu8
> -  // CHECK: @llvm.x86.sse2.paddus.b
> +  // CHECK-NOT: @llvm.x86.sse2.paddus.b
> +  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <16 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
>    // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
>    return _mm_mask_adds_epu8(__W,__U,__A,__B);
>  }
>  __m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
>    // CHECK-LABEL: @test_mm_maskz_adds_epu8
> -  // CHECK: @llvm.x86.sse2.paddus.b
> +  // CHECK-NOT: @llvm.x86.sse2.paddus.b
> +  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <16 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
>    // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
>    return _mm_maskz_adds_epu8(__U,__A,__B);
>  }
>  __m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
>    // CHECK-LABEL: @test_mm256_mask_adds_epu8
> -  // CHECK: @llvm.x86.avx2.paddus.b
> +  // CHECK-NOT: @llvm.x86.avx2.paddus.b
> +  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
>    // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
>    return _mm256_mask_adds_epu8(__W,__U,__A,__B);
>  }
>  __m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
>    // CHECK-LABEL: @test_mm256_maskz_adds_epu8
> -  // CHECK: @llvm.x86.avx2.paddus.b
> +  // CHECK-NOT: @llvm.x86.avx2.paddus.b
> +  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> +  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16
> 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> +  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
>    // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
>    return _mm256_maskz_adds_epu8(__U,__A,__B);
>  }
>  __m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A,
>        __m128i __B) {
>    // CHECK-LABEL: @test_mm_mask_adds_epu16
> -  // CHECK: @llvm.x86.sse2.paddus.w
> +  // CHECK-NOT: @llvm.x86.sse2.paddus.w
> +  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> +  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> +  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <8 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535>
> +  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
>    // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
>    return _mm_mask_adds_epu16(__W,__U,__A,__B);
>  }
>  __m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
>    // CHECK-LABEL: @test_mm_maskz_adds_epu16
> -  // CHECK: @llvm.x86.sse2.paddus.w
> +  // CHECK-NOT: @llvm.x86.sse2.paddus.w
> +  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> +  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> +  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <8 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535>
> +  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
>    // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
>    return _mm_maskz_adds_epu16(__U,__A,__B);
>  }
>  __m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
>    // CHECK-LABEL: @test_mm256_mask_adds_epu16
> -  // CHECK: @llvm.x86.avx2.paddus.w
> +  // CHECK-NOT: @llvm.x86.avx2.paddus.w
> +  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535>
> +  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
>    // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
>    return _mm256_mask_adds_epu16(__W,__U,__A,__B);
>  }
>  __m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
>    // CHECK-LABEL: @test_mm256_maskz_adds_epu16
> -  // CHECK: @llvm.x86.avx2.paddus.w
> +  // CHECK-NOT: @llvm.x86.avx2.paddus.w
> +  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> +  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> +  // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535,
> i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535>
> +  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
>    // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
>    return _mm256_maskz_adds_epu16(__U,__A,__B);
>  }
> @@ -1519,102 +1633,191 @@ __m256i test_mm256_maskz_shuffle_epi8(__
>  }
>  __m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A,
>       __m128i __B) {
>    // CHECK-LABEL: @test_mm_mask_subs_epi8
> -  // CHECK: @llvm.x86.sse2.psubs.b
> +  // CHECK-NOT: @llvm.x86.sse2.psubs.b
> +  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
> +  // CHECK: sub <16 x i16> %{{.*}}, %{{.*}}
> +  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16
> 127, i16 127, i16 127, i16 127, i16 127>
> +  // CHECK: select <16 x i1> %{{.*}}, <16 x
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180426/b6df0ea4/attachment-0001.html>


More information about the cfe-commits mailing list