[llvm] r339659 - [X86] Constant folding of adds/subs intrinsics

Tomasz Krupa via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 14 02:04:02 PDT 2018


Author: tkrupa
Date: Tue Aug 14 02:04:01 2018
New Revision: 339659

URL: http://llvm.org/viewvc/llvm-project?rev=339659&view=rev
Log:
[X86] Constant folding of adds/subs intrinsics

Summary: This adds constant folding of signed add/sub with saturation intrinsics.

Reviewers: craig.topper, spatel, RKSimon, chandlerc, efriedma

Reviewed By: craig.topper

Subscribers: rnk, llvm-commits

Differential Revision: https://reviews.llvm.org/D50499


Added:
    llvm/trunk/test/Transforms/InstCombine/X86/x86-adds-subs.ll
Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=339659&r1=339658&r2=339659&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Aug 14 02:04:01 2018
@@ -243,6 +243,86 @@ Instruction *InstCombiner::SimplifyAnyMe
   return nullptr;
 }
 
+static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder) {
+  bool IsAddition = false;
+  bool IsMasked = false;
+
+  switch (II.getIntrinsicID()) {
+  default: llvm_unreachable("Unexpected intrinsic!");
+  case Intrinsic::x86_sse2_padds_b:
+  case Intrinsic::x86_sse2_padds_w:
+  case Intrinsic::x86_avx2_padds_b:
+  case Intrinsic::x86_avx2_padds_w:
+    IsAddition = true; IsMasked = false;
+    break;
+  case Intrinsic::x86_sse2_psubs_b:
+  case Intrinsic::x86_sse2_psubs_w:
+  case Intrinsic::x86_avx2_psubs_b:
+  case Intrinsic::x86_avx2_psubs_w:
+    IsAddition = false; IsMasked = false;
+    break;
+  case Intrinsic::x86_avx512_mask_padds_b_512:
+  case Intrinsic::x86_avx512_mask_padds_w_512:
+    IsAddition = true; IsMasked = true;
+    break;
+  case Intrinsic::x86_avx512_mask_psubs_b_512:
+  case Intrinsic::x86_avx512_mask_psubs_w_512:
+    IsAddition = false; IsMasked = true;
+    break;
+  }
+
+  auto *Arg0 = dyn_cast<Constant>(II.getOperand(0));
+  auto *Arg1 = dyn_cast<Constant>(II.getOperand(1));
+  auto VT = cast<VectorType>(II.getType());
+  auto SVT = VT->getElementType();
+  unsigned NumElems = VT->getNumElements();
+
+  if (!Arg0 || !Arg1 || (IsMasked && !isa<Constant>(II.getOperand(2))))
+    return nullptr;
+
+  SmallVector<Constant *, 64> Result;
+
+  APInt MaxValue = APInt::getSignedMaxValue(SVT->getIntegerBitWidth());
+  APInt MinValue = APInt::getSignedMinValue(SVT->getIntegerBitWidth());
+  for (unsigned i = 0; i < NumElems; ++i) {
+    auto *Elt0 = Arg0->getAggregateElement(i);
+    auto *Elt1 = Arg1->getAggregateElement(i);
+    if (isa<UndefValue>(Elt0) || isa<UndefValue>(Elt1)) {
+      Result.push_back(UndefValue::get(SVT));
+      continue;
+    }
+
+    if (!isa<ConstantInt>(Elt0) || !isa<ConstantInt>(Elt1))
+      return nullptr;
+
+    const APInt &Val0 = cast<ConstantInt>(Elt0)->getValue();
+    const APInt &Val1 = cast<ConstantInt>(Elt1)->getValue();
+    bool Overflow = false;
+    APInt ResultElem = IsAddition ? Val0.sadd_ov(Val1, Overflow)
+                                  : Val0.ssub_ov(Val1, Overflow);
+    if (Overflow)
+      ResultElem = Val0.isNegative() ? MinValue : MaxValue;
+    Result.push_back(Constant::getIntegerValue(SVT, ResultElem));
+  }
+
+  Value *ResultVec = ConstantVector::get(Result);
+
+  if (II.getNumArgOperands() == 4) { // For masked intrinsics.
+    Value *Src = II.getOperand(2);
+    auto Mask = II.getOperand(3);
+    if (auto *C = dyn_cast<Constant>(Mask))
+      if (C->isAllOnesValue())
+        return ResultVec;
+    auto *MaskTy = VectorType::get(
+        Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
+    Mask = Builder.CreateBitCast(Mask, MaskTy);
+    ResultVec = Builder.CreateSelect(Mask, ResultVec, Src);
+  }
+
+  return ResultVec;
+}
+
 static Value *simplifyX86immShift(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
   bool LogicalShift = false;
@@ -2525,6 +2605,24 @@ Instruction *InstCombiner::visitCallInst
     break;
   }
 
+  // Constant fold add/sub with saturation intrinsics.
+  case Intrinsic::x86_sse2_padds_b:
+  case Intrinsic::x86_sse2_padds_w:
+  case Intrinsic::x86_sse2_psubs_b:
+  case Intrinsic::x86_sse2_psubs_w:
+  case Intrinsic::x86_avx2_padds_b:
+  case Intrinsic::x86_avx2_padds_w:
+  case Intrinsic::x86_avx2_psubs_b:
+  case Intrinsic::x86_avx2_psubs_w:
+  case Intrinsic::x86_avx512_mask_padds_b_512:
+  case Intrinsic::x86_avx512_mask_padds_w_512:
+  case Intrinsic::x86_avx512_mask_psubs_b_512:
+  case Intrinsic::x86_avx512_mask_psubs_w_512:
+    if (Value *V = simplifyX86AddsSubs(*II, Builder))
+      return replaceInstUsesWith(*II, V);
+    break;
+    
+
   // Constant fold ashr( <A x Bi>, Ci ).
   // Constant fold lshr( <A x Bi>, Ci ).
   // Constant fold shl( <A x Bi>, Ci ).

Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-adds-subs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-adds-subs.ll?rev=339659&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-adds-subs.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-adds-subs.ll Tue Aug 14 02:04:01 2018
@@ -0,0 +1,351 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <16 x i8> @sse2_adds_b_constant() {
+; CHECK-LABEL: @sse2_adds_b_constant(
+; CHECK-NEXT: ret <16 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @sse2_adds_b_constant_underflow() {
+; CHECK-LABEL: @sse2_adds_b_constant_underflow(
+; CHECK-NEXT: ret <16 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @sse2_adds_b_constant_overflow() {
+; CHECK-LABEL: @sse2_adds_b_constant_overflow(
+; CHECK-NEXT: ret <16 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127>
+  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120>)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @sse2_adds_b_constant_undefs() {
+; CHECK-LABEL: @sse2_adds_b_constant_undefs(
+; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <16 x i8> %1
+}
+
+define <32 x i8> @avx2_adds_b_constant() {
+; CHECK-LABEL: @avx2_adds_b_constant(
+; CHECK-NEXT: ret <32 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @avx2_adds_b_constant_underflow() {
+; CHECK-LABEL: @avx2_adds_b_constant_underflow(
+; CHECK-NEXT: ret <32 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @avx2_adds_b_constant_overflow() {
+; CHECK-LABEL: @avx2_adds_b_constant_overflow(
+; CHECK-NEXT: ret <32 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127>
+  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120>)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @avx2_adds_b_constant_undefs() {
+; CHECK-LABEL: @avx2_adds_b_constant_undefs(
+; CHECK-NEXT: ret <32 x i8> <i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <32 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <32 x i8> %1
+}
+
+define <64 x i8> @avx512_mask_adds_b_constant() {
+; CHECK-LABEL: @avx512_mask_adds_b_constant(
+; CHECK-NEXT: ret <64 x i8> <i8 2, i8 0, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -3)
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @avx512_mask_adds_b_constant_underflow() {
+; CHECK-LABEL: @avx512_mask_adds_b_constant_underflow(
+; CHECK-NEXT: ret <64 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -1)
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @avx512_mask_adds_b_constant_overflow() {
+; CHECK-LABEL: @avx512_mask_adds_b_constant_overflow(
+; CHECK-NEXT: ret <64 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127>
+  %1 = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120>, <64 x i8> zeroinitializer, i64 -1)
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @avx512_mask_adds_b_constant_undefs() {
+; CHECK-LABEL: @avx512_mask_adds_b_constant_undefs(
+; CHECK-NEXT: ret <64 x i8> <i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
+  %1 = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <64 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -1)
+  ret <64 x i8> %1
+}
+
+define <8 x i16> @sse2_adds_w_constant() {
+; CHECK-LABEL: @sse2_adds_w_constant(
+; CHECK-NEXT: ret <8 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16>
+  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_adds_w_constant_underflow() {
+; CHECK-LABEL: @sse2_adds_w_constant_underflow(
+; CHECK-NEXT: ret <8 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 -32768>
+  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -32107>, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -12188>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_adds_w_constant_overflow() {
+; CHECK-LABEL: @sse2_adds_w_constant_overflow(
+; CHECK-NEXT: ret <8 x i16> <i16 2, i16 32767, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16>
+  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> <i16 1, i16 8248, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> <i16 1, i16 25192, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_adds_w_constant_undefs() {
+; CHECK-LABEL: @sse2_adds_w_constant_undefs(
+; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 4, i16 6, i16 8, i16 10, i16 undef, i16 14, i16 16>
+  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 undef, i16 7, i16 8>, <8 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
+  ret <8 x i16> %1
+}
+
+define <16 x i16> @avx2_adds_w_constant() {
+; CHECK-LABEL: @avx2_adds_w_constant(
+; CHECK-NEXT: ret <16 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
+  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_adds_w_constant_underflow() {
+; CHECK-LABEL: @avx2_adds_w_constant_underflow(
+; CHECK-NEXT: ret <16 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 -32768, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
+  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -21107, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -15188, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_adds_w_constant_overflow() {
+; CHECK-LABEL: @avx2_adds_w_constant_overflow(
+; CHECK-NEXT: ret <16 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 32767, i16 24, i16 26, i16 28, i16 30, i16 32767>
+  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20125, i16 12, i16 13, i16 14, i16 15, i16 20160>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20230, i16 12, i16 13, i16 14, i16 15, i16 20120>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_adds_w_constant_undefs() {
+; CHECK-LABEL: @avx2_adds_w_constant_undefs(
+; CHECK-NEXT: ret <16 x i16> <i16 undef, i16 4, i16 6, i16 8, i16 10, i16 undef, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
+  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 undef, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <16 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
+  ret <16 x i16> %1
+}
+
+define <32 x i16> @avx512_mask_adds_w_constant() {
+; CHECK-LABEL: @avx512_mask_adds_w_constant(
+; CHECK-NEXT: ret <32 x i16> <i16 2, i16 0, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
+  %1 = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -3)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_mask_adds_w_constant_underflow() {
+; CHECK-LABEL: @avx512_mask_adds_w_constant_underflow(
+; CHECK-NEXT: ret <32 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 -32768, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 -32768, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
+  %1 = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -20107, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -20107, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -20168, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -20248, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -1)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_mask_adds_w_constant_overflow() {
+; CHECK-LABEL: @avx512_mask_adds_w_constant_overflow(
+; CHECK-NEXT: ret <32 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 32767, i16 24, i16 26, i16 28, i16 30, i16 32767, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 32767, i16 24, i16 26, i16 28, i16 30, i16 32767>
+  %1 = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20125, i16 12, i16 13, i16 14, i16 15, i16 20200, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20125, i16 12, i16 13, i16 14, i16 15, i16 20200>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20200, i16 12, i16 13, i16 14, i16 15, i16 20120, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20211, i16 12, i16 13, i16 14, i16 15, i16 20120>, <32 x i16> zeroinitializer, i32 -1)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_mask_adds_w_constant_undefs() {
+; CHECK-LABEL: @avx512_mask_adds_w_constant_undefs(
+; CHECK-NEXT: ret <32 x i16> <i16 undef, i16 4, i16 6, i16 8, i16 10, i16 undef, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33, i16 undef, i16 4, i16 6, i16 8, i16 10, i16 undef, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
+  %1 = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 undef, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 undef, i16 2, i16 3, i16 4, i16 5, i16 undef, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <32 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -1)
+  ret <32 x i16> %1
+}
+
+define <16 x i8> @sse2_subs_b_constant() {
+; CHECK-LABEL: @sse2_subs_b_constant(
+; CHECK-NEXT: ret <16 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @sse2_subs_b_constant_underflow() {
+; CHECK-LABEL: @sse2_subs_b_constant_underflow(
+; CHECK-NEXT: ret <16 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @sse2_subs_b_constant_overflow() {
+; CHECK-LABEL: @sse2_subs_b_constant_overflow(
+; CHECK-NEXT: ret <16 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127>
+  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120>)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @sse2_subs_b_constant_undefs() {
+; CHECK-LABEL: @sse2_subs_b_constant_undefs(
+; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> <i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <16 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <16 x i8> %1
+}
+
+define <32 x i8> @avx2_subs_b_constant() {
+; CHECK-LABEL: @avx2_subs_b_constant(
+; CHECK-NEXT: ret <32 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @avx2_subs_b_constant_underflow() {
+; CHECK-LABEL: @avx2_subs_b_constant_underflow(
+; CHECK-NEXT: ret <32 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @avx2_subs_b_constant_overflow() {
+; CHECK-LABEL: @avx2_subs_b_constant_overflow(
+; CHECK-NEXT: ret <32 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127>
+  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120>)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @avx2_subs_b_constant_undefs() {
+; CHECK-LABEL: @avx2_subs_b_constant_undefs(
+; CHECK-NEXT: ret <32 x i8> <i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> <i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <32 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
+  ret <32 x i8> %1
+}
+
+define <64 x i8> @avx512_mask_subs_b_constant() {
+; CHECK-LABEL: @avx512_mask_subs_b_constant(
+; CHECK-NEXT: ret <64 x i8> <i8 -2, i8 0, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -3)
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @avx512_mask_subs_b_constant_underflow() {
+; CHECK-LABEL: @avx512_mask_subs_b_constant_underflow(
+; CHECK-NEXT: ret <64 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -1)
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @avx512_mask_subs_b_constant_overflow() {
+; CHECK-LABEL: @avx512_mask_subs_b_constant_overflow(
+; CHECK-NEXT: ret <64 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127>
+  %1 = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120>, <64 x i8> zeroinitializer, i64 -1)
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @avx512_mask_subs_b_constant_undefs() {
+; CHECK-LABEL: @avx512_mask_subs_b_constant_undefs(
+; CHECK-NEXT: ret <64 x i8> <i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
+  %1 = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> <i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <64 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -1)
+  ret <64 x i8> %1
+}
+
+define <8 x i16> @sse2_subs_w_constant() {
+; CHECK-LABEL: @sse2_subs_w_constant(
+; CHECK-NEXT: ret <8 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16>
+  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8>, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_subs_w_constant_underflow() {
+; CHECK-LABEL: @sse2_subs_w_constant_underflow(
+; CHECK-NEXT: ret <8 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -32768>
+  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -32107>, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 12188>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_subs_w_constant_overflow() {
+; CHECK-LABEL: @sse2_subs_w_constant_overflow(
+; CHECK-NEXT: ret <8 x i16> <i16 -2, i16 32767, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16>
+  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> <i16 -1, i16 8248, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8>, <8 x i16> <i16 1, i16 -25192, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_subs_w_constant_undefs() {
+; CHECK-LABEL: @sse2_subs_w_constant_undefs(
+; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 -4, i16 -6, i16 -8, i16 -10, i16 undef, i16 -14, i16 -16>
+  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> <i16 undef, i16 -2, i16 -3, i16 -4, i16 -5, i16 undef, i16 -7, i16 -8>, <8 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
+  ret <8 x i16> %1
+}
+
+define <16 x i16> @avx2_subs_w_constant() {
+; CHECK-LABEL: @avx2_subs_w_constant(
+; CHECK-NEXT: ret <16 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
+  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_subs_w_constant_underflow() {
+; CHECK-LABEL: @avx2_subs_w_constant_underflow(
+; CHECK-NEXT: ret <16 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -32768, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
+  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -21107, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 15188, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_subs_w_constant_overflow() {
+; CHECK-LABEL: @avx2_subs_w_constant_overflow(
+; CHECK-NEXT: ret <16 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 32767, i16 -24, i16 -26, i16 -28, i16 -30, i16 32767>
+  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 20125, i16 -12, i16 -13, i16 -14, i16 -15, i16 20160>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 -20230, i16 12, i16 13, i16 14, i16 15, i16 -20120>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_subs_w_constant_undefs() {
+; CHECK-LABEL: @avx2_subs_w_constant_undefs(
+; CHECK-NEXT: ret <16 x i16> <i16 undef, i16 -4, i16 -6, i16 -8, i16 -10, i16 undef, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
+  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> <i16 undef, i16 -2, i16 -3, i16 -4, i16 -5, i16 undef, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <16 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
+  ret <16 x i16> %1
+}
+
+define <32 x i16> @avx512_mask_subs_w_constant() {
+; CHECK-LABEL: @avx512_mask_subs_w_constant(
+; CHECK-NEXT: ret <32 x i16> <i16 -2, i16 0, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33, i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
+  %1 = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -3)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_mask_subs_w_constant_underflow() {
+; CHECK-LABEL: @avx512_mask_subs_w_constant_underflow(
+; CHECK-NEXT: ret <32 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -32768, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33, i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -32768, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
+  %1 = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -20107, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -20107, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 20168, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 20248, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -1)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_mask_subs_w_constant_overflow() {
+; CHECK-LABEL: @avx512_mask_subs_w_constant_overflow(
+; CHECK-NEXT: ret <32 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 32767, i16 -24, i16 -26, i16 -28, i16 -30, i16 32767, i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 32767, i16 -24, i16 -26, i16 -28, i16 -30, i16 32767>
+  %1 = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 20125, i16 -12, i16 -13, i16 -14, i16 -15, i16 20200, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 20125, i16 -12, i16 -13, i16 -14, i16 -15, i16 20200>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 -20200, i16 12, i16 13, i16 14, i16 15, i16 -20120, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 -20211, i16 12, i16 13, i16 14, i16 15, i16 -20120>, <32 x i16> zeroinitializer, i32 -1)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_mask_subs_w_constant_undefs() {
+; CHECK-LABEL: @avx512_mask_subs_w_constant_undefs(
+; CHECK-NEXT: ret <32 x i16> <i16 undef, i16 -4, i16 -6, i16 -8, i16 -10, i16 undef, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33, i16 undef, i16 -4, i16 -6, i16 -8, i16 -10, i16 undef, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
+  %1 = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> <i16 undef, i16 -2, i16 -3, i16 -4, i16 -5, i16 undef, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 undef, i16 -2, i16 -3, i16 -4, i16 -5, i16 undef, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <32 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -1)
+  ret <32 x i16> %1
+}
+
+declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
+declare <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) nounwind readnone
+declare <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) nounwind readnone
+declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) nounwind readnone
+declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) nounwind readnone




More information about the llvm-commits mailing list